allprofanity 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +253 -247
- package/dist/index.d.ts +80 -77
- package/dist/index.js +547 -488
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// Import language dictionaries (assuming these exist)
|
|
1
2
|
import englishBadWords from "./languages/english-words.js";
|
|
2
3
|
import hindiBadWords from "./languages/hindi-words.js";
|
|
3
4
|
import frenchBadWords from "./languages/french-words.js";
|
|
@@ -15,6 +16,20 @@ export { default as spanishBadWords } from "./languages/spanish-words.js";
|
|
|
15
16
|
export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
16
17
|
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
17
18
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
19
|
+
/**
|
|
20
|
+
* Default console logger implementation
|
|
21
|
+
*/
|
|
22
|
+
class ConsoleLogger {
|
|
23
|
+
info(message) {
|
|
24
|
+
console.log(`[AllProfanity] ${message}`);
|
|
25
|
+
}
|
|
26
|
+
warn(message) {
|
|
27
|
+
console.warn(`[AllProfanity] ${message}`);
|
|
28
|
+
}
|
|
29
|
+
error(message) {
|
|
30
|
+
console.error(`[AllProfanity] ${message}`);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
18
33
|
/**
|
|
19
34
|
* Severity levels for profanity detection
|
|
20
35
|
*/
|
|
@@ -26,77 +41,136 @@ export var ProfanitySeverity;
|
|
|
26
41
|
ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
|
|
27
42
|
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
|
|
28
43
|
/**
|
|
29
|
-
*
|
|
30
|
-
* No external dependencies - built from scratch for maximum performance and control
|
|
44
|
+
* Validates input parameters
|
|
31
45
|
*/
|
|
32
|
-
|
|
46
|
+
function validateString(input, paramName) {
|
|
47
|
+
if (typeof input !== "string") {
|
|
48
|
+
throw new TypeError(`${paramName} must be a string, got ${typeof input}`);
|
|
49
|
+
}
|
|
50
|
+
return input;
|
|
51
|
+
}
|
|
52
|
+
function validateStringArray(input, paramName) {
|
|
53
|
+
if (!Array.isArray(input)) {
|
|
54
|
+
throw new TypeError(`${paramName} must be an array`);
|
|
55
|
+
}
|
|
56
|
+
return input.filter((item) => {
|
|
57
|
+
if (typeof item !== "string") {
|
|
58
|
+
console.warn(`Skipping non-string item in ${paramName}: ${item}`);
|
|
59
|
+
return false;
|
|
60
|
+
}
|
|
61
|
+
return item.trim().length > 0;
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Efficient Trie data structure for fast string matching
|
|
66
|
+
*/
|
|
67
|
+
class TrieNode {
|
|
68
|
+
constructor() {
|
|
69
|
+
this.children = new Map();
|
|
70
|
+
this.isEndOfWord = false;
|
|
71
|
+
this.word = "";
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Add a word to the trie
|
|
75
|
+
*/
|
|
76
|
+
addWord(word) {
|
|
77
|
+
let current = this;
|
|
78
|
+
for (const char of word) {
|
|
79
|
+
if (!current.children.has(char)) {
|
|
80
|
+
current.children.set(char, new TrieNode());
|
|
81
|
+
}
|
|
82
|
+
const nextNode = current.children.get(char);
|
|
83
|
+
if (nextNode) {
|
|
84
|
+
current = nextNode;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
current.isEndOfWord = true;
|
|
88
|
+
current.word = word;
|
|
89
|
+
}
|
|
33
90
|
/**
|
|
34
|
-
*
|
|
35
|
-
* @param options - Configuration options
|
|
91
|
+
* Remove a word from the trie
|
|
36
92
|
*/
|
|
93
|
+
removeWord(word) {
|
|
94
|
+
return this.removeHelper(word, 0);
|
|
95
|
+
}
|
|
96
|
+
removeHelper(word, index) {
|
|
97
|
+
if (index === word.length) {
|
|
98
|
+
if (!this.isEndOfWord)
|
|
99
|
+
return false;
|
|
100
|
+
this.isEndOfWord = false;
|
|
101
|
+
return this.children.size === 0;
|
|
102
|
+
}
|
|
103
|
+
const char = word[index];
|
|
104
|
+
const node = this.children.get(char);
|
|
105
|
+
if (!node)
|
|
106
|
+
return false;
|
|
107
|
+
const shouldDeleteChild = node.removeHelper(word, index + 1);
|
|
108
|
+
if (shouldDeleteChild) {
|
|
109
|
+
this.children.delete(char);
|
|
110
|
+
return this.children.size === 0 && !this.isEndOfWord;
|
|
111
|
+
}
|
|
112
|
+
return false;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Find all matches starting at a given position
|
|
116
|
+
*/
|
|
117
|
+
findMatches(text, startPos, allowPartial) {
|
|
118
|
+
const matches = [];
|
|
119
|
+
let current = this;
|
|
120
|
+
let pos = startPos;
|
|
121
|
+
while (pos < text.length) {
|
|
122
|
+
const nextNode = current.children.get(text[pos]);
|
|
123
|
+
if (!nextNode)
|
|
124
|
+
break;
|
|
125
|
+
current = nextNode;
|
|
126
|
+
pos++;
|
|
127
|
+
if (current.isEndOfWord) {
|
|
128
|
+
if (!allowPartial) {
|
|
129
|
+
const wordStart = startPos;
|
|
130
|
+
const wordEnd = pos;
|
|
131
|
+
matches.push({
|
|
132
|
+
word: current.word,
|
|
133
|
+
start: wordStart - startPos,
|
|
134
|
+
end: wordEnd - startPos,
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
else {
|
|
138
|
+
matches.push({
|
|
139
|
+
word: current.word,
|
|
140
|
+
start: 0,
|
|
141
|
+
end: pos - startPos,
|
|
142
|
+
});
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
return matches;
|
|
147
|
+
}
|
|
148
|
+
/**
|
|
149
|
+
* Clear all words from the trie
|
|
150
|
+
*/
|
|
151
|
+
clear() {
|
|
152
|
+
this.children.clear();
|
|
153
|
+
this.isEndOfWord = false;
|
|
154
|
+
this.word = "";
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Advanced AllProfanity - Fixed profanity filter with multi-language support
|
|
159
|
+
* Addresses all critical issues from the original implementation
|
|
160
|
+
*/
|
|
161
|
+
export class AllProfanity {
|
|
37
162
|
constructor(options) {
|
|
38
|
-
var _a, _b, _c, _d;
|
|
39
|
-
this.
|
|
40
|
-
this.normalizedProfanityMap = new Map();
|
|
41
|
-
this.defaultPlaceholder = "*";
|
|
42
|
-
this.loadedLanguages = new Set();
|
|
163
|
+
var _a, _b, _c, _d, _e;
|
|
164
|
+
this.profanityTrie = new TrieNode();
|
|
43
165
|
this.whitelistSet = new Set();
|
|
166
|
+
this.loadedLanguages = new Set();
|
|
167
|
+
// Configuration
|
|
168
|
+
this.defaultPlaceholder = "*";
|
|
44
169
|
this.enableLeetSpeak = true;
|
|
45
170
|
this.caseSensitive = false;
|
|
46
171
|
this.strictMode = false;
|
|
47
|
-
this.detectPartialWords =
|
|
48
|
-
//
|
|
49
|
-
this.leetMap = {
|
|
50
|
-
a: ["4", "@", "^", "aye", "λ", "ª"],
|
|
51
|
-
b: ["8", "6", "|3", "ß", "β", "13"],
|
|
52
|
-
c: ["(", "<", "©", "¢", "see", "sea"],
|
|
53
|
-
d: ["|)", "|]", "0", "ð"],
|
|
54
|
-
e: ["3", "€", "£", "ë", "é", "è"],
|
|
55
|
-
f: ["|=", "ph", "|#", "ƒ"],
|
|
56
|
-
g: ["9", "6", "&", "gee"],
|
|
57
|
-
h: ["#", "|-|", "[-]", "}{", "ħ"],
|
|
58
|
-
i: ["1", "!", "|", "eye", "ï", "í", "ì"],
|
|
59
|
-
j: ["_|", "_/", "¿", "ĵ"],
|
|
60
|
-
k: ["|<", "1<", "l<", "|{", "ķ"],
|
|
61
|
-
l: ["1", "|", "7", "£", "ł", "ĺ"],
|
|
62
|
-
m: ["|/|", "//\\", "em", "ɱ"],
|
|
63
|
-
n: ["||", "//", "and", "ñ", "ń"],
|
|
64
|
-
o: ["0", "()", "oh", "ø", "ó", "ò", "ô"],
|
|
65
|
-
p: ["|*", "|o", "|^", "|>", "9", "þ"],
|
|
66
|
-
q: ["(_,)", "()_", "kw", "ĸ"],
|
|
67
|
-
r: ["|2", "12", ".-", "are", "ř", "ŕ"],
|
|
68
|
-
s: ["5", "$", "z", "ş", "ś", "š"],
|
|
69
|
-
t: ["7", "+", "-|-", "†", "ť", "ţ"],
|
|
70
|
-
u: ["(_)", "|_|", "v", "you", "ü", "ú", "ù"],
|
|
71
|
-
v: ["\\/", "|/", "|", "vee"],
|
|
72
|
-
w: ["\\/\\/", "vv", "dubya", "ŵ"],
|
|
73
|
-
x: ["><", "}{", "ecks", "χ"],
|
|
74
|
-
y: ["`/", "j", "why", "ÿ", "ý"],
|
|
75
|
-
z: ["2", "7_", "-/_", "zee", "ž", "ź", "ż"],
|
|
76
|
-
};
|
|
77
|
-
// Word boundary patterns
|
|
78
|
-
this.wordBoundaryChars = /[\s\.,;:!?\-_+=\[\]{}()"'\/\\]/;
|
|
79
|
-
// Common word variations and suffixes
|
|
80
|
-
this.commonSuffixes = [
|
|
81
|
-
"ing",
|
|
82
|
-
"ed",
|
|
83
|
-
"s",
|
|
84
|
-
"er",
|
|
85
|
-
"ers",
|
|
86
|
-
"est",
|
|
87
|
-
"ly",
|
|
88
|
-
"tion",
|
|
89
|
-
"ness",
|
|
90
|
-
];
|
|
91
|
-
this.commonPrefixes = [
|
|
92
|
-
"un",
|
|
93
|
-
"re",
|
|
94
|
-
"pre",
|
|
95
|
-
"dis",
|
|
96
|
-
"over",
|
|
97
|
-
"under",
|
|
98
|
-
"out",
|
|
99
|
-
];
|
|
172
|
+
this.detectPartialWords = false;
|
|
173
|
+
// Available language dictionaries
|
|
100
174
|
this.availableLanguages = {
|
|
101
175
|
english: englishBadWords || [],
|
|
102
176
|
hindi: hindiBadWords || [],
|
|
@@ -107,442 +181,293 @@ export class AllProfanity {
|
|
|
107
181
|
tamil: tamilBadWords || [],
|
|
108
182
|
telugu: teluguBadWords || [],
|
|
109
183
|
};
|
|
110
|
-
//
|
|
111
|
-
|
|
184
|
+
// Fixed leet speak mappings
|
|
185
|
+
this.leetMappings = new Map([
|
|
186
|
+
["@", "a"],
|
|
187
|
+
["^", "a"],
|
|
188
|
+
["4", "a"],
|
|
189
|
+
["8", "b"],
|
|
190
|
+
["6", "b"],
|
|
191
|
+
["|3", "b"],
|
|
192
|
+
["(", "c"],
|
|
193
|
+
["<", "c"],
|
|
194
|
+
["©", "c"],
|
|
195
|
+
["|)", "d"],
|
|
196
|
+
["0", "o"],
|
|
197
|
+
["3", "e"],
|
|
198
|
+
["€", "e"],
|
|
199
|
+
["|=", "f"],
|
|
200
|
+
["ph", "f"],
|
|
201
|
+
["9", "g"],
|
|
202
|
+
["#", "h"],
|
|
203
|
+
["|-|", "h"],
|
|
204
|
+
["1", "i"],
|
|
205
|
+
["!", "i"],
|
|
206
|
+
["|", "i"],
|
|
207
|
+
["_|", "j"],
|
|
208
|
+
["¿", "j"],
|
|
209
|
+
["|<", "k"],
|
|
210
|
+
["1<", "k"],
|
|
211
|
+
["7", "l"],
|
|
212
|
+
["|\\/|", "m"],
|
|
213
|
+
["/\\/\\", "m"],
|
|
214
|
+
["|\\|", "n"],
|
|
215
|
+
["//", "n"],
|
|
216
|
+
["()", "o"],
|
|
217
|
+
["|*", "p"],
|
|
218
|
+
["|o", "p"],
|
|
219
|
+
["(_,)", "q"],
|
|
220
|
+
["()_", "q"],
|
|
221
|
+
["|2", "r"],
|
|
222
|
+
["12", "r"],
|
|
223
|
+
["5", "s"],
|
|
224
|
+
["$", "s"],
|
|
225
|
+
["z", "s"],
|
|
226
|
+
["7", "t"],
|
|
227
|
+
["+", "t"],
|
|
228
|
+
["†", "t"],
|
|
229
|
+
["|_|", "u"],
|
|
230
|
+
["(_)", "u"],
|
|
231
|
+
["v", "u"],
|
|
232
|
+
["\\/", "v"],
|
|
233
|
+
["|/", "v"],
|
|
234
|
+
["\\/\\/", "w"],
|
|
235
|
+
["vv", "w"],
|
|
236
|
+
["><", "x"],
|
|
237
|
+
["}{", "x"],
|
|
238
|
+
["`/", "y"],
|
|
239
|
+
["j", "y"],
|
|
240
|
+
["2", "z"],
|
|
241
|
+
["7_", "z"],
|
|
242
|
+
]);
|
|
243
|
+
// Dynamic words added at runtime
|
|
244
|
+
this.dynamicWords = new Set();
|
|
245
|
+
this.logger = (options === null || options === void 0 ? void 0 : options.logger) || new ConsoleLogger();
|
|
246
|
+
// Validate and set configuration
|
|
247
|
+
if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
|
|
112
248
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
113
249
|
}
|
|
114
250
|
this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
|
|
115
251
|
this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
|
|
116
252
|
this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
|
|
117
|
-
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d :
|
|
118
|
-
// Load whitelist
|
|
253
|
+
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
|
|
254
|
+
// Load whitelist
|
|
119
255
|
if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
|
|
120
256
|
this.addToWhitelist(options.whitelistWords);
|
|
121
257
|
}
|
|
122
|
-
// Load
|
|
258
|
+
// Load default languages
|
|
123
259
|
this.loadLanguage("english");
|
|
124
|
-
// Load Hindi by default for backward compatibility
|
|
125
260
|
this.loadLanguage("hindi");
|
|
126
|
-
// Load
|
|
127
|
-
if (options === null || options === void 0 ? void 0 : options.languages) {
|
|
261
|
+
// Load additional languages
|
|
262
|
+
if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
|
|
128
263
|
options.languages.forEach((lang) => this.loadLanguage(lang));
|
|
129
264
|
}
|
|
130
|
-
// Load
|
|
265
|
+
// Load custom dictionaries
|
|
131
266
|
if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
|
|
132
|
-
Object.entries(options.customDictionaries).forEach(([
|
|
133
|
-
this.loadCustomDictionary(
|
|
267
|
+
Object.entries(options.customDictionaries).forEach(([name, words]) => {
|
|
268
|
+
this.loadCustomDictionary(name, words);
|
|
134
269
|
});
|
|
135
270
|
}
|
|
136
271
|
}
|
|
137
272
|
/**
|
|
138
|
-
* Normalize text by converting leet speak to regular characters
|
|
139
|
-
* @param text - Text to normalize
|
|
140
|
-
* @returns Normalized text
|
|
273
|
+
* Normalize text by converting leet speak to regular characters.
|
|
141
274
|
*/
|
|
142
275
|
normalizeLeetSpeak(text) {
|
|
143
276
|
if (!this.enableLeetSpeak)
|
|
144
277
|
return text;
|
|
145
278
|
let normalized = text.toLowerCase();
|
|
146
|
-
|
|
147
|
-
const
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
{ pattern: /\[-\]/g, replacement: "h" },
|
|
151
|
-
{ pattern: /\}{\s*/g, replacement: "h" },
|
|
152
|
-
{ pattern: /\|\/\|/g, replacement: "m" },
|
|
153
|
-
{ pattern: /\/\/\\/g, replacement: "m" },
|
|
154
|
-
{ pattern: /\|\|/g, replacement: "n" },
|
|
155
|
-
{ pattern: /\/\//g, replacement: "n" },
|
|
156
|
-
{ pattern: /\|2/g, replacement: "r" },
|
|
157
|
-
{ pattern: /12/g, replacement: "r" },
|
|
158
|
-
{ pattern: /\\\/\\\//g, replacement: "w" },
|
|
159
|
-
{ pattern: /vv/g, replacement: "w" },
|
|
160
|
-
{ pattern: /><\s*/g, replacement: "x" },
|
|
161
|
-
{ pattern: /\(_\)/g, replacement: "u" },
|
|
162
|
-
{ pattern: /\|_\|/g, replacement: "u" },
|
|
163
|
-
{ pattern: /\\\//g, replacement: "v" },
|
|
164
|
-
{ pattern: /\|\//g, replacement: "v" },
|
|
165
|
-
// Single character mappings
|
|
166
|
-
{ pattern: /@/g, replacement: "a" },
|
|
167
|
-
{ pattern: /4/g, replacement: "u" },
|
|
168
|
-
{ pattern: /\^/g, replacement: "a" },
|
|
169
|
-
{ pattern: /8/g, replacement: "b" },
|
|
170
|
-
{ pattern: /6/g, replacement: "b" },
|
|
171
|
-
{ pattern: /\(/g, replacement: "c" },
|
|
172
|
-
{ pattern: /</g, replacement: "c" },
|
|
173
|
-
{ pattern: /©/g, replacement: "c" },
|
|
174
|
-
{ pattern: /¢/g, replacement: "c" },
|
|
175
|
-
{ pattern: /0/g, replacement: "o" },
|
|
176
|
-
{ pattern: /3/g, replacement: "e" },
|
|
177
|
-
{ pattern: /€/g, replacement: "e" },
|
|
178
|
-
{ pattern: /£/g, replacement: "e" },
|
|
179
|
-
{ pattern: /9/g, replacement: "g" },
|
|
180
|
-
{ pattern: /&/g, replacement: "g" },
|
|
181
|
-
{ pattern: /#/g, replacement: "h" },
|
|
182
|
-
{ pattern: /1/g, replacement: "i" },
|
|
183
|
-
{ pattern: /!/g, replacement: "i" },
|
|
184
|
-
{ pattern: /\|/g, replacement: "i" },
|
|
185
|
-
{ pattern: /7/g, replacement: "t" },
|
|
186
|
-
{ pattern: /5/g, replacement: "s" },
|
|
187
|
-
{ pattern: /\$/g, replacement: "s" },
|
|
188
|
-
{ pattern: /\+/g, replacement: "t" },
|
|
189
|
-
{ pattern: /2/g, replacement: "z" },
|
|
190
|
-
];
|
|
191
|
-
// Apply all mappings
|
|
192
|
-
for (const mapping of leetMappings) {
|
|
193
|
-
normalized = normalized.replace(mapping.pattern, mapping.replacement);
|
|
279
|
+
const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
|
|
280
|
+
for (const [leet, normal] of sortedMappings) {
|
|
281
|
+
const regex = new RegExp(this.escapeRegex(leet), "g");
|
|
282
|
+
normalized = normalized.replace(regex, normal);
|
|
194
283
|
}
|
|
195
284
|
return normalized;
|
|
196
285
|
}
|
|
197
|
-
escapeRegex(str) {
|
|
198
|
-
if (!str || typeof str !== "string") {
|
|
199
|
-
return "";
|
|
200
|
-
}
|
|
201
|
-
return str.replace(/[\\^$.*+?()[\]{}|\-]/g, function (match) {
|
|
202
|
-
return "\\" + match;
|
|
203
|
-
});
|
|
204
|
-
}
|
|
205
286
|
/**
|
|
206
|
-
*
|
|
287
|
+
* Properly escape regex special characters
|
|
207
288
|
*/
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
// Add suffix variations
|
|
211
|
-
for (const suffix of this.commonSuffixes) {
|
|
212
|
-
variations.add(word + suffix);
|
|
213
|
-
// Handle words ending in 'e'
|
|
214
|
-
if (word.endsWith("e") && !suffix.startsWith("e")) {
|
|
215
|
-
variations.add(word.slice(0, -1) + suffix);
|
|
216
|
-
}
|
|
217
|
-
// Handle consonant doubling
|
|
218
|
-
if (word.length > 2 && /[bcdfghjklmnpqrstvwxyz]/.test(word.slice(-1))) {
|
|
219
|
-
variations.add(word + word.slice(-1) + suffix);
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
// Add prefix variations
|
|
223
|
-
for (const prefix of this.commonPrefixes) {
|
|
224
|
-
variations.add(prefix + word);
|
|
225
|
-
}
|
|
226
|
-
return Array.from(variations);
|
|
289
|
+
escapeRegex(str) {
|
|
290
|
+
return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
|
|
227
291
|
}
|
|
228
292
|
/**
|
|
229
|
-
* Check if
|
|
293
|
+
* Check if a position has word boundaries (for strict mode)
|
|
230
294
|
*/
|
|
231
295
|
hasWordBoundaries(text, start, end) {
|
|
232
296
|
if (!this.strictMode)
|
|
233
297
|
return true;
|
|
234
298
|
const beforeChar = start > 0 ? text[start - 1] : " ";
|
|
235
299
|
const afterChar = end < text.length ? text[end] : " ";
|
|
236
|
-
|
|
237
|
-
|
|
300
|
+
const wordBoundaryRegex = /[\s\p{P}\p{S}]/u;
|
|
301
|
+
return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
|
|
238
302
|
}
|
|
239
303
|
/**
|
|
240
|
-
*
|
|
304
|
+
* Helper method to verify whole-word matching.
|
|
241
305
|
*/
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
// You can enhance this based on your specific word categorization
|
|
247
|
-
const totalWords = detectedWords.length;
|
|
248
|
-
const uniqueWords = new Set(detectedWords).size;
|
|
249
|
-
if (totalWords >= 5 || uniqueWords >= 3)
|
|
250
|
-
return ProfanitySeverity.EXTREME;
|
|
251
|
-
if (totalWords >= 3 || uniqueWords >= 2)
|
|
252
|
-
return ProfanitySeverity.SEVERE;
|
|
253
|
-
if (totalWords >= 2)
|
|
254
|
-
return ProfanitySeverity.MODERATE;
|
|
255
|
-
return ProfanitySeverity.MILD;
|
|
256
|
-
}
|
|
257
|
-
/**
|
|
258
|
-
* Load a built-in language dictionary
|
|
259
|
-
* @param language - The language to load
|
|
260
|
-
* @returns boolean - True if loaded successfully, false otherwise
|
|
261
|
-
*/
|
|
262
|
-
loadLanguage(language) {
|
|
263
|
-
if (this.loadedLanguages.has(language.toLowerCase())) {
|
|
264
|
-
return true;
|
|
306
|
+
isWholeWord(text, start, end) {
|
|
307
|
+
// Check left boundary
|
|
308
|
+
if (start === 0) {
|
|
309
|
+
// ok
|
|
265
310
|
}
|
|
266
|
-
|
|
267
|
-
if (this.availableLanguages[langKey] &&
|
|
268
|
-
this.availableLanguages[langKey].length > 0) {
|
|
269
|
-
const words = this.availableLanguages[langKey];
|
|
270
|
-
// Add words and their variations to the profanity set
|
|
271
|
-
for (const word of words) {
|
|
272
|
-
if (!word || typeof word !== "string")
|
|
273
|
-
continue;
|
|
274
|
-
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
275
|
-
this.profanitySet.add(normalizedWord);
|
|
276
|
-
// Store normalized leet version mapping
|
|
277
|
-
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
278
|
-
if (leetNormalized !== normalizedWord) {
|
|
279
|
-
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
280
|
-
}
|
|
281
|
-
// Generate and add variations
|
|
282
|
-
const variations = this.generateWordVariations(normalizedWord);
|
|
283
|
-
for (const variation of variations) {
|
|
284
|
-
this.profanitySet.add(variation);
|
|
285
|
-
const leetVariation = this.normalizeLeetSpeak(variation);
|
|
286
|
-
if (leetVariation !== variation) {
|
|
287
|
-
this.normalizedProfanityMap.set(leetVariation, variation);
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
this.loadedLanguages.add(langKey);
|
|
292
|
-
console.log(`AllProfanity: Added ${words.length} ${language} words to the profanity list.`);
|
|
293
|
-
return true;
|
|
294
|
-
}
|
|
295
|
-
else {
|
|
296
|
-
console.warn(`AllProfanity: Language '${language}' not found or empty in available dictionaries.`);
|
|
311
|
+
else if (/\w/.test(text[start - 1])) {
|
|
297
312
|
return false;
|
|
298
313
|
}
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
* @param languages - Array of language names to load
|
|
303
|
-
* @returns number - Number of successfully loaded languages
|
|
304
|
-
*/
|
|
305
|
-
loadLanguages(languages) {
|
|
306
|
-
let successCount = 0;
|
|
307
|
-
languages.forEach((lang) => {
|
|
308
|
-
if (this.loadLanguage(lang)) {
|
|
309
|
-
successCount++;
|
|
310
|
-
}
|
|
311
|
-
});
|
|
312
|
-
return successCount;
|
|
313
|
-
}
|
|
314
|
-
/**
|
|
315
|
-
* Load all Indian languages at once
|
|
316
|
-
* @returns number - Number of Indian languages loaded
|
|
317
|
-
*/
|
|
318
|
-
loadIndianLanguages() {
|
|
319
|
-
const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
|
|
320
|
-
return this.loadLanguages(indianLanguages);
|
|
321
|
-
}
|
|
322
|
-
/**
|
|
323
|
-
* Load a custom dictionary with a given name
|
|
324
|
-
* @param name - Name to identify this dictionary
|
|
325
|
-
* @param words - Array of profanity words
|
|
326
|
-
*/
|
|
327
|
-
loadCustomDictionary(name, words) {
|
|
328
|
-
if (!words || words.length === 0) {
|
|
329
|
-
console.warn(`AllProfanity: Custom dictionary '${name}' has no words.`);
|
|
330
|
-
return;
|
|
314
|
+
// Check right boundary
|
|
315
|
+
if (end === text.length) {
|
|
316
|
+
// ok
|
|
331
317
|
}
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
// Process and add words
|
|
335
|
-
for (const word of words) {
|
|
336
|
-
if (!word || typeof word !== "string")
|
|
337
|
-
continue;
|
|
338
|
-
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
339
|
-
this.profanitySet.add(normalizedWord);
|
|
340
|
-
// Store normalized leet version mapping
|
|
341
|
-
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
342
|
-
if (leetNormalized !== normalizedWord) {
|
|
343
|
-
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
344
|
-
}
|
|
345
|
-
// Generate and add variations
|
|
346
|
-
const variations = this.generateWordVariations(normalizedWord);
|
|
347
|
-
for (const variation of variations) {
|
|
348
|
-
this.profanitySet.add(variation);
|
|
349
|
-
const leetVariation = this.normalizeLeetSpeak(variation);
|
|
350
|
-
if (leetVariation !== variation) {
|
|
351
|
-
this.normalizedProfanityMap.set(leetVariation, variation);
|
|
352
|
-
}
|
|
353
|
-
}
|
|
318
|
+
else if (/\w/.test(text[end])) {
|
|
319
|
+
return false;
|
|
354
320
|
}
|
|
355
|
-
|
|
356
|
-
console.log(`AllProfanity: Added ${words.length} words from custom '${name}' dictionary.`);
|
|
321
|
+
return true;
|
|
357
322
|
}
|
|
358
323
|
/**
|
|
359
|
-
*
|
|
360
|
-
* @param words - Array of words to whitelist
|
|
324
|
+
* Check if a match is whitelisted (by actual matched substring and dictionary word)
|
|
361
325
|
*/
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
326
|
+
isWhitelistedMatch(word, matchedText) {
|
|
327
|
+
if (this.caseSensitive) {
|
|
328
|
+
return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
|
|
329
|
+
}
|
|
330
|
+
else {
|
|
331
|
+
return (this.whitelistSet.has(word.toLowerCase()) ||
|
|
332
|
+
this.whitelistSet.has(matchedText.toLowerCase()));
|
|
367
333
|
}
|
|
368
334
|
}
|
|
369
335
|
/**
|
|
370
|
-
* Remove
|
|
371
|
-
* @param words - Array of words to remove from whitelist
|
|
336
|
+
* Remove overlapping matches, keep only the longest at each start position
|
|
372
337
|
*/
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
if (
|
|
376
|
-
|
|
338
|
+
deduplicateMatches(matches) {
|
|
339
|
+
const sorted = [...matches].sort((a, b) => {
|
|
340
|
+
if (a.start !== b.start)
|
|
341
|
+
return a.start - b.start;
|
|
342
|
+
return b.end - a.end;
|
|
343
|
+
});
|
|
344
|
+
const result = [];
|
|
345
|
+
let lastEnd = -1;
|
|
346
|
+
for (const match of sorted) {
|
|
347
|
+
if (match.start >= lastEnd) {
|
|
348
|
+
result.push(match);
|
|
349
|
+
lastEnd = match.end;
|
|
377
350
|
}
|
|
378
351
|
}
|
|
352
|
+
return result;
|
|
379
353
|
}
|
|
380
354
|
/**
|
|
381
|
-
* Advanced profanity detection
|
|
382
|
-
* @param text - The text to analyze
|
|
383
|
-
* @returns ProfanityDetectionResult - Detailed detection results
|
|
355
|
+
* Advanced profanity detection using efficient trie-based algorithm
|
|
384
356
|
*/
|
|
385
357
|
detect(text) {
|
|
386
|
-
|
|
358
|
+
const validatedText = validateString(text, "text");
|
|
359
|
+
if (validatedText.length === 0) {
|
|
387
360
|
return {
|
|
388
361
|
hasProfanity: false,
|
|
389
362
|
detectedWords: [],
|
|
390
|
-
cleanedText:
|
|
363
|
+
cleanedText: validatedText,
|
|
391
364
|
severity: ProfanitySeverity.MILD,
|
|
392
365
|
positions: [],
|
|
393
366
|
};
|
|
394
367
|
}
|
|
395
|
-
const
|
|
396
|
-
const
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
const escapedWord = this.escapeRegex(profanity);
|
|
406
|
-
const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
|
|
407
|
-
let match;
|
|
408
|
-
while ((match = wordRegex.exec(normalizedText)) !== null) {
|
|
409
|
-
if (this.hasWordBoundaries(normalizedText, match.index, match.index + match[0].length)) {
|
|
410
|
-
detectedWords.push(match[0]);
|
|
411
|
-
positions.push({
|
|
412
|
-
word: match[0],
|
|
413
|
-
start: match.index,
|
|
414
|
-
end: match.index + match[0].length,
|
|
415
|
-
});
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
catch (error) {
|
|
420
|
-
// Fallback to simple string search if regex fails
|
|
421
|
-
const index = normalizedText.indexOf(profanity);
|
|
422
|
-
if (index !== -1) {
|
|
423
|
-
detectedWords.push(profanity);
|
|
424
|
-
positions.push({
|
|
425
|
-
word: profanity,
|
|
426
|
-
start: index,
|
|
427
|
-
end: index + profanity.length,
|
|
428
|
-
});
|
|
429
|
-
}
|
|
368
|
+
const matches = [];
|
|
369
|
+
const normalizedText = this.caseSensitive
|
|
370
|
+
? validatedText
|
|
371
|
+
: validatedText.toLowerCase();
|
|
372
|
+
this.findMatches(normalizedText, validatedText, matches);
|
|
373
|
+
// Leet speak detection (normalize and search, map back to original)
|
|
374
|
+
if (this.enableLeetSpeak) {
|
|
375
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
|
|
376
|
+
if (leetNormalized !== normalizedText) {
|
|
377
|
+
this.findMatches(leetNormalized, validatedText, matches);
|
|
430
378
|
}
|
|
431
379
|
}
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
380
|
+
const uniqueMatches = this.deduplicateMatches(matches);
|
|
381
|
+
const detectedWords = uniqueMatches.map((m) => m.originalWord);
|
|
382
|
+
const severity = this.calculateSeverity(uniqueMatches);
|
|
383
|
+
const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
|
|
384
|
+
return {
|
|
385
|
+
hasProfanity: uniqueMatches.length > 0,
|
|
386
|
+
detectedWords,
|
|
387
|
+
cleanedText,
|
|
388
|
+
severity,
|
|
389
|
+
positions: uniqueMatches.map((m) => ({
|
|
390
|
+
word: m.originalWord,
|
|
391
|
+
start: m.start,
|
|
392
|
+
end: m.end,
|
|
393
|
+
})),
|
|
394
|
+
};
|
|
395
|
+
}
|
|
396
|
+
/**
|
|
397
|
+
* Main matching function, with whole-word logic.
|
|
398
|
+
*/
|
|
399
|
+
findMatches(searchText, originalText, matches) {
|
|
400
|
+
for (let i = 0; i < searchText.length; i++) {
|
|
401
|
+
const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
|
|
402
|
+
for (const match of matchResults) {
|
|
403
|
+
const start = i + match.start;
|
|
404
|
+
const end = i + match.end;
|
|
405
|
+
// Only match whole words if !detectPartialWords
|
|
406
|
+
if (!this.detectPartialWords &&
|
|
407
|
+
!this.isWholeWord(originalText, start, end)) {
|
|
436
408
|
continue;
|
|
437
|
-
try {
|
|
438
|
-
const escapedWord = this.escapeRegex(profanity);
|
|
439
|
-
const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
|
|
440
|
-
let match;
|
|
441
|
-
while ((match = wordRegex.exec(leetNormalizedText)) !== null) {
|
|
442
|
-
if (this.hasWordBoundaries(leetNormalizedText, match.index, match.index + match[0].length)) {
|
|
443
|
-
// Find the original text that corresponds to this match
|
|
444
|
-
const originalMatch = normalizedText.substring(match.index, match.index + match[0].length);
|
|
445
|
-
if (!detectedWords.includes(originalMatch)) {
|
|
446
|
-
detectedWords.push(originalMatch);
|
|
447
|
-
positions.push({
|
|
448
|
-
word: originalMatch,
|
|
449
|
-
start: match.index,
|
|
450
|
-
end: match.index + match[0].length,
|
|
451
|
-
});
|
|
452
|
-
}
|
|
453
|
-
}
|
|
454
|
-
}
|
|
455
409
|
}
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
const index = leetNormalizedText.indexOf(profanity);
|
|
460
|
-
const originalMatch = normalizedText.substring(index, index + profanity.length);
|
|
461
|
-
if (!detectedWords.includes(originalMatch)) {
|
|
462
|
-
detectedWords.push(originalMatch);
|
|
463
|
-
positions.push({
|
|
464
|
-
word: originalMatch,
|
|
465
|
-
start: index,
|
|
466
|
-
end: index + profanity.length,
|
|
467
|
-
});
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
// Partial word detection (if enabled)
|
|
474
|
-
if (this.detectPartialWords) {
|
|
475
|
-
for (const profanity of this.profanitySet) {
|
|
476
|
-
if (this.whitelistSet.has(profanity) || profanity.length < 4)
|
|
410
|
+
// Use actual matched text for whitelist check
|
|
411
|
+
const matchedText = originalText.substring(start, end);
|
|
412
|
+
if (this.isWhitelistedMatch(match.word, matchedText)) {
|
|
477
413
|
continue;
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
start: index,
|
|
487
|
-
end: index + profanity.length,
|
|
488
|
-
});
|
|
489
|
-
}
|
|
414
|
+
}
|
|
415
|
+
if (this.hasWordBoundaries(originalText, start, end)) {
|
|
416
|
+
matches.push({
|
|
417
|
+
word: match.word,
|
|
418
|
+
start,
|
|
419
|
+
end,
|
|
420
|
+
originalWord: matchedText,
|
|
421
|
+
});
|
|
490
422
|
}
|
|
491
423
|
}
|
|
492
424
|
}
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
425
|
+
}
|
|
426
|
+
/**
|
|
427
|
+
* Generate cleaned text by replacing profane words (non-overlapping only)
|
|
428
|
+
*/
|
|
429
|
+
generateCleanedText(originalText, matches) {
|
|
430
|
+
if (matches.length === 0)
|
|
431
|
+
return originalText;
|
|
432
|
+
let result = originalText;
|
|
433
|
+
// Process matches in reverse order to maintain indices and avoid overlap
|
|
434
|
+
const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
|
|
435
|
+
for (const match of sortedMatches) {
|
|
436
|
+
const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
|
|
437
|
+
result =
|
|
438
|
+
result.substring(0, match.start) +
|
|
439
|
+
replacement +
|
|
440
|
+
result.substring(match.end);
|
|
507
441
|
}
|
|
508
|
-
|
|
509
|
-
return {
|
|
510
|
-
hasProfanity: detectedWords.length > 0,
|
|
511
|
-
detectedWords: [...new Set(detectedWords)],
|
|
512
|
-
cleanedText,
|
|
513
|
-
severity,
|
|
514
|
-
positions,
|
|
515
|
-
};
|
|
442
|
+
return result;
|
|
516
443
|
}
|
|
517
444
|
/**
|
|
518
|
-
*
|
|
519
|
-
* @param string - The string to check
|
|
520
|
-
* @returns boolean - True if profanity found, false otherwise
|
|
445
|
+
* Simple boolean check for profanity
|
|
521
446
|
*/
|
|
522
|
-
check(
|
|
523
|
-
return this.detect(
|
|
447
|
+
check(text) {
|
|
448
|
+
return this.detect(text).hasProfanity;
|
|
524
449
|
}
|
|
525
450
|
/**
|
|
526
|
-
* Clean
|
|
527
|
-
* @param string - The string to clean
|
|
528
|
-
* @param placeholder - Optional custom placeholder
|
|
529
|
-
* @returns string - The cleaned string
|
|
451
|
+
* Clean text with custom placeholder
|
|
530
452
|
*/
|
|
531
|
-
clean(
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
const placeholderChar = placeholder || this.defaultPlaceholder;
|
|
535
|
-
const detection = this.detect(string);
|
|
536
|
-
// If detect() already provided cleanedText and no custom placeholder, use it
|
|
537
|
-
if (!placeholder && detection.cleanedText !== string) {
|
|
453
|
+
clean(text, placeholder) {
|
|
454
|
+
const detection = this.detect(text);
|
|
455
|
+
if (!placeholder || placeholder === this.defaultPlaceholder) {
|
|
538
456
|
return detection.cleanedText;
|
|
539
457
|
}
|
|
540
|
-
//
|
|
541
|
-
let result =
|
|
542
|
-
const sortedPositions =
|
|
458
|
+
// Use custom placeholder
|
|
459
|
+
let result = text;
|
|
460
|
+
const sortedPositions = [
|
|
461
|
+
...this.deduplicateMatches(detection.positions.map((p) => ({
|
|
462
|
+
word: p.word,
|
|
463
|
+
start: p.start,
|
|
464
|
+
end: p.end,
|
|
465
|
+
originalWord: text.substring(p.start, p.end),
|
|
466
|
+
}))),
|
|
467
|
+
].sort((a, b) => b.start - a.start);
|
|
543
468
|
for (const pos of sortedPositions) {
|
|
544
|
-
const originalWord =
|
|
545
|
-
const replacement =
|
|
469
|
+
const originalWord = text.substring(pos.start, pos.end);
|
|
470
|
+
const replacement = placeholder.repeat(originalWord.length);
|
|
546
471
|
result =
|
|
547
472
|
result.substring(0, pos.start) +
|
|
548
473
|
replacement +
|
|
@@ -551,107 +476,218 @@ export class AllProfanity {
|
|
|
551
476
|
return result;
|
|
552
477
|
}
|
|
553
478
|
/**
|
|
554
|
-
* Clean
|
|
555
|
-
* @param string - The string to clean
|
|
556
|
-
* @param placeholder - The placeholder to use (defaults to '***')
|
|
557
|
-
* @returns string - The cleaned string
|
|
558
|
-
*/
|
|
559
|
-
cleanWithWord(string, placeholder = "***") {
|
|
560
|
-
if (!string || typeof string !== "string")
|
|
561
|
-
return string || "";
|
|
562
|
-
// Build a regex that matches any profane word with word boundaries, unicode-aware
|
|
563
|
-
const words = Array.from(this.profanitySet)
|
|
564
|
-
.map((w) => w.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) // escape regex
|
|
565
|
-
.sort((a, b) => b.length - a.length); // longer words first to avoid partial matches
|
|
566
|
-
if (words.length === 0)
|
|
567
|
-
return string;
|
|
568
|
-
// Unicode safe word boundary: (?<=^|[^\p{L}\p{N}_])WORD(?=[^\p{L}\p{N}_]|$)
|
|
569
|
-
// This ensures we only match whole words, not inside other words.
|
|
570
|
-
const regex = new RegExp(`(?<=^|[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\])(` +
|
|
571
|
-
words.join("|") +
|
|
572
|
-
`)(?=[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\]|$)`, this.caseSensitive ? "gu" : "giu");
|
|
573
|
-
// Replace all matches with the placeholder.
|
|
574
|
-
return string.replace(regex, placeholder);
|
|
575
|
-
}
|
|
576
|
-
/**
|
|
577
|
-
* Get the current list of profanity words
|
|
578
|
-
* @returns string[] - Array of all profanity words
|
|
479
|
+
* Clean text by replacing each profane word with a single placeholder (word-level)
|
|
579
480
|
*/
|
|
580
|
-
|
|
581
|
-
|
|
481
|
+
cleanWithPlaceholder(text, placeholder = "***") {
|
|
482
|
+
const detection = this.detect(text);
|
|
483
|
+
if (detection.positions.length === 0)
|
|
484
|
+
return text;
|
|
485
|
+
let result = text;
|
|
486
|
+
// Sort matches so later matches don't affect earlier ones
|
|
487
|
+
const sortedPositions = [
|
|
488
|
+
...this.deduplicateMatches(detection.positions.map((p) => ({
|
|
489
|
+
word: p.word,
|
|
490
|
+
start: p.start,
|
|
491
|
+
end: p.end,
|
|
492
|
+
originalWord: text.substring(p.start, p.end),
|
|
493
|
+
}))),
|
|
494
|
+
].sort((a, b) => b.start - a.start);
|
|
495
|
+
for (const pos of sortedPositions) {
|
|
496
|
+
// Only replace whole words!
|
|
497
|
+
if (!this.isWholeWord(result, pos.start, pos.end))
|
|
498
|
+
continue;
|
|
499
|
+
result =
|
|
500
|
+
result.substring(0, pos.start) +
|
|
501
|
+
placeholder +
|
|
502
|
+
result.substring(pos.end);
|
|
503
|
+
}
|
|
504
|
+
return result;
|
|
582
505
|
}
|
|
583
506
|
/**
|
|
584
507
|
* Add word(s) to the profanity list
|
|
585
|
-
* @param word - String or array of strings to add
|
|
586
508
|
*/
|
|
587
509
|
add(word) {
|
|
588
510
|
const words = Array.isArray(word) ? word : [word];
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
this.profanitySet.add(normalizedWord);
|
|
594
|
-
// Add leet speak mapping
|
|
595
|
-
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
596
|
-
if (leetNormalized !== normalizedWord) {
|
|
597
|
-
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
598
|
-
}
|
|
599
|
-
// Add variations
|
|
600
|
-
const variations = this.generateWordVariations(normalizedWord);
|
|
601
|
-
for (const variation of variations) {
|
|
602
|
-
this.profanitySet.add(variation);
|
|
603
|
-
}
|
|
511
|
+
const validatedWords = validateStringArray(words, "words to add");
|
|
512
|
+
for (const w of validatedWords) {
|
|
513
|
+
this.dynamicWords.add(w);
|
|
514
|
+
this.addWordToTrie(w);
|
|
604
515
|
}
|
|
605
516
|
}
|
|
606
517
|
/**
|
|
607
518
|
* Remove word(s) from the profanity list
|
|
608
|
-
* @param word - String or array of strings to remove
|
|
609
519
|
*/
|
|
610
520
|
remove(word) {
|
|
611
521
|
const words = Array.isArray(word) ? word : [word];
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
continue;
|
|
522
|
+
const validatedWords = validateStringArray(words, "words to remove");
|
|
523
|
+
for (const w of validatedWords) {
|
|
615
524
|
const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
|
|
616
|
-
this.
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
525
|
+
this.profanityTrie.removeWord(normalizedWord);
|
|
526
|
+
this.dynamicWords.delete(w);
|
|
527
|
+
}
|
|
528
|
+
}
|
|
529
|
+
/**
|
|
530
|
+
* Add words to whitelist
|
|
531
|
+
*/
|
|
532
|
+
addToWhitelist(words) {
|
|
533
|
+
const validatedWords = validateStringArray(words, "whitelist words");
|
|
534
|
+
for (const word of validatedWords) {
|
|
535
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
536
|
+
this.whitelistSet.add(normalizedWord);
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
/**
|
|
540
|
+
* Remove words from whitelist
|
|
541
|
+
*/
|
|
542
|
+
removeFromWhitelist(words) {
|
|
543
|
+
const validatedWords = validateStringArray(words, "whitelist words");
|
|
544
|
+
for (const word of validatedWords) {
|
|
545
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
546
|
+
this.whitelistSet.delete(normalizedWord);
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
/**
|
|
550
|
+
* Helper for whitelist checking with correct normalization
|
|
551
|
+
*/
|
|
552
|
+
isWhitelisted(word) {
|
|
553
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
554
|
+
return this.whitelistSet.has(normalizedWord);
|
|
555
|
+
}
|
|
556
|
+
/**
|
|
557
|
+
* Load a built-in language dictionary
|
|
558
|
+
*/
|
|
559
|
+
loadLanguage(language) {
|
|
560
|
+
if (!language || typeof language !== "string") {
|
|
561
|
+
this.logger.warn(`Invalid language parameter: ${language}`);
|
|
562
|
+
return false;
|
|
563
|
+
}
|
|
564
|
+
const langKey = language.toLowerCase().trim();
|
|
565
|
+
if (this.loadedLanguages.has(langKey)) {
|
|
566
|
+
return true;
|
|
567
|
+
}
|
|
568
|
+
const words = this.availableLanguages[langKey];
|
|
569
|
+
if (!words || words.length === 0) {
|
|
570
|
+
this.logger.warn(`Language '${language}' not found or empty`);
|
|
571
|
+
return false;
|
|
572
|
+
}
|
|
573
|
+
try {
|
|
574
|
+
let addedCount = 0;
|
|
575
|
+
for (const word of words) {
|
|
576
|
+
if (this.addWordToTrie(word)) {
|
|
577
|
+
addedCount++;
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
this.loadedLanguages.add(langKey);
|
|
581
|
+
this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
|
|
582
|
+
return true;
|
|
583
|
+
}
|
|
584
|
+
catch (error) {
|
|
585
|
+
this.logger.error(`Failed to load language ${language}: ${error}`);
|
|
586
|
+
return false;
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
/**
|
|
590
|
+
* Load multiple languages at once
|
|
591
|
+
*/
|
|
592
|
+
loadLanguages(languages) {
|
|
593
|
+
const validatedLanguages = validateStringArray(languages, "languages");
|
|
594
|
+
return validatedLanguages.reduce((count, lang) => {
|
|
595
|
+
return this.loadLanguage(lang) ? count + 1 : count;
|
|
596
|
+
}, 0);
|
|
597
|
+
}
|
|
598
|
+
/**
|
|
599
|
+
* Load all Indian languages
|
|
600
|
+
*/
|
|
601
|
+
loadIndianLanguages() {
|
|
602
|
+
const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
|
|
603
|
+
return this.loadLanguages(indianLanguages);
|
|
604
|
+
}
|
|
605
|
+
/**
|
|
606
|
+
* Load a custom dictionary
|
|
607
|
+
*/
|
|
608
|
+
loadCustomDictionary(name, words) {
|
|
609
|
+
validateString(name, "dictionary name");
|
|
610
|
+
const validatedWords = validateStringArray(words, "custom dictionary words");
|
|
611
|
+
if (validatedWords.length === 0) {
|
|
612
|
+
this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
|
|
613
|
+
return;
|
|
614
|
+
}
|
|
615
|
+
try {
|
|
616
|
+
let addedCount = 0;
|
|
617
|
+
for (const word of validatedWords) {
|
|
618
|
+
if (this.addWordToTrie(word)) {
|
|
619
|
+
addedCount++;
|
|
620
|
+
}
|
|
621
621
|
}
|
|
622
|
+
// Store for future reference
|
|
623
|
+
this.availableLanguages[name.toLowerCase()] = validatedWords;
|
|
624
|
+
this.loadedLanguages.add(name.toLowerCase());
|
|
625
|
+
this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
|
|
626
|
+
}
|
|
627
|
+
catch (error) {
|
|
628
|
+
this.logger.error(`Failed to load custom dictionary ${name}: ${error}`);
|
|
622
629
|
}
|
|
623
630
|
}
|
|
624
631
|
/**
|
|
625
|
-
*
|
|
632
|
+
* Add a single word to the trie structure
|
|
633
|
+
*/
|
|
634
|
+
addWordToTrie(word) {
|
|
635
|
+
if (!word || typeof word !== "string" || word.trim().length === 0) {
|
|
636
|
+
return false;
|
|
637
|
+
}
|
|
638
|
+
const normalizedWord = this.caseSensitive
|
|
639
|
+
? word.trim()
|
|
640
|
+
: word.trim().toLowerCase();
|
|
641
|
+
// Skip if whitelisted
|
|
642
|
+
if (this.isWhitelisted(normalizedWord)) {
|
|
643
|
+
return false;
|
|
644
|
+
}
|
|
645
|
+
// Add to trie
|
|
646
|
+
this.profanityTrie.addWord(normalizedWord);
|
|
647
|
+
return true;
|
|
648
|
+
}
|
|
649
|
+
/**
|
|
650
|
+
* Calculate an overall severity level from the total and distinct match counts
|
|
651
|
+
*/
|
|
652
|
+
calculateSeverity(matches) {
|
|
653
|
+
if (matches.length === 0)
|
|
654
|
+
return ProfanitySeverity.MILD;
|
|
655
|
+
const uniqueWords = new Set(matches.map((m) => m.word)).size;
|
|
656
|
+
const totalMatches = matches.length;
|
|
657
|
+
if (totalMatches >= 5 || uniqueWords >= 4)
|
|
658
|
+
return ProfanitySeverity.EXTREME;
|
|
659
|
+
if (totalMatches >= 3 || uniqueWords >= 3)
|
|
660
|
+
return ProfanitySeverity.SEVERE;
|
|
661
|
+
if (totalMatches >= 2 || uniqueWords >= 2)
|
|
662
|
+
return ProfanitySeverity.MODERATE;
|
|
663
|
+
return ProfanitySeverity.MILD;
|
|
664
|
+
}
|
|
665
|
+
/**
|
|
666
|
+
* Clear all loaded dictionaries
|
|
626
667
|
*/
|
|
627
668
|
clearList() {
|
|
628
|
-
this.
|
|
629
|
-
this.normalizedProfanityMap.clear();
|
|
669
|
+
this.profanityTrie.clear();
|
|
630
670
|
this.loadedLanguages.clear();
|
|
671
|
+
this.dynamicWords.clear();
|
|
631
672
|
}
|
|
632
673
|
/**
|
|
633
|
-
*
|
|
634
|
-
* @param placeholder - Single character to use as placeholder
|
|
674
|
+
* Set placeholder character
|
|
635
675
|
*/
|
|
636
676
|
setPlaceholder(placeholder) {
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
}
|
|
641
|
-
else {
|
|
642
|
-
this.defaultPlaceholder = placeholder;
|
|
677
|
+
validateString(placeholder, "placeholder");
|
|
678
|
+
if (placeholder.length === 0) {
|
|
679
|
+
throw new Error("Placeholder cannot be empty");
|
|
643
680
|
}
|
|
681
|
+
this.defaultPlaceholder = placeholder.charAt(0);
|
|
644
682
|
}
|
|
645
683
|
/**
|
|
646
|
-
* Get
|
|
647
|
-
* @returns string[] - Array of loaded language names
|
|
684
|
+
* Get loaded languages
|
|
648
685
|
*/
|
|
649
686
|
getLoadedLanguages() {
|
|
650
687
|
return Array.from(this.loadedLanguages);
|
|
651
688
|
}
|
|
652
689
|
/**
|
|
653
|
-
* Get
|
|
654
|
-
* @returns string[] - Array of available language names
|
|
690
|
+
* Get available languages
|
|
655
691
|
*/
|
|
656
692
|
getAvailableLanguages() {
|
|
657
693
|
return Object.keys(this.availableLanguages);
|
|
@@ -671,17 +707,37 @@ export class AllProfanity {
|
|
|
671
707
|
};
|
|
672
708
|
}
|
|
673
709
|
/**
|
|
674
|
-
*
|
|
710
|
+
* Rebuilds the profanity trie from loaded language dictionaries and dynamic words.
|
|
711
|
+
*/
|
|
712
|
+
rebuildTrie() {
|
|
713
|
+
this.profanityTrie.clear();
|
|
714
|
+
// Re-add all loaded language words
|
|
715
|
+
for (const lang of this.loadedLanguages) {
|
|
716
|
+
const words = this.availableLanguages[lang] || [];
|
|
717
|
+
for (const word of words) {
|
|
718
|
+
this.addWordToTrie(word);
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
// Re-add dynamic words
|
|
722
|
+
for (const word of this.dynamicWords) {
|
|
723
|
+
this.addWordToTrie(word);
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
/**
|
|
727
|
+
* Update configuration. Rebuild trie if needed.
|
|
675
728
|
*/
|
|
676
729
|
updateConfig(options) {
|
|
730
|
+
let rebuildNeeded = false;
|
|
677
731
|
if (options.defaultPlaceholder !== undefined) {
|
|
678
732
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
679
733
|
}
|
|
680
734
|
if (options.enableLeetSpeak !== undefined) {
|
|
681
735
|
this.enableLeetSpeak = options.enableLeetSpeak;
|
|
682
736
|
}
|
|
683
|
-
if (options.caseSensitive !== undefined
|
|
737
|
+
if (options.caseSensitive !== undefined &&
|
|
738
|
+
options.caseSensitive !== this.caseSensitive) {
|
|
684
739
|
this.caseSensitive = options.caseSensitive;
|
|
740
|
+
rebuildNeeded = true;
|
|
685
741
|
}
|
|
686
742
|
if (options.strictMode !== undefined) {
|
|
687
743
|
this.strictMode = options.strictMode;
|
|
@@ -692,9 +748,12 @@ export class AllProfanity {
|
|
|
692
748
|
if (options.whitelistWords) {
|
|
693
749
|
this.addToWhitelist(options.whitelistWords);
|
|
694
750
|
}
|
|
751
|
+
if (rebuildNeeded) {
|
|
752
|
+
this.rebuildTrie();
|
|
753
|
+
}
|
|
695
754
|
}
|
|
696
755
|
}
|
|
697
|
-
// Create and export a singleton instance
|
|
756
|
+
// Create and export a singleton instance
|
|
698
757
|
const allProfanity = new AllProfanity();
|
|
699
758
|
export default allProfanity;
|
|
700
759
|
//# sourceMappingURL=index.js.map
|