allprofanity 1.0.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +129 -85
- package/dist/index.d.ts +100 -18
- package/dist/index.js +546 -99
- package/dist/index.js.map +1 -1
- package/dist/languages/bengali-words.js +0 -1
- package/dist/languages/bengali-words.js.map +1 -1
- package/dist/languages/english-words.d.ts +2 -0
- package/dist/languages/english-words.js +256 -0
- package/dist/languages/english-words.js.map +1 -0
- package/package.json +2 -5
package/dist/index.js
CHANGED
|
@@ -1,22 +1,33 @@
|
|
|
1
|
-
import
|
|
2
|
-
import hindiBadWords from "./languages/hindi-words";
|
|
3
|
-
import frenchBadWords from "./languages/french-words";
|
|
4
|
-
import germanBadWords from "./languages/german-words";
|
|
5
|
-
import spanishBadWords from "./languages/spanish-words";
|
|
6
|
-
import bengaliBadWords from "./languages/bengali-words";
|
|
7
|
-
import tamilBadWords from "./languages/tamil-words";
|
|
8
|
-
import teluguBadWords from "./languages/telugu-words";
|
|
1
|
+
import englishBadWords from "./languages/english-words.js";
|
|
2
|
+
import hindiBadWords from "./languages/hindi-words.js";
|
|
3
|
+
import frenchBadWords from "./languages/french-words.js";
|
|
4
|
+
import germanBadWords from "./languages/german-words.js";
|
|
5
|
+
import spanishBadWords from "./languages/spanish-words.js";
|
|
6
|
+
import bengaliBadWords from "./languages/bengali-words.js";
|
|
7
|
+
import tamilBadWords from "./languages/tamil-words.js";
|
|
8
|
+
import teluguBadWords from "./languages/telugu-words.js";
|
|
9
9
|
// Export language dictionaries for direct access
|
|
10
|
-
export { default as
|
|
11
|
-
export { default as
|
|
12
|
-
export { default as
|
|
13
|
-
export { default as
|
|
14
|
-
export { default as
|
|
15
|
-
export { default as
|
|
16
|
-
export { default as
|
|
10
|
+
export { default as englishBadWords } from "./languages/english-words.js";
|
|
11
|
+
export { default as hindiBadWords } from "./languages/hindi-words.js";
|
|
12
|
+
export { default as frenchBadWords } from "./languages/french-words.js";
|
|
13
|
+
export { default as germanBadWords } from "./languages/german-words.js";
|
|
14
|
+
export { default as spanishBadWords } from "./languages/spanish-words.js";
|
|
15
|
+
export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
16
|
+
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
17
|
+
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
17
18
|
/**
|
|
18
|
-
*
|
|
19
|
-
|
|
19
|
+
* Severity levels for profanity detection
|
|
20
|
+
*/
|
|
21
|
+
export var ProfanitySeverity;
(function (levels) {
    // Register every level in both directions (name -> rank and rank -> name),
    // which is the lookup shape a numeric enum exposes to callers.
    const ranked = ["MILD", "MODERATE", "SEVERE", "EXTREME"];
    ranked.forEach((label, offset) => {
        const rank = offset + 1;
        levels[label] = rank;
        levels[rank] = label;
    });
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
|
|
28
|
+
/**
|
|
29
|
+
* Advanced AllProfanity - Custom profanity filter with multi-language support and leet speak detection
|
|
30
|
+
* No external dependencies - built from scratch for maximum performance and control
|
|
20
31
|
*/
|
|
21
32
|
export class AllProfanity {
|
|
22
33
|
/**
|
|
@@ -24,25 +35,92 @@ export class AllProfanity {
|
|
|
24
35
|
* @param options - Configuration options
|
|
25
36
|
*/
|
|
26
37
|
constructor(options) {
|
|
38
|
+
var _a, _b, _c, _d;
|
|
39
|
+
this.profanitySet = new Set();
|
|
40
|
+
this.normalizedProfanityMap = new Map();
|
|
27
41
|
this.defaultPlaceholder = "*";
|
|
28
42
|
this.loadedLanguages = new Set();
|
|
43
|
+
this.whitelistSet = new Set();
|
|
44
|
+
this.enableLeetSpeak = true;
|
|
45
|
+
this.caseSensitive = false;
|
|
46
|
+
this.strictMode = false;
|
|
47
|
+
this.detectPartialWords = true;
|
|
48
|
+
// Comprehensive leet speak mapping
|
|
49
|
+
this.leetMap = {
|
|
50
|
+
a: ["4", "@", "^", "aye", "λ", "ª"],
|
|
51
|
+
b: ["8", "6", "|3", "ß", "β", "13"],
|
|
52
|
+
c: ["(", "<", "©", "¢", "see", "sea"],
|
|
53
|
+
d: ["|)", "|]", "0", "ð"],
|
|
54
|
+
e: ["3", "€", "£", "ë", "é", "è"],
|
|
55
|
+
f: ["|=", "ph", "|#", "ƒ"],
|
|
56
|
+
g: ["9", "6", "&", "gee"],
|
|
57
|
+
h: ["#", "|-|", "[-]", "}{", "ħ"],
|
|
58
|
+
i: ["1", "!", "|", "eye", "ï", "í", "ì"],
|
|
59
|
+
j: ["_|", "_/", "¿", "ĵ"],
|
|
60
|
+
k: ["|<", "1<", "l<", "|{", "ķ"],
|
|
61
|
+
l: ["1", "|", "7", "£", "ł", "ĺ"],
|
|
62
|
+
m: ["|/|", "//\\", "em", "ɱ"],
|
|
63
|
+
n: ["||", "//", "and", "ñ", "ń"],
|
|
64
|
+
o: ["0", "()", "oh", "ø", "ó", "ò", "ô"],
|
|
65
|
+
p: ["|*", "|o", "|^", "|>", "9", "þ"],
|
|
66
|
+
q: ["(_,)", "()_", "kw", "ĸ"],
|
|
67
|
+
r: ["|2", "12", ".-", "are", "ř", "ŕ"],
|
|
68
|
+
s: ["5", "$", "z", "ş", "ś", "š"],
|
|
69
|
+
t: ["7", "+", "-|-", "†", "ť", "ţ"],
|
|
70
|
+
u: ["(_)", "|_|", "v", "you", "ü", "ú", "ù"],
|
|
71
|
+
v: ["\\/", "|/", "|", "vee"],
|
|
72
|
+
w: ["\\/\\/", "vv", "dubya", "ŵ"],
|
|
73
|
+
x: ["><", "}{", "ecks", "χ"],
|
|
74
|
+
y: ["`/", "j", "why", "ÿ", "ý"],
|
|
75
|
+
z: ["2", "7_", "-/_", "zee", "ž", "ź", "ż"],
|
|
76
|
+
};
|
|
77
|
+
// Word boundary patterns
|
|
78
|
+
this.wordBoundaryChars = /[\s\.,;:!?\-_+=\[\]{}()"'\/\\]/;
|
|
79
|
+
// Common word variations and suffixes
|
|
80
|
+
this.commonSuffixes = [
|
|
81
|
+
"ing",
|
|
82
|
+
"ed",
|
|
83
|
+
"s",
|
|
84
|
+
"er",
|
|
85
|
+
"ers",
|
|
86
|
+
"est",
|
|
87
|
+
"ly",
|
|
88
|
+
"tion",
|
|
89
|
+
"ness",
|
|
90
|
+
];
|
|
91
|
+
this.commonPrefixes = [
|
|
92
|
+
"un",
|
|
93
|
+
"re",
|
|
94
|
+
"pre",
|
|
95
|
+
"dis",
|
|
96
|
+
"over",
|
|
97
|
+
"under",
|
|
98
|
+
"out",
|
|
99
|
+
];
|
|
29
100
|
this.availableLanguages = {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
101
|
+
english: englishBadWords || [],
|
|
102
|
+
hindi: hindiBadWords || [],
|
|
103
|
+
french: frenchBadWords || [],
|
|
104
|
+
german: germanBadWords || [],
|
|
105
|
+
spanish: spanishBadWords || [],
|
|
106
|
+
bengali: bengaliBadWords || [],
|
|
107
|
+
tamil: tamilBadWords || [],
|
|
108
|
+
telugu: teluguBadWords || [],
|
|
38
109
|
};
|
|
39
|
-
|
|
40
|
-
// Set custom placeholder if provided
|
|
110
|
+
// Set configuration options
|
|
41
111
|
if (options === null || options === void 0 ? void 0 : options.defaultPlaceholder) {
|
|
42
112
|
this.setPlaceholder(options.defaultPlaceholder);
|
|
43
113
|
}
|
|
44
|
-
|
|
45
|
-
this.
|
|
114
|
+
this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
|
|
115
|
+
this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
|
|
116
|
+
this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
|
|
117
|
+
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : true;
|
|
118
|
+
// Load whitelist if provided
|
|
119
|
+
if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
|
|
120
|
+
this.addToWhitelist(options.whitelistWords);
|
|
121
|
+
}
|
|
122
|
+
// Load the default English dictionary
|
|
123
|
+
this.loadLanguage("english");
|
|
46
124
|
// Load Hindi by default for backward compatibility
|
|
47
125
|
this.loadLanguage("hindi");
|
|
48
126
|
// Load any additional languages specified in options
|
|
@@ -56,25 +134,166 @@ export class AllProfanity {
|
|
|
56
134
|
});
|
|
57
135
|
}
|
|
58
136
|
}
|
|
137
|
+
/**
|
|
138
|
+
* Normalize text by converting leet speak to regular characters
|
|
139
|
+
* @param text - Text to normalize
|
|
140
|
+
* @returns Normalized text
|
|
141
|
+
*/
|
|
142
|
+
normalizeLeetSpeak(text) {
|
|
143
|
+
if (!this.enableLeetSpeak)
|
|
144
|
+
return text;
|
|
145
|
+
let normalized = text.toLowerCase();
|
|
146
|
+
// Define comprehensive leet mappings
|
|
147
|
+
const leetMappings = [
|
|
148
|
+
// Multi-character first
|
|
149
|
+
{ pattern: /\|-\|/g, replacement: "h" },
|
|
150
|
+
{ pattern: /\[-\]/g, replacement: "h" },
|
|
151
|
+
{ pattern: /\}{\s*/g, replacement: "h" },
|
|
152
|
+
{ pattern: /\|\/\|/g, replacement: "m" },
|
|
153
|
+
{ pattern: /\/\/\\/g, replacement: "m" },
|
|
154
|
+
{ pattern: /\|\|/g, replacement: "n" },
|
|
155
|
+
{ pattern: /\/\//g, replacement: "n" },
|
|
156
|
+
{ pattern: /\|2/g, replacement: "r" },
|
|
157
|
+
{ pattern: /12/g, replacement: "r" },
|
|
158
|
+
{ pattern: /\\\/\\\//g, replacement: "w" },
|
|
159
|
+
{ pattern: /vv/g, replacement: "w" },
|
|
160
|
+
{ pattern: /><\s*/g, replacement: "x" },
|
|
161
|
+
{ pattern: /\(_\)/g, replacement: "u" },
|
|
162
|
+
{ pattern: /\|_\|/g, replacement: "u" },
|
|
163
|
+
{ pattern: /\\\//g, replacement: "v" },
|
|
164
|
+
{ pattern: /\|\//g, replacement: "v" },
|
|
165
|
+
// Single character mappings
|
|
166
|
+
{ pattern: /@/g, replacement: "a" },
|
|
167
|
+
{ pattern: /4/g, replacement: "u" },
|
|
168
|
+
{ pattern: /\^/g, replacement: "a" },
|
|
169
|
+
{ pattern: /8/g, replacement: "b" },
|
|
170
|
+
{ pattern: /6/g, replacement: "b" },
|
|
171
|
+
{ pattern: /\(/g, replacement: "c" },
|
|
172
|
+
{ pattern: /</g, replacement: "c" },
|
|
173
|
+
{ pattern: /©/g, replacement: "c" },
|
|
174
|
+
{ pattern: /¢/g, replacement: "c" },
|
|
175
|
+
{ pattern: /0/g, replacement: "o" },
|
|
176
|
+
{ pattern: /3/g, replacement: "e" },
|
|
177
|
+
{ pattern: /€/g, replacement: "e" },
|
|
178
|
+
{ pattern: /£/g, replacement: "e" },
|
|
179
|
+
{ pattern: /9/g, replacement: "g" },
|
|
180
|
+
{ pattern: /&/g, replacement: "g" },
|
|
181
|
+
{ pattern: /#/g, replacement: "h" },
|
|
182
|
+
{ pattern: /1/g, replacement: "i" },
|
|
183
|
+
{ pattern: /!/g, replacement: "i" },
|
|
184
|
+
{ pattern: /\|/g, replacement: "i" },
|
|
185
|
+
{ pattern: /7/g, replacement: "t" },
|
|
186
|
+
{ pattern: /5/g, replacement: "s" },
|
|
187
|
+
{ pattern: /\$/g, replacement: "s" },
|
|
188
|
+
{ pattern: /\+/g, replacement: "t" },
|
|
189
|
+
{ pattern: /2/g, replacement: "z" },
|
|
190
|
+
];
|
|
191
|
+
// Apply all mappings
|
|
192
|
+
for (const mapping of leetMappings) {
|
|
193
|
+
normalized = normalized.replace(mapping.pattern, mapping.replacement);
|
|
194
|
+
}
|
|
195
|
+
return normalized;
|
|
196
|
+
}
|
|
197
|
+
escapeRegex(str) {
|
|
198
|
+
if (!str || typeof str !== "string") {
|
|
199
|
+
return "";
|
|
200
|
+
}
|
|
201
|
+
return str.replace(/[\\^$.*+?()[\]{}|\-]/g, function (match) {
|
|
202
|
+
return "\\" + match;
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Generate word variations with common prefixes and suffixes
|
|
207
|
+
*/
|
|
208
|
+
generateWordVariations(word) {
|
|
209
|
+
const variations = new Set([word]);
|
|
210
|
+
// Add suffix variations
|
|
211
|
+
for (const suffix of this.commonSuffixes) {
|
|
212
|
+
variations.add(word + suffix);
|
|
213
|
+
// Handle words ending in 'e'
|
|
214
|
+
if (word.endsWith("e") && !suffix.startsWith("e")) {
|
|
215
|
+
variations.add(word.slice(0, -1) + suffix);
|
|
216
|
+
}
|
|
217
|
+
// Handle consonant doubling
|
|
218
|
+
if (word.length > 2 && /[bcdfghjklmnpqrstvwxyz]/.test(word.slice(-1))) {
|
|
219
|
+
variations.add(word + word.slice(-1) + suffix);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
// Add prefix variations
|
|
223
|
+
for (const prefix of this.commonPrefixes) {
|
|
224
|
+
variations.add(prefix + word);
|
|
225
|
+
}
|
|
226
|
+
return Array.from(variations);
|
|
227
|
+
}
|
|
228
|
+
/**
|
|
229
|
+
* Check if text contains word boundaries around a match
|
|
230
|
+
*/
|
|
231
|
+
hasWordBoundaries(text, start, end) {
|
|
232
|
+
if (!this.strictMode)
|
|
233
|
+
return true;
|
|
234
|
+
const beforeChar = start > 0 ? text[start - 1] : " ";
|
|
235
|
+
const afterChar = end < text.length ? text[end] : " ";
|
|
236
|
+
return (this.wordBoundaryChars.test(beforeChar) &&
|
|
237
|
+
this.wordBoundaryChars.test(afterChar));
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Calculate severity based on detected words
|
|
241
|
+
*/
|
|
242
|
+
calculateSeverity(detectedWords) {
|
|
243
|
+
if (detectedWords.length === 0)
|
|
244
|
+
return ProfanitySeverity.MILD;
|
|
245
|
+
// This is a simplified severity calculation
|
|
246
|
+
// You can enhance this based on your specific word categorization
|
|
247
|
+
const totalWords = detectedWords.length;
|
|
248
|
+
const uniqueWords = new Set(detectedWords).size;
|
|
249
|
+
if (totalWords >= 5 || uniqueWords >= 3)
|
|
250
|
+
return ProfanitySeverity.EXTREME;
|
|
251
|
+
if (totalWords >= 3 || uniqueWords >= 2)
|
|
252
|
+
return ProfanitySeverity.SEVERE;
|
|
253
|
+
if (totalWords >= 2)
|
|
254
|
+
return ProfanitySeverity.MODERATE;
|
|
255
|
+
return ProfanitySeverity.MILD;
|
|
256
|
+
}
|
|
59
257
|
/**
|
|
60
258
|
* Load a built-in language dictionary
|
|
61
259
|
* @param language - The language to load
|
|
62
260
|
* @returns boolean - True if loaded successfully, false otherwise
|
|
63
261
|
*/
|
|
64
262
|
loadLanguage(language) {
|
|
65
|
-
// Skip if already loaded
|
|
66
263
|
if (this.loadedLanguages.has(language.toLowerCase())) {
|
|
67
264
|
return true;
|
|
68
265
|
}
|
|
69
266
|
const langKey = language.toLowerCase();
|
|
70
|
-
if (this.availableLanguages[langKey]
|
|
71
|
-
this.
|
|
267
|
+
if (this.availableLanguages[langKey] &&
|
|
268
|
+
this.availableLanguages[langKey].length > 0) {
|
|
269
|
+
const words = this.availableLanguages[langKey];
|
|
270
|
+
// Add words and their variations to the profanity set
|
|
271
|
+
for (const word of words) {
|
|
272
|
+
if (!word || typeof word !== "string")
|
|
273
|
+
continue;
|
|
274
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
275
|
+
this.profanitySet.add(normalizedWord);
|
|
276
|
+
// Store normalized leet version mapping
|
|
277
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
278
|
+
if (leetNormalized !== normalizedWord) {
|
|
279
|
+
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
280
|
+
}
|
|
281
|
+
// Generate and add variations
|
|
282
|
+
const variations = this.generateWordVariations(normalizedWord);
|
|
283
|
+
for (const variation of variations) {
|
|
284
|
+
this.profanitySet.add(variation);
|
|
285
|
+
const leetVariation = this.normalizeLeetSpeak(variation);
|
|
286
|
+
if (leetVariation !== variation) {
|
|
287
|
+
this.normalizedProfanityMap.set(leetVariation, variation);
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
72
291
|
this.loadedLanguages.add(langKey);
|
|
73
|
-
console.log(`AllProfanity: Added ${
|
|
292
|
+
console.log(`AllProfanity: Added ${words.length} ${language} words to the profanity list.`);
|
|
74
293
|
return true;
|
|
75
294
|
}
|
|
76
295
|
else {
|
|
77
|
-
console.warn(`AllProfanity: Language '${language}' not found in available dictionaries.`);
|
|
296
|
+
console.warn(`AllProfanity: Language '${language}' not found or empty in available dictionaries.`);
|
|
78
297
|
return false;
|
|
79
298
|
}
|
|
80
299
|
}
|
|
@@ -112,66 +331,224 @@ export class AllProfanity {
|
|
|
112
331
|
}
|
|
113
332
|
// Add to available languages for future reference
|
|
114
333
|
this.availableLanguages[name.toLowerCase()] = words;
|
|
115
|
-
//
|
|
116
|
-
|
|
334
|
+
// Process and add words
|
|
335
|
+
for (const word of words) {
|
|
336
|
+
if (!word || typeof word !== "string")
|
|
337
|
+
continue;
|
|
338
|
+
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
|
|
339
|
+
this.profanitySet.add(normalizedWord);
|
|
340
|
+
// Store normalized leet version mapping
|
|
341
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
342
|
+
if (leetNormalized !== normalizedWord) {
|
|
343
|
+
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
344
|
+
}
|
|
345
|
+
// Generate and add variations
|
|
346
|
+
const variations = this.generateWordVariations(normalizedWord);
|
|
347
|
+
for (const variation of variations) {
|
|
348
|
+
this.profanitySet.add(variation);
|
|
349
|
+
const leetVariation = this.normalizeLeetSpeak(variation);
|
|
350
|
+
if (leetVariation !== variation) {
|
|
351
|
+
this.normalizedProfanityMap.set(leetVariation, variation);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
117
355
|
this.loadedLanguages.add(name.toLowerCase());
|
|
118
356
|
console.log(`AllProfanity: Added ${words.length} words from custom '${name}' dictionary.`);
|
|
119
357
|
}
|
|
120
358
|
/**
|
|
121
|
-
*
|
|
122
|
-
* @
|
|
359
|
+
* Add words to whitelist (words that should never be flagged as profanity)
|
|
360
|
+
* @param words - Array of words to whitelist
|
|
123
361
|
*/
|
|
124
|
-
|
|
125
|
-
|
|
362
|
+
addToWhitelist(words) {
|
|
363
|
+
for (const word of words) {
|
|
364
|
+
if (word && typeof word === "string") {
|
|
365
|
+
this.whitelistSet.add(this.caseSensitive ? word : word.toLowerCase());
|
|
366
|
+
}
|
|
367
|
+
}
|
|
126
368
|
}
|
|
127
369
|
/**
|
|
128
|
-
*
|
|
129
|
-
* @
|
|
370
|
+
* Remove words from whitelist
|
|
371
|
+
* @param words - Array of words to remove from whitelist
|
|
130
372
|
*/
|
|
131
|
-
|
|
132
|
-
|
|
373
|
+
removeFromWhitelist(words) {
|
|
374
|
+
for (const word of words) {
|
|
375
|
+
if (word && typeof word === "string") {
|
|
376
|
+
this.whitelistSet.delete(this.caseSensitive ? word : word.toLowerCase());
|
|
377
|
+
}
|
|
378
|
+
}
|
|
133
379
|
}
|
|
134
380
|
/**
|
|
135
|
-
*
|
|
381
|
+
* Advanced profanity detection with detailed results
|
|
382
|
+
* @param text - The text to analyze
|
|
383
|
+
* @returns ProfanityDetectionResult - Detailed detection results
|
|
384
|
+
*/
|
|
385
|
+
detect(text) {
|
|
386
|
+
if (!text || typeof text !== "string") {
|
|
387
|
+
return {
|
|
388
|
+
hasProfanity: false,
|
|
389
|
+
detectedWords: [],
|
|
390
|
+
cleanedText: text || "",
|
|
391
|
+
severity: ProfanitySeverity.MILD,
|
|
392
|
+
positions: [],
|
|
393
|
+
};
|
|
394
|
+
}
|
|
395
|
+
const normalizedText = this.caseSensitive ? text : text.toLowerCase();
|
|
396
|
+
const leetNormalizedText = this.normalizeLeetSpeak(normalizedText);
|
|
397
|
+
const detectedWords = [];
|
|
398
|
+
const positions = [];
|
|
399
|
+
// Check for whole word matches first
|
|
400
|
+
for (const profanity of this.profanitySet) {
|
|
401
|
+
if (this.whitelistSet.has(profanity))
|
|
402
|
+
continue;
|
|
403
|
+
try {
|
|
404
|
+
// Create regex for word boundary detection
|
|
405
|
+
const escapedWord = this.escapeRegex(profanity);
|
|
406
|
+
const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
|
|
407
|
+
let match;
|
|
408
|
+
while ((match = wordRegex.exec(normalizedText)) !== null) {
|
|
409
|
+
if (this.hasWordBoundaries(normalizedText, match.index, match.index + match[0].length)) {
|
|
410
|
+
detectedWords.push(match[0]);
|
|
411
|
+
positions.push({
|
|
412
|
+
word: match[0],
|
|
413
|
+
start: match.index,
|
|
414
|
+
end: match.index + match[0].length,
|
|
415
|
+
});
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
catch (error) {
|
|
420
|
+
// Fallback to simple string search if regex fails
|
|
421
|
+
const index = normalizedText.indexOf(profanity);
|
|
422
|
+
if (index !== -1) {
|
|
423
|
+
detectedWords.push(profanity);
|
|
424
|
+
positions.push({
|
|
425
|
+
word: profanity,
|
|
426
|
+
start: index,
|
|
427
|
+
end: index + profanity.length,
|
|
428
|
+
});
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
// Check leet speak normalized text
|
|
433
|
+
if (this.enableLeetSpeak && leetNormalizedText !== normalizedText) {
|
|
434
|
+
for (const profanity of this.profanitySet) {
|
|
435
|
+
if (this.whitelistSet.has(profanity))
|
|
436
|
+
continue;
|
|
437
|
+
try {
|
|
438
|
+
const escapedWord = this.escapeRegex(profanity);
|
|
439
|
+
const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
|
|
440
|
+
let match;
|
|
441
|
+
while ((match = wordRegex.exec(leetNormalizedText)) !== null) {
|
|
442
|
+
if (this.hasWordBoundaries(leetNormalizedText, match.index, match.index + match[0].length)) {
|
|
443
|
+
// Find the original text that corresponds to this match
|
|
444
|
+
const originalMatch = normalizedText.substring(match.index, match.index + match[0].length);
|
|
445
|
+
if (!detectedWords.includes(originalMatch)) {
|
|
446
|
+
detectedWords.push(originalMatch);
|
|
447
|
+
positions.push({
|
|
448
|
+
word: originalMatch,
|
|
449
|
+
start: match.index,
|
|
450
|
+
end: match.index + match[0].length,
|
|
451
|
+
});
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
catch (error) {
|
|
457
|
+
// Fallback to simple string search
|
|
458
|
+
if (leetNormalizedText.includes(profanity)) {
|
|
459
|
+
const index = leetNormalizedText.indexOf(profanity);
|
|
460
|
+
const originalMatch = normalizedText.substring(index, index + profanity.length);
|
|
461
|
+
if (!detectedWords.includes(originalMatch)) {
|
|
462
|
+
detectedWords.push(originalMatch);
|
|
463
|
+
positions.push({
|
|
464
|
+
word: originalMatch,
|
|
465
|
+
start: index,
|
|
466
|
+
end: index + profanity.length,
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
// Partial word detection (if enabled)
|
|
474
|
+
if (this.detectPartialWords) {
|
|
475
|
+
for (const profanity of this.profanitySet) {
|
|
476
|
+
if (this.whitelistSet.has(profanity) || profanity.length < 4)
|
|
477
|
+
continue;
|
|
478
|
+
if (normalizedText.includes(profanity) ||
|
|
479
|
+
leetNormalizedText.includes(profanity)) {
|
|
480
|
+
const index = normalizedText.indexOf(profanity);
|
|
481
|
+
if (index !== -1 &&
|
|
482
|
+
!detectedWords.some((w) => normalizedText.indexOf(w) === index)) {
|
|
483
|
+
detectedWords.push(profanity);
|
|
484
|
+
positions.push({
|
|
485
|
+
word: profanity,
|
|
486
|
+
start: index,
|
|
487
|
+
end: index + profanity.length,
|
|
488
|
+
});
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
// REMOVED: cleanedText = this.clean(text) - this was causing circular dependency
|
|
494
|
+
// We'll generate the cleaned text directly here instead
|
|
495
|
+
let cleanedText = text;
|
|
496
|
+
if (detectedWords.length > 0) {
|
|
497
|
+
// Sort positions by start index in descending order to avoid index shifting
|
|
498
|
+
const sortedPositions = positions.sort((a, b) => b.start - a.start);
|
|
499
|
+
for (const pos of sortedPositions) {
|
|
500
|
+
const originalWord = text.substring(pos.start, pos.end);
|
|
501
|
+
const replacement = this.defaultPlaceholder.repeat(originalWord.length);
|
|
502
|
+
cleanedText =
|
|
503
|
+
cleanedText.substring(0, pos.start) +
|
|
504
|
+
replacement +
|
|
505
|
+
cleanedText.substring(pos.end);
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
const severity = this.calculateSeverity(detectedWords);
|
|
509
|
+
return {
|
|
510
|
+
hasProfanity: detectedWords.length > 0,
|
|
511
|
+
detectedWords: [...new Set(detectedWords)],
|
|
512
|
+
cleanedText,
|
|
513
|
+
severity,
|
|
514
|
+
positions,
|
|
515
|
+
};
|
|
516
|
+
}
|
|
517
|
+
/**
|
|
518
|
+
* Check if a string contains profanity (simple boolean check)
|
|
136
519
|
* @param string - The string to check
|
|
137
520
|
* @returns boolean - True if profanity found, false otherwise
|
|
138
521
|
*/
|
|
139
522
|
check(string) {
|
|
140
|
-
return this.
|
|
523
|
+
return this.detect(string).hasProfanity;
|
|
141
524
|
}
|
|
142
525
|
/**
|
|
143
526
|
* Clean a string by replacing profanities with placeholders
|
|
144
527
|
* @param string - The string to clean
|
|
145
|
-
* @param placeholder - Optional custom placeholder
|
|
528
|
+
* @param placeholder - Optional custom placeholder
|
|
146
529
|
* @returns string - The cleaned string
|
|
147
530
|
*/
|
|
148
531
|
clean(string, placeholder) {
|
|
149
|
-
|
|
150
|
-
|
|
532
|
+
if (!string || typeof string !== "string")
|
|
533
|
+
return string || "";
|
|
534
|
+
const placeholderChar = placeholder || this.defaultPlaceholder;
|
|
535
|
+
const detection = this.detect(string);
|
|
536
|
+
// If detect() already provided cleanedText and no custom placeholder, use it
|
|
537
|
+
if (!placeholder && detection.cleanedText !== string) {
|
|
538
|
+
return detection.cleanedText;
|
|
539
|
+
}
|
|
540
|
+
// Otherwise, build cleaned text with custom placeholder
|
|
151
541
|
let result = string;
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
const
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
];
|
|
161
|
-
for (const variation of variations) {
|
|
162
|
-
if (result.toLowerCase().includes(variation.toLowerCase())) {
|
|
163
|
-
const prefix = word;
|
|
164
|
-
const suffix = variation.slice(word.length);
|
|
165
|
-
const replacement = (placeholder || this.defaultPlaceholder).repeat(prefix.length) +
|
|
166
|
-
suffix;
|
|
167
|
-
// Use regex to replace while preserving case (though this simplifies it)
|
|
168
|
-
const regex = new RegExp(variation, "gi");
|
|
169
|
-
result = result.replace(regex, replacement);
|
|
170
|
-
}
|
|
171
|
-
}
|
|
542
|
+
const sortedPositions = detection.positions.sort((a, b) => b.start - a.start);
|
|
543
|
+
for (const pos of sortedPositions) {
|
|
544
|
+
const originalWord = string.substring(pos.start, pos.end);
|
|
545
|
+
const replacement = placeholderChar.repeat(originalWord.length);
|
|
546
|
+
result =
|
|
547
|
+
result.substring(0, pos.start) +
|
|
548
|
+
replacement +
|
|
549
|
+
result.substring(pos.end);
|
|
172
550
|
}
|
|
173
|
-
|
|
174
|
-
return this.filter.clean(result, placeholder || this.defaultPlaceholder);
|
|
551
|
+
return result;
|
|
175
552
|
}
|
|
176
553
|
/**
|
|
177
554
|
* Clean a string by replacing each profane word with a single placeholder
|
|
@@ -180,58 +557,77 @@ export class AllProfanity {
|
|
|
180
557
|
* @returns string - The cleaned string
|
|
181
558
|
*/
|
|
182
559
|
cleanWithWord(string, placeholder = "***") {
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
const words =
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
cleaned = cleaned.replace(/ ([^\w\s])$/g, "$1"); // Fix trailing punctuation
|
|
199
|
-
return cleaned;
|
|
560
|
+
if (!string || typeof string !== "string")
|
|
561
|
+
return string || "";
|
|
562
|
+
// Build a regex that matches any profane word with word boundaries, unicode-aware
|
|
563
|
+
const words = Array.from(this.profanitySet)
|
|
564
|
+
.map((w) => w.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) // escape regex
|
|
565
|
+
.sort((a, b) => b.length - a.length); // longer words first to avoid partial matches
|
|
566
|
+
if (words.length === 0)
|
|
567
|
+
return string;
|
|
568
|
+
// Unicode safe word boundary: (?<=^|[^\p{L}\p{N}_])WORD(?=[^\p{L}\p{N}_]|$)
|
|
569
|
+
// This ensures we only match whole words, not inside other words.
|
|
570
|
+
const regex = new RegExp(`(?<=^|[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\])(` +
|
|
571
|
+
words.join("|") +
|
|
572
|
+
`)(?=[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\]|$)`, this.caseSensitive ? "gu" : "giu");
|
|
573
|
+
// Replace all matches with the placeholder.
|
|
574
|
+
return string.replace(regex, placeholder);
|
|
200
575
|
}
|
|
201
576
|
/**
|
|
202
577
|
* Get the current list of profanity words
|
|
203
578
|
* @returns string[] - Array of all profanity words
|
|
204
579
|
*/
|
|
205
580
|
list() {
|
|
206
|
-
return this.
|
|
581
|
+
return Array.from(this.profanitySet);
|
|
207
582
|
}
|
|
208
583
|
/**
|
|
209
584
|
* Add word(s) to the profanity list
|
|
210
585
|
* @param word - String or array of strings to add
|
|
211
586
|
*/
|
|
212
587
|
add(word) {
|
|
213
|
-
|
|
588
|
+
const words = Array.isArray(word) ? word : [word];
|
|
589
|
+
for (const w of words) {
|
|
590
|
+
if (!w || typeof w !== "string")
|
|
591
|
+
continue;
|
|
592
|
+
const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
|
|
593
|
+
this.profanitySet.add(normalizedWord);
|
|
594
|
+
// Add leet speak mapping
|
|
595
|
+
const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
|
|
596
|
+
if (leetNormalized !== normalizedWord) {
|
|
597
|
+
this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
|
|
598
|
+
}
|
|
599
|
+
// Add variations
|
|
600
|
+
const variations = this.generateWordVariations(normalizedWord);
|
|
601
|
+
for (const variation of variations) {
|
|
602
|
+
this.profanitySet.add(variation);
|
|
603
|
+
}
|
|
604
|
+
}
|
|
214
605
|
}
|
|
215
606
|
/**
|
|
216
607
|
* Remove word(s) from the profanity list
|
|
217
608
|
* @param word - String or array of strings to remove
|
|
218
609
|
*/
|
|
219
610
|
remove(word) {
|
|
220
|
-
|
|
611
|
+
const words = Array.isArray(word) ? word : [word];
|
|
612
|
+
for (const w of words) {
|
|
613
|
+
if (!w || typeof w !== "string")
|
|
614
|
+
continue;
|
|
615
|
+
const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
|
|
616
|
+
this.profanitySet.delete(normalizedWord);
|
|
617
|
+
// Remove variations
|
|
618
|
+
const variations = this.generateWordVariations(normalizedWord);
|
|
619
|
+
for (const variation of variations) {
|
|
620
|
+
this.profanitySet.delete(variation);
|
|
621
|
+
}
|
|
622
|
+
}
|
|
221
623
|
}
|
|
222
624
|
/**
|
|
223
625
|
* Clear the filter list and reset to default
|
|
224
626
|
*/
|
|
225
627
|
clearList() {
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
// Remove all words
|
|
229
|
-
if (currentWords.length > 0) {
|
|
230
|
-
this.filter.remove(currentWords);
|
|
231
|
-
}
|
|
232
|
-
// Reset loaded languages tracking
|
|
628
|
+
this.profanitySet.clear();
|
|
629
|
+
this.normalizedProfanityMap.clear();
|
|
233
630
|
this.loadedLanguages.clear();
|
|
234
|
-
this.loadedLanguages.add("english"); // Default language remains
|
|
235
631
|
}
|
|
236
632
|
/**
|
|
237
633
|
* Change the character used as placeholder
|
|
@@ -246,6 +642,57 @@ export class AllProfanity {
|
|
|
246
642
|
this.defaultPlaceholder = placeholder;
|
|
247
643
|
}
|
|
248
644
|
}
|
|
645
|
+
/**
|
|
646
|
+
* Get the list of currently loaded languages
|
|
647
|
+
* @returns string[] - Array of loaded language names
|
|
648
|
+
*/
|
|
649
|
+
getLoadedLanguages() {
|
|
650
|
+
return Array.from(this.loadedLanguages);
|
|
651
|
+
}
|
|
652
|
+
/**
|
|
653
|
+
* Get the list of available language dictionaries
|
|
654
|
+
* @returns string[] - Array of available language names
|
|
655
|
+
*/
|
|
656
|
+
getAvailableLanguages() {
|
|
657
|
+
return Object.keys(this.availableLanguages);
|
|
658
|
+
}
|
|
659
|
+
/**
|
|
660
|
+
* Get current configuration
|
|
661
|
+
*/
|
|
662
|
+
getConfig() {
|
|
663
|
+
return {
|
|
664
|
+
defaultPlaceholder: this.defaultPlaceholder,
|
|
665
|
+
enableLeetSpeak: this.enableLeetSpeak,
|
|
666
|
+
caseSensitive: this.caseSensitive,
|
|
667
|
+
strictMode: this.strictMode,
|
|
668
|
+
detectPartialWords: this.detectPartialWords,
|
|
669
|
+
languages: this.getLoadedLanguages(),
|
|
670
|
+
whitelistWords: Array.from(this.whitelistSet),
|
|
671
|
+
};
|
|
672
|
+
}
|
|
673
|
+
/**
|
|
674
|
+
* Update configuration
|
|
675
|
+
*/
|
|
676
|
+
updateConfig(options) {
|
|
677
|
+
if (options.defaultPlaceholder !== undefined) {
|
|
678
|
+
this.setPlaceholder(options.defaultPlaceholder);
|
|
679
|
+
}
|
|
680
|
+
if (options.enableLeetSpeak !== undefined) {
|
|
681
|
+
this.enableLeetSpeak = options.enableLeetSpeak;
|
|
682
|
+
}
|
|
683
|
+
if (options.caseSensitive !== undefined) {
|
|
684
|
+
this.caseSensitive = options.caseSensitive;
|
|
685
|
+
}
|
|
686
|
+
if (options.strictMode !== undefined) {
|
|
687
|
+
this.strictMode = options.strictMode;
|
|
688
|
+
}
|
|
689
|
+
if (options.detectPartialWords !== undefined) {
|
|
690
|
+
this.detectPartialWords = options.detectPartialWords;
|
|
691
|
+
}
|
|
692
|
+
if (options.whitelistWords) {
|
|
693
|
+
this.addToWhitelist(options.whitelistWords);
|
|
694
|
+
}
|
|
695
|
+
}
|
|
249
696
|
}
|
|
250
697
|
// Create and export a singleton instance with default settings
|
|
251
698
|
const allProfanity = new AllProfanity();
|