allprofanity 1.0.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,22 +1,33 @@
1
- import leoProfanity from "leo-profanity";
2
- import hindiBadWords from "./languages/hindi-words";
3
- import frenchBadWords from "./languages/french-words";
4
- import germanBadWords from "./languages/german-words";
5
- import spanishBadWords from "./languages/spanish-words";
6
- import bengaliBadWords from "./languages/bengali-words";
7
- import tamilBadWords from "./languages/tamil-words";
8
- import teluguBadWords from "./languages/telugu-words";
1
+ import englishBadWords from "./languages/english-words.js";
2
+ import hindiBadWords from "./languages/hindi-words.js";
3
+ import frenchBadWords from "./languages/french-words.js";
4
+ import germanBadWords from "./languages/german-words.js";
5
+ import spanishBadWords from "./languages/spanish-words.js";
6
+ import bengaliBadWords from "./languages/bengali-words.js";
7
+ import tamilBadWords from "./languages/tamil-words.js";
8
+ import teluguBadWords from "./languages/telugu-words.js";
9
9
  // Export language dictionaries for direct access
10
- export { default as hindiBadWords } from "./languages/hindi-words";
11
- export { default as frenchBadWords } from "./languages/french-words";
12
- export { default as germanBadWords } from "./languages/german-words";
13
- export { default as spanishBadWords } from "./languages/spanish-words";
14
- export { default as bengaliBadWords } from "./languages/bengali-words";
15
- export { default as tamilBadWords } from "./languages/tamil-words";
16
- export { default as teluguBadWords } from "./languages/telugu-words";
10
+ export { default as englishBadWords } from "./languages/english-words.js";
11
+ export { default as hindiBadWords } from "./languages/hindi-words.js";
12
+ export { default as frenchBadWords } from "./languages/french-words.js";
13
+ export { default as germanBadWords } from "./languages/german-words.js";
14
+ export { default as spanishBadWords } from "./languages/spanish-words.js";
15
+ export { default as bengaliBadWords } from "./languages/bengali-words.js";
16
+ export { default as tamilBadWords } from "./languages/tamil-words.js";
17
+ export { default as teluguBadWords } from "./languages/telugu-words.js";
17
18
  /**
18
- * AllProfanity - Extended profanity filter with multi-language support
19
- * Based on leo-profanity with additional language capabilities
19
+ * Severity levels for profanity detection
20
+ */
21
// Numeric severity scale. The object also carries the reverse (number -> name)
// entries, so ProfanitySeverity[3] === "SEVERE" keeps working for callers that
// rely on the compiled-enum shape. Declared as a frozen `const` so the exported
// binding can be neither reassigned nor mutated.
export const ProfanitySeverity = Object.freeze({
  MILD: 1,
  MODERATE: 2,
  SEVERE: 3,
  EXTREME: 4,
  1: "MILD",
  2: "MODERATE",
  3: "SEVERE",
  4: "EXTREME",
});
28
+ /**
29
+ * Advanced AllProfanity - Custom profanity filter with multi-language support and leet speak detection
30
+ * No external dependencies - built from scratch for maximum performance and control
20
31
  */
21
32
  export class AllProfanity {
22
33
  /**
@@ -24,25 +35,92 @@ export class AllProfanity {
24
35
  * @param options - Configuration options
25
36
  */
26
37
  constructor(options) {
38
+ var _a, _b, _c, _d;
39
+ this.profanitySet = new Set();
40
+ this.normalizedProfanityMap = new Map();
27
41
  this.defaultPlaceholder = "*";
28
42
  this.loadedLanguages = new Set();
43
+ this.whitelistSet = new Set();
44
+ this.enableLeetSpeak = true;
45
+ this.caseSensitive = false;
46
+ this.strictMode = false;
47
+ this.detectPartialWords = true;
48
+ // Comprehensive leet speak mapping
49
+ this.leetMap = {
50
+ a: ["4", "@", "^", "aye", "λ", "ª"],
51
+ b: ["8", "6", "|3", "ß", "β", "13"],
52
+ c: ["(", "<", "©", "¢", "see", "sea"],
53
+ d: ["|)", "|]", "0", "ð"],
54
+ e: ["3", "€", "£", "ë", "é", "è"],
55
+ f: ["|=", "ph", "|#", "ƒ"],
56
+ g: ["9", "6", "&", "gee"],
57
+ h: ["#", "|-|", "[-]", "}{", "ħ"],
58
+ i: ["1", "!", "|", "eye", "ï", "í", "ì"],
59
+ j: ["_|", "_/", "¿", "ĵ"],
60
+ k: ["|<", "1<", "l<", "|{", "ķ"],
61
+ l: ["1", "|", "7", "£", "ł", "ĺ"],
62
+ m: ["|/|", "//\\", "em", "ɱ"],
63
+ n: ["||", "//", "and", "ñ", "ń"],
64
+ o: ["0", "()", "oh", "ø", "ó", "ò", "ô"],
65
+ p: ["|*", "|o", "|^", "|>", "9", "þ"],
66
+ q: ["(_,)", "()_", "kw", "ĸ"],
67
+ r: ["|2", "12", ".-", "are", "ř", "ŕ"],
68
+ s: ["5", "$", "z", "ş", "ś", "š"],
69
+ t: ["7", "+", "-|-", "†", "ť", "ţ"],
70
+ u: ["(_)", "|_|", "v", "you", "ü", "ú", "ù"],
71
+ v: ["\\/", "|/", "|", "vee"],
72
+ w: ["\\/\\/", "vv", "dubya", "ŵ"],
73
+ x: ["><", "}{", "ecks", "χ"],
74
+ y: ["`/", "j", "why", "ÿ", "ý"],
75
+ z: ["2", "7_", "-/_", "zee", "ž", "ź", "ż"],
76
+ };
77
+ // Word boundary patterns
78
+ this.wordBoundaryChars = /[\s\.,;:!?\-_+=\[\]{}()"'\/\\]/;
79
+ // Common word variations and suffixes
80
+ this.commonSuffixes = [
81
+ "ing",
82
+ "ed",
83
+ "s",
84
+ "er",
85
+ "ers",
86
+ "est",
87
+ "ly",
88
+ "tion",
89
+ "ness",
90
+ ];
91
+ this.commonPrefixes = [
92
+ "un",
93
+ "re",
94
+ "pre",
95
+ "dis",
96
+ "over",
97
+ "under",
98
+ "out",
99
+ ];
29
100
  this.availableLanguages = {
30
- hindi: hindiBadWords,
31
- french: frenchBadWords,
32
- german: germanBadWords,
33
- spanish: spanishBadWords,
34
- bengali: bengaliBadWords,
35
- tamil: tamilBadWords,
36
- telugu: teluguBadWords,
37
- // Add more built-in languages here in the future
101
+ english: englishBadWords || [],
102
+ hindi: hindiBadWords || [],
103
+ french: frenchBadWords || [],
104
+ german: germanBadWords || [],
105
+ spanish: spanishBadWords || [],
106
+ bengali: bengaliBadWords || [],
107
+ tamil: tamilBadWords || [],
108
+ telugu: teluguBadWords || [],
38
109
  };
39
- this.filter = leoProfanity;
40
- // Set custom placeholder if provided
110
+ // Set configuration options
41
111
  if (options === null || options === void 0 ? void 0 : options.defaultPlaceholder) {
42
112
  this.setPlaceholder(options.defaultPlaceholder);
43
113
  }
44
- // Load the default English dictionary from leo-profanity
45
- this.loadedLanguages.add("english");
114
+ this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
115
+ this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
116
+ this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
117
+ this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : true;
118
+ // Load whitelist if provided
119
+ if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
120
+ this.addToWhitelist(options.whitelistWords);
121
+ }
122
+ // Load the default English dictionary
123
+ this.loadLanguage("english");
46
124
  // Load Hindi by default for backward compatibility
47
125
  this.loadLanguage("hindi");
48
126
  // Load any additional languages specified in options
@@ -56,25 +134,166 @@ export class AllProfanity {
56
134
  });
57
135
  }
58
136
  }
137
+ /**
138
+ * Normalize text by converting leet speak to regular characters
139
+ * @param text - Text to normalize
140
+ * @returns Normalized text
141
+ */
142
+ normalizeLeetSpeak(text) {
143
+ if (!this.enableLeetSpeak)
144
+ return text;
145
+ let normalized = text.toLowerCase();
146
+ // Define comprehensive leet mappings
147
+ const leetMappings = [
148
+ // Multi-character first
149
+ { pattern: /\|-\|/g, replacement: "h" },
150
+ { pattern: /\[-\]/g, replacement: "h" },
151
+ { pattern: /\}{\s*/g, replacement: "h" },
152
+ { pattern: /\|\/\|/g, replacement: "m" },
153
+ { pattern: /\/\/\\/g, replacement: "m" },
154
+ { pattern: /\|\|/g, replacement: "n" },
155
+ { pattern: /\/\//g, replacement: "n" },
156
+ { pattern: /\|2/g, replacement: "r" },
157
+ { pattern: /12/g, replacement: "r" },
158
+ { pattern: /\\\/\\\//g, replacement: "w" },
159
+ { pattern: /vv/g, replacement: "w" },
160
+ { pattern: /><\s*/g, replacement: "x" },
161
+ { pattern: /\(_\)/g, replacement: "u" },
162
+ { pattern: /\|_\|/g, replacement: "u" },
163
+ { pattern: /\\\//g, replacement: "v" },
164
+ { pattern: /\|\//g, replacement: "v" },
165
+ // Single character mappings
166
+ { pattern: /@/g, replacement: "a" },
167
+ { pattern: /4/g, replacement: "u" },
168
+ { pattern: /\^/g, replacement: "a" },
169
+ { pattern: /8/g, replacement: "b" },
170
+ { pattern: /6/g, replacement: "b" },
171
+ { pattern: /\(/g, replacement: "c" },
172
+ { pattern: /</g, replacement: "c" },
173
+ { pattern: /©/g, replacement: "c" },
174
+ { pattern: /¢/g, replacement: "c" },
175
+ { pattern: /0/g, replacement: "o" },
176
+ { pattern: /3/g, replacement: "e" },
177
+ { pattern: /€/g, replacement: "e" },
178
+ { pattern: /£/g, replacement: "e" },
179
+ { pattern: /9/g, replacement: "g" },
180
+ { pattern: /&/g, replacement: "g" },
181
+ { pattern: /#/g, replacement: "h" },
182
+ { pattern: /1/g, replacement: "i" },
183
+ { pattern: /!/g, replacement: "i" },
184
+ { pattern: /\|/g, replacement: "i" },
185
+ { pattern: /7/g, replacement: "t" },
186
+ { pattern: /5/g, replacement: "s" },
187
+ { pattern: /\$/g, replacement: "s" },
188
+ { pattern: /\+/g, replacement: "t" },
189
+ { pattern: /2/g, replacement: "z" },
190
+ ];
191
+ // Apply all mappings
192
+ for (const mapping of leetMappings) {
193
+ normalized = normalized.replace(mapping.pattern, mapping.replacement);
194
+ }
195
+ return normalized;
196
+ }
197
+ escapeRegex(str) {
198
+ if (!str || typeof str !== "string") {
199
+ return "";
200
+ }
201
+ return str.replace(/[\\^$.*+?()[\]{}|\-]/g, function (match) {
202
+ return "\\" + match;
203
+ });
204
+ }
205
+ /**
206
+ * Generate word variations with common prefixes and suffixes
207
+ */
208
+ generateWordVariations(word) {
209
+ const variations = new Set([word]);
210
+ // Add suffix variations
211
+ for (const suffix of this.commonSuffixes) {
212
+ variations.add(word + suffix);
213
+ // Handle words ending in 'e'
214
+ if (word.endsWith("e") && !suffix.startsWith("e")) {
215
+ variations.add(word.slice(0, -1) + suffix);
216
+ }
217
+ // Handle consonant doubling
218
+ if (word.length > 2 && /[bcdfghjklmnpqrstvwxyz]/.test(word.slice(-1))) {
219
+ variations.add(word + word.slice(-1) + suffix);
220
+ }
221
+ }
222
+ // Add prefix variations
223
+ for (const prefix of this.commonPrefixes) {
224
+ variations.add(prefix + word);
225
+ }
226
+ return Array.from(variations);
227
+ }
228
+ /**
229
+ * Check if text contains word boundaries around a match
230
+ */
231
+ hasWordBoundaries(text, start, end) {
232
+ if (!this.strictMode)
233
+ return true;
234
+ const beforeChar = start > 0 ? text[start - 1] : " ";
235
+ const afterChar = end < text.length ? text[end] : " ";
236
+ return (this.wordBoundaryChars.test(beforeChar) &&
237
+ this.wordBoundaryChars.test(afterChar));
238
+ }
239
+ /**
240
+ * Calculate severity based on detected words
241
+ */
242
+ calculateSeverity(detectedWords) {
243
+ if (detectedWords.length === 0)
244
+ return ProfanitySeverity.MILD;
245
+ // This is a simplified severity calculation
246
+ // You can enhance this based on your specific word categorization
247
+ const totalWords = detectedWords.length;
248
+ const uniqueWords = new Set(detectedWords).size;
249
+ if (totalWords >= 5 || uniqueWords >= 3)
250
+ return ProfanitySeverity.EXTREME;
251
+ if (totalWords >= 3 || uniqueWords >= 2)
252
+ return ProfanitySeverity.SEVERE;
253
+ if (totalWords >= 2)
254
+ return ProfanitySeverity.MODERATE;
255
+ return ProfanitySeverity.MILD;
256
+ }
59
257
  /**
60
258
  * Load a built-in language dictionary
61
259
  * @param language - The language to load
62
260
  * @returns boolean - True if loaded successfully, false otherwise
63
261
  */
64
262
  loadLanguage(language) {
65
- // Skip if already loaded
66
263
  if (this.loadedLanguages.has(language.toLowerCase())) {
67
264
  return true;
68
265
  }
69
266
  const langKey = language.toLowerCase();
70
- if (this.availableLanguages[langKey]) {
71
- this.filter.add(this.availableLanguages[langKey]);
267
+ if (this.availableLanguages[langKey] &&
268
+ this.availableLanguages[langKey].length > 0) {
269
+ const words = this.availableLanguages[langKey];
270
+ // Add words and their variations to the profanity set
271
+ for (const word of words) {
272
+ if (!word || typeof word !== "string")
273
+ continue;
274
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
275
+ this.profanitySet.add(normalizedWord);
276
+ // Store normalized leet version mapping
277
+ const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
278
+ if (leetNormalized !== normalizedWord) {
279
+ this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
280
+ }
281
+ // Generate and add variations
282
+ const variations = this.generateWordVariations(normalizedWord);
283
+ for (const variation of variations) {
284
+ this.profanitySet.add(variation);
285
+ const leetVariation = this.normalizeLeetSpeak(variation);
286
+ if (leetVariation !== variation) {
287
+ this.normalizedProfanityMap.set(leetVariation, variation);
288
+ }
289
+ }
290
+ }
72
291
  this.loadedLanguages.add(langKey);
73
- console.log(`AllProfanity: Added ${this.availableLanguages[langKey].length} ${language} words to the profanity list.`);
292
+ console.log(`AllProfanity: Added ${words.length} ${language} words to the profanity list.`);
74
293
  return true;
75
294
  }
76
295
  else {
77
- console.warn(`AllProfanity: Language '${language}' not found in available dictionaries.`);
296
+ console.warn(`AllProfanity: Language '${language}' not found or empty in available dictionaries.`);
78
297
  return false;
79
298
  }
80
299
  }
@@ -112,66 +331,224 @@ export class AllProfanity {
112
331
  }
113
332
  // Add to available languages for future reference
114
333
  this.availableLanguages[name.toLowerCase()] = words;
115
- // Add to filter
116
- this.filter.add(words);
334
+ // Process and add words
335
+ for (const word of words) {
336
+ if (!word || typeof word !== "string")
337
+ continue;
338
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
339
+ this.profanitySet.add(normalizedWord);
340
+ // Store normalized leet version mapping
341
+ const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
342
+ if (leetNormalized !== normalizedWord) {
343
+ this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
344
+ }
345
+ // Generate and add variations
346
+ const variations = this.generateWordVariations(normalizedWord);
347
+ for (const variation of variations) {
348
+ this.profanitySet.add(variation);
349
+ const leetVariation = this.normalizeLeetSpeak(variation);
350
+ if (leetVariation !== variation) {
351
+ this.normalizedProfanityMap.set(leetVariation, variation);
352
+ }
353
+ }
354
+ }
117
355
  this.loadedLanguages.add(name.toLowerCase());
118
356
  console.log(`AllProfanity: Added ${words.length} words from custom '${name}' dictionary.`);
119
357
  }
120
358
  /**
121
- * Get the list of currently loaded languages
122
- * @returns string[] - Array of loaded language names
359
+ * Add words to whitelist (words that should never be flagged as profanity)
360
+ * @param words - Array of words to whitelist
123
361
  */
124
- getLoadedLanguages() {
125
- return Array.from(this.loadedLanguages);
362
+ addToWhitelist(words) {
363
+ for (const word of words) {
364
+ if (word && typeof word === "string") {
365
+ this.whitelistSet.add(this.caseSensitive ? word : word.toLowerCase());
366
+ }
367
+ }
126
368
  }
127
369
  /**
128
- * Get the list of available language dictionaries
129
- * @returns string[] - Array of available language names
370
+ * Remove words from whitelist
371
+ * @param words - Array of words to remove from whitelist
130
372
  */
131
- getAvailableLanguages() {
132
- return Object.keys(this.availableLanguages);
373
+ removeFromWhitelist(words) {
374
+ for (const word of words) {
375
+ if (word && typeof word === "string") {
376
+ this.whitelistSet.delete(this.caseSensitive ? word : word.toLowerCase());
377
+ }
378
+ }
133
379
  }
134
380
  /**
135
- * Check if a string contains profanity
381
+ * Advanced profanity detection with detailed results
382
+ * @param text - The text to analyze
383
+ * @returns ProfanityDetectionResult - Detailed detection results
384
+ */
385
+ detect(text) {
386
+ if (!text || typeof text !== "string") {
387
+ return {
388
+ hasProfanity: false,
389
+ detectedWords: [],
390
+ cleanedText: text || "",
391
+ severity: ProfanitySeverity.MILD,
392
+ positions: [],
393
+ };
394
+ }
395
+ const normalizedText = this.caseSensitive ? text : text.toLowerCase();
396
+ const leetNormalizedText = this.normalizeLeetSpeak(normalizedText);
397
+ const detectedWords = [];
398
+ const positions = [];
399
+ // Check for whole word matches first
400
+ for (const profanity of this.profanitySet) {
401
+ if (this.whitelistSet.has(profanity))
402
+ continue;
403
+ try {
404
+ // Create regex for word boundary detection
405
+ const escapedWord = this.escapeRegex(profanity);
406
+ const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
407
+ let match;
408
+ while ((match = wordRegex.exec(normalizedText)) !== null) {
409
+ if (this.hasWordBoundaries(normalizedText, match.index, match.index + match[0].length)) {
410
+ detectedWords.push(match[0]);
411
+ positions.push({
412
+ word: match[0],
413
+ start: match.index,
414
+ end: match.index + match[0].length,
415
+ });
416
+ }
417
+ }
418
+ }
419
+ catch (error) {
420
+ // Fallback to simple string search if regex fails
421
+ const index = normalizedText.indexOf(profanity);
422
+ if (index !== -1) {
423
+ detectedWords.push(profanity);
424
+ positions.push({
425
+ word: profanity,
426
+ start: index,
427
+ end: index + profanity.length,
428
+ });
429
+ }
430
+ }
431
+ }
432
+ // Check leet speak normalized text
433
+ if (this.enableLeetSpeak && leetNormalizedText !== normalizedText) {
434
+ for (const profanity of this.profanitySet) {
435
+ if (this.whitelistSet.has(profanity))
436
+ continue;
437
+ try {
438
+ const escapedWord = this.escapeRegex(profanity);
439
+ const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
440
+ let match;
441
+ while ((match = wordRegex.exec(leetNormalizedText)) !== null) {
442
+ if (this.hasWordBoundaries(leetNormalizedText, match.index, match.index + match[0].length)) {
443
+ // Find the original text that corresponds to this match
444
+ const originalMatch = normalizedText.substring(match.index, match.index + match[0].length);
445
+ if (!detectedWords.includes(originalMatch)) {
446
+ detectedWords.push(originalMatch);
447
+ positions.push({
448
+ word: originalMatch,
449
+ start: match.index,
450
+ end: match.index + match[0].length,
451
+ });
452
+ }
453
+ }
454
+ }
455
+ }
456
+ catch (error) {
457
+ // Fallback to simple string search
458
+ if (leetNormalizedText.includes(profanity)) {
459
+ const index = leetNormalizedText.indexOf(profanity);
460
+ const originalMatch = normalizedText.substring(index, index + profanity.length);
461
+ if (!detectedWords.includes(originalMatch)) {
462
+ detectedWords.push(originalMatch);
463
+ positions.push({
464
+ word: originalMatch,
465
+ start: index,
466
+ end: index + profanity.length,
467
+ });
468
+ }
469
+ }
470
+ }
471
+ }
472
+ }
473
+ // Partial word detection (if enabled)
474
+ if (this.detectPartialWords) {
475
+ for (const profanity of this.profanitySet) {
476
+ if (this.whitelistSet.has(profanity) || profanity.length < 4)
477
+ continue;
478
+ if (normalizedText.includes(profanity) ||
479
+ leetNormalizedText.includes(profanity)) {
480
+ const index = normalizedText.indexOf(profanity);
481
+ if (index !== -1 &&
482
+ !detectedWords.some((w) => normalizedText.indexOf(w) === index)) {
483
+ detectedWords.push(profanity);
484
+ positions.push({
485
+ word: profanity,
486
+ start: index,
487
+ end: index + profanity.length,
488
+ });
489
+ }
490
+ }
491
+ }
492
+ }
493
+ // REMOVED: cleanedText = this.clean(text) - this was causing circular dependency
494
+ // We'll generate the cleaned text directly here instead
495
+ let cleanedText = text;
496
+ if (detectedWords.length > 0) {
497
+ // Sort positions by start index in descending order to avoid index shifting
498
+ const sortedPositions = positions.sort((a, b) => b.start - a.start);
499
+ for (const pos of sortedPositions) {
500
+ const originalWord = text.substring(pos.start, pos.end);
501
+ const replacement = this.defaultPlaceholder.repeat(originalWord.length);
502
+ cleanedText =
503
+ cleanedText.substring(0, pos.start) +
504
+ replacement +
505
+ cleanedText.substring(pos.end);
506
+ }
507
+ }
508
+ const severity = this.calculateSeverity(detectedWords);
509
+ return {
510
+ hasProfanity: detectedWords.length > 0,
511
+ detectedWords: [...new Set(detectedWords)],
512
+ cleanedText,
513
+ severity,
514
+ positions,
515
+ };
516
+ }
517
+ /**
518
+ * Check if a string contains profanity (simple boolean check)
136
519
  * @param string - The string to check
137
520
  * @returns boolean - True if profanity found, false otherwise
138
521
  */
139
522
  check(string) {
140
- return this.filter.check(string);
523
+ return this.detect(string).hasProfanity;
141
524
  }
142
525
  /**
143
526
  * Clean a string by replacing profanities with placeholders
144
527
  * @param string - The string to clean
145
- * @param placeholder - Optional custom placeholder (defaults to '*')
528
+ * @param placeholder - Optional custom placeholder
146
529
  * @returns string - The cleaned string
147
530
  */
148
531
  clean(string, placeholder) {
149
- // More general solution for handling variations like "fucking"
150
- const badWords = this.list();
532
+ if (!string || typeof string !== "string")
533
+ return string || "";
534
+ const placeholderChar = placeholder || this.defaultPlaceholder;
535
+ const detection = this.detect(string);
536
+ // If detect() already provided cleanedText and no custom placeholder, use it
537
+ if (!placeholder && detection.cleanedText !== string) {
538
+ return detection.cleanedText;
539
+ }
540
+ // Otherwise, build cleaned text with custom placeholder
151
541
  let result = string;
152
- for (const word of badWords) {
153
- // Check for variations with "ing", "ed", etc.
154
- const variations = [
155
- `${word}ing`,
156
- `${word}ed`,
157
- `${word}s`,
158
- `${word}er`,
159
- `${word}ers`,
160
- ];
161
- for (const variation of variations) {
162
- if (result.toLowerCase().includes(variation.toLowerCase())) {
163
- const prefix = word;
164
- const suffix = variation.slice(word.length);
165
- const replacement = (placeholder || this.defaultPlaceholder).repeat(prefix.length) +
166
- suffix;
167
- // Use regex to replace while preserving case (though this simplifies it)
168
- const regex = new RegExp(variation, "gi");
169
- result = result.replace(regex, replacement);
170
- }
171
- }
542
+ const sortedPositions = detection.positions.sort((a, b) => b.start - a.start);
543
+ for (const pos of sortedPositions) {
544
+ const originalWord = string.substring(pos.start, pos.end);
545
+ const replacement = placeholderChar.repeat(originalWord.length);
546
+ result =
547
+ result.substring(0, pos.start) +
548
+ replacement +
549
+ result.substring(pos.end);
172
550
  }
173
- // Fall back to default leo-profanity implementation
174
- return this.filter.clean(result, placeholder || this.defaultPlaceholder);
551
+ return result;
175
552
  }
176
553
  /**
177
554
  * Clean a string by replacing each profane word with a single placeholder
@@ -180,58 +557,77 @@ export class AllProfanity {
180
557
  * @returns string - The cleaned string
181
558
  */
182
559
  cleanWithWord(string, placeholder = "***") {
183
- // Split by spaces but preserve punctuation
184
- const regex = /([^\w\s])/g;
185
- let tempString = string.replace(regex, " $1 ");
186
- const words = tempString.split(" ").filter((w) => w !== "");
187
- const result = words.map((word) => {
188
- // Check if this word contains profanity, ignoring punctuation for the check
189
- const wordWithoutPunctuation = word.replace(/[^\w\s]/g, "");
190
- if (wordWithoutPunctuation && this.check(wordWithoutPunctuation)) {
191
- return placeholder;
192
- }
193
- return word;
194
- });
195
- // Join and fix spaces before punctuation
196
- let cleaned = result.join(" ");
197
- cleaned = cleaned.replace(/ ([^\w\s]) /g, "$1 "); // Fix space before punctuation
198
- cleaned = cleaned.replace(/ ([^\w\s])$/g, "$1"); // Fix trailing punctuation
199
- return cleaned;
560
+ if (!string || typeof string !== "string")
561
+ return string || "";
562
+ // Build a regex that matches any profane word with word boundaries, unicode-aware
563
+ const words = Array.from(this.profanitySet)
564
+ .map((w) => w.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) // escape regex
565
+ .sort((a, b) => b.length - a.length); // longer words first to avoid partial matches
566
+ if (words.length === 0)
567
+ return string;
568
+ // Unicode safe word boundary: (?<=^|[^\p{L}\p{N}_])WORD(?=[^\p{L}\p{N}_]|$)
569
+ // This ensures we only match whole words, not inside other words.
570
+ const regex = new RegExp(`(?<=^|[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\])(` +
571
+ words.join("|") +
572
+ `)(?=[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\]|$)`, this.caseSensitive ? "gu" : "giu");
573
+ // Replace all matches with the placeholder.
574
+ return string.replace(regex, placeholder);
200
575
  }
201
576
  /**
202
577
  * Get the current list of profanity words
203
578
  * @returns string[] - Array of all profanity words
204
579
  */
205
580
  list() {
206
- return this.filter.list();
581
+ return Array.from(this.profanitySet);
207
582
  }
208
583
  /**
209
584
  * Add word(s) to the profanity list
210
585
  * @param word - String or array of strings to add
211
586
  */
212
587
  add(word) {
213
- this.filter.add(word);
588
+ const words = Array.isArray(word) ? word : [word];
589
+ for (const w of words) {
590
+ if (!w || typeof w !== "string")
591
+ continue;
592
+ const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
593
+ this.profanitySet.add(normalizedWord);
594
+ // Add leet speak mapping
595
+ const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
596
+ if (leetNormalized !== normalizedWord) {
597
+ this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
598
+ }
599
+ // Add variations
600
+ const variations = this.generateWordVariations(normalizedWord);
601
+ for (const variation of variations) {
602
+ this.profanitySet.add(variation);
603
+ }
604
+ }
214
605
  }
215
606
  /**
216
607
  * Remove word(s) from the profanity list
217
608
  * @param word - String or array of strings to remove
218
609
  */
219
610
  remove(word) {
220
- this.filter.remove(word);
611
+ const words = Array.isArray(word) ? word : [word];
612
+ for (const w of words) {
613
+ if (!w || typeof w !== "string")
614
+ continue;
615
+ const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
616
+ this.profanitySet.delete(normalizedWord);
617
+ // Remove variations
618
+ const variations = this.generateWordVariations(normalizedWord);
619
+ for (const variation of variations) {
620
+ this.profanitySet.delete(variation);
621
+ }
622
+ }
221
623
  }
222
624
  /**
223
625
  * Clear the filter list and reset to default
224
626
  */
225
627
  clearList() {
226
- // Get all current words
227
- const currentWords = this.filter.list();
228
- // Remove all words
229
- if (currentWords.length > 0) {
230
- this.filter.remove(currentWords);
231
- }
232
- // Reset loaded languages tracking
628
+ this.profanitySet.clear();
629
+ this.normalizedProfanityMap.clear();
233
630
  this.loadedLanguages.clear();
234
- this.loadedLanguages.add("english"); // Default language remains
235
631
  }
236
632
  /**
237
633
  * Change the character used as placeholder
@@ -246,6 +642,57 @@ export class AllProfanity {
246
642
  this.defaultPlaceholder = placeholder;
247
643
  }
248
644
  }
645
+ /**
646
+ * Get the list of currently loaded languages
647
+ * @returns string[] - Array of loaded language names
648
+ */
649
+ getLoadedLanguages() {
650
+ return Array.from(this.loadedLanguages);
651
+ }
652
+ /**
653
+ * Get the list of available language dictionaries
654
+ * @returns string[] - Array of available language names
655
+ */
656
+ getAvailableLanguages() {
657
+ return Object.keys(this.availableLanguages);
658
+ }
659
+ /**
660
+ * Get current configuration
661
+ */
662
+ getConfig() {
663
+ return {
664
+ defaultPlaceholder: this.defaultPlaceholder,
665
+ enableLeetSpeak: this.enableLeetSpeak,
666
+ caseSensitive: this.caseSensitive,
667
+ strictMode: this.strictMode,
668
+ detectPartialWords: this.detectPartialWords,
669
+ languages: this.getLoadedLanguages(),
670
+ whitelistWords: Array.from(this.whitelistSet),
671
+ };
672
+ }
673
+ /**
674
+ * Update configuration
675
+ */
676
+ updateConfig(options) {
677
+ if (options.defaultPlaceholder !== undefined) {
678
+ this.setPlaceholder(options.defaultPlaceholder);
679
+ }
680
+ if (options.enableLeetSpeak !== undefined) {
681
+ this.enableLeetSpeak = options.enableLeetSpeak;
682
+ }
683
+ if (options.caseSensitive !== undefined) {
684
+ this.caseSensitive = options.caseSensitive;
685
+ }
686
+ if (options.strictMode !== undefined) {
687
+ this.strictMode = options.strictMode;
688
+ }
689
+ if (options.detectPartialWords !== undefined) {
690
+ this.detectPartialWords = options.detectPartialWords;
691
+ }
692
+ if (options.whitelistWords) {
693
+ this.addToWhitelist(options.whitelistWords);
694
+ }
695
+ }
249
696
  }
250
697
  // Create and export a singleton instance with default settings
251
698
  const allProfanity = new AllProfanity();