allprofanity 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
1
+ // Import language dictionaries (assuming these exist)
1
2
  import englishBadWords from "./languages/english-words.js";
2
3
  import hindiBadWords from "./languages/hindi-words.js";
3
4
  import frenchBadWords from "./languages/french-words.js";
@@ -15,6 +16,20 @@ export { default as spanishBadWords } from "./languages/spanish-words.js";
15
16
  export { default as bengaliBadWords } from "./languages/bengali-words.js";
16
17
  export { default as tamilBadWords } from "./languages/tamil-words.js";
17
18
  export { default as teluguBadWords } from "./languages/telugu-words.js";
19
/**
 * Logger that writes to the global `console`.
 * Every message is prefixed with "[AllProfanity] " before being forwarded
 * to the console method that matches the log level.
 */
class ConsoleLogger {
  /** Write an informational message via `console.log`. */
  info(message) {
    const line = `[AllProfanity] ${message}`;
    console.log(line);
  }

  /** Write a warning via `console.warn`. */
  warn(message) {
    const line = `[AllProfanity] ${message}`;
    console.warn(line);
  }

  /** Write an error via `console.error`. */
  error(message) {
    const line = `[AllProfanity] ${message}`;
    console.error(line);
  }
}
18
33
  /**
19
34
  * Severity levels for profanity detection
20
35
  */
@@ -26,77 +41,136 @@ export var ProfanitySeverity;
26
41
  ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
27
42
  })(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
28
43
/**
 * Ensure that a parameter is a string.
 *
 * @param {*} input - Value to check.
 * @param {string} paramName - Parameter name reported in the error message.
 * @returns {string} `input`, unchanged, when it is a string.
 * @throws {TypeError} When `input` is not a string.
 */
function validateString(input, paramName) {
  if (typeof input === "string") {
    return input;
  }
  throw new TypeError(`${paramName} must be a string, got ${typeof input}`);
}
52
/**
 * Ensure that a parameter is an array and keep only its usable string items.
 *
 * Non-string items are reported with `console.warn` and dropped; strings that
 * are empty or whitespace-only are dropped silently. Kept strings are returned
 * as-is (they are not trimmed).
 *
 * @param {*} input - Value expected to be an array.
 * @param {string} paramName - Parameter name used in error and warning text.
 * @returns {string[]} New array holding the non-blank string items of `input`.
 * @throws {TypeError} When `input` is not an array.
 */
function validateStringArray(input, paramName) {
  if (!Array.isArray(input)) {
    throw new TypeError(`${paramName} must be an array`);
  }
  // Template interpolation throws for Symbols and null-prototype objects, so
  // describe rejected items defensively: a bad entry must be skipped, not crash.
  const describe = (item) => {
    try {
      return String(item);
    } catch {
      return Object.prototype.toString.call(item);
    }
  };
  return input.filter((item) => {
    if (typeof item !== "string") {
      console.warn(`Skipping non-string item in ${paramName}: ${describe(item)}`);
      return false;
    }
    return item.trim().length > 0;
  });
}
64
/**
 * Trie (prefix tree) node used for dictionary matching.
 *
 * Keys are single UTF-16 code units, the same unit that `text[pos]`,
 * `String.prototype.substring` and `length` use, so offsets returned by
 * `findMatches` can be applied directly to the searched string and words
 * containing astral characters (e.g. emoji) are matched and removed correctly.
 */
class TrieNode {
  constructor() {
    // Child nodes keyed by the next UTF-16 code unit.
    this.children = new Map();
    // True when the path from the root to this node spells a stored word.
    this.isEndOfWord = false;
    // The stored word ending at this node ("" when none).
    this.word = "";
  }

  /**
   * Add a word to the trie.
   * @param {string} word - Word to store.
   */
  addWord(word) {
    let current = this;
    // Index by code unit (not `for...of`, which yields code points) so that
    // insertion agrees with the lookups in findMatches/removeHelper.
    for (let i = 0; i < word.length; i++) {
      const unit = word[i];
      let next = current.children.get(unit);
      if (!next) {
        next = new TrieNode();
        current.children.set(unit, next);
      }
      current = next;
    }
    current.isEndOfWord = true;
    current.word = word;
  }

  /**
   * Remove a word from the trie.
   * @param {string} word - Word to remove.
   * @returns {boolean} True when the word was stored and has been removed,
   *   false when it was not present.
   */
  removeWord(word) {
    // Confirm the word exists first; removeHelper's return value is only a
    // pruning signal and says nothing about whether the word was found.
    let node = this;
    for (let i = 0; i < word.length; i++) {
      node = node.children.get(word[i]);
      if (!node) {
        return false;
      }
    }
    if (!node.isEndOfWord) {
      return false;
    }
    this.removeHelper(word, 0);
    return true;
  }

  /**
   * Recursive worker for removal: unmark the terminal node and prune nodes
   * that no longer lead to any stored word.
   * @param {string} word - Word being removed.
   * @param {number} index - Position in `word` handled by this node.
   * @returns {boolean} True when the caller should delete this node.
   */
  removeHelper(word, index) {
    if (index === word.length) {
      if (!this.isEndOfWord) {
        return false;
      }
      this.isEndOfWord = false;
      this.word = ""; // do not leave a stale word on a surviving prefix node
      return this.children.size === 0;
    }
    const unit = word[index];
    const child = this.children.get(unit);
    if (!child) {
      return false;
    }
    if (child.removeHelper(word, index + 1)) {
      this.children.delete(unit);
      return this.children.size === 0 && !this.isEndOfWord;
    }
    return false;
  }

  /**
   * Find every stored word that begins at `startPos` in `text`.
   * @param {string} text - Text to scan.
   * @param {number} startPos - Offset in `text` where matching begins.
   * @param {boolean} allowPartial - Accepted for interface compatibility; the
   *   result does not depend on it.
   * @returns {{word: string, start: number, end: number}[]} Matches, shortest
   *   first. `start` is always 0 and `end` is the match length; both are
   *   offsets relative to `startPos`.
   */
  findMatches(text, startPos, allowPartial) {
    const matches = [];
    let current = this;
    for (let pos = startPos; pos < text.length; ) {
      const next = current.children.get(text[pos]);
      if (!next) {
        break;
      }
      current = next;
      pos++;
      if (current.isEndOfWord) {
        matches.push({ word: current.word, start: 0, end: pos - startPos });
      }
    }
    return matches;
  }

  /**
   * Remove every word from this node's subtree and reset the node itself.
   */
  clear() {
    this.children.clear();
    this.isEndOfWord = false;
    this.word = "";
  }
}
157
+ /**
158
+ * Advanced AllProfanity - Fixed profanity filter with multi-language support
159
+ * Addresses all critical issues from the original implementation
160
+ */
161
+ export class AllProfanity {
37
162
  constructor(options) {
38
- var _a, _b, _c, _d;
39
- this.profanitySet = new Set();
40
- this.normalizedProfanityMap = new Map();
41
- this.defaultPlaceholder = "*";
42
- this.loadedLanguages = new Set();
163
+ var _a, _b, _c, _d, _e;
164
+ this.profanityTrie = new TrieNode();
43
165
  this.whitelistSet = new Set();
166
+ this.loadedLanguages = new Set();
167
+ // Configuration
168
+ this.defaultPlaceholder = "*";
44
169
  this.enableLeetSpeak = true;
45
170
  this.caseSensitive = false;
46
171
  this.strictMode = false;
47
- this.detectPartialWords = true;
48
- // Comprehensive leet speak mapping
49
- this.leetMap = {
50
- a: ["4", "@", "^", "aye", "λ", "ª"],
51
- b: ["8", "6", "|3", "ß", "β", "13"],
52
- c: ["(", "<", "©", "¢", "see", "sea"],
53
- d: ["|)", "|]", "0", "ð"],
54
- e: ["3", "€", "£", "ë", "é", "è"],
55
- f: ["|=", "ph", "|#", "ƒ"],
56
- g: ["9", "6", "&", "gee"],
57
- h: ["#", "|-|", "[-]", "}{", "ħ"],
58
- i: ["1", "!", "|", "eye", "ï", "í", "ì"],
59
- j: ["_|", "_/", "¿", "ĵ"],
60
- k: ["|<", "1<", "l<", "|{", "ķ"],
61
- l: ["1", "|", "7", "£", "ł", "ĺ"],
62
- m: ["|/|", "//\\", "em", "ɱ"],
63
- n: ["||", "//", "and", "ñ", "ń"],
64
- o: ["0", "()", "oh", "ø", "ó", "ò", "ô"],
65
- p: ["|*", "|o", "|^", "|>", "9", "þ"],
66
- q: ["(_,)", "()_", "kw", "ĸ"],
67
- r: ["|2", "12", ".-", "are", "ř", "ŕ"],
68
- s: ["5", "$", "z", "ş", "ś", "š"],
69
- t: ["7", "+", "-|-", "†", "ť", "ţ"],
70
- u: ["(_)", "|_|", "v", "you", "ü", "ú", "ù"],
71
- v: ["\\/", "|/", "|", "vee"],
72
- w: ["\\/\\/", "vv", "dubya", "ŵ"],
73
- x: ["><", "}{", "ecks", "χ"],
74
- y: ["`/", "j", "why", "ÿ", "ý"],
75
- z: ["2", "7_", "-/_", "zee", "ž", "ź", "ż"],
76
- };
77
- // Word boundary patterns
78
- this.wordBoundaryChars = /[\s\.,;:!?\-_+=\[\]{}()"'\/\\]/;
79
- // Common word variations and suffixes
80
- this.commonSuffixes = [
81
- "ing",
82
- "ed",
83
- "s",
84
- "er",
85
- "ers",
86
- "est",
87
- "ly",
88
- "tion",
89
- "ness",
90
- ];
91
- this.commonPrefixes = [
92
- "un",
93
- "re",
94
- "pre",
95
- "dis",
96
- "over",
97
- "under",
98
- "out",
99
- ];
172
+ this.detectPartialWords = false;
173
+ // Available language dictionaries
100
174
  this.availableLanguages = {
101
175
  english: englishBadWords || [],
102
176
  hindi: hindiBadWords || [],
@@ -107,442 +181,293 @@ export class AllProfanity {
107
181
  tamil: tamilBadWords || [],
108
182
  telugu: teluguBadWords || [],
109
183
  };
110
- // Set configuration options
111
- if (options === null || options === void 0 ? void 0 : options.defaultPlaceholder) {
184
+ // Fixed leet speak mappings
185
+ this.leetMappings = new Map([
186
+ ["@", "a"],
187
+ ["^", "a"],
188
+ ["4", "a"],
189
+ ["8", "b"],
190
+ ["6", "b"],
191
+ ["|3", "b"],
192
+ ["(", "c"],
193
+ ["<", "c"],
194
+ ["©", "c"],
195
+ ["|)", "d"],
196
+ ["0", "o"],
197
+ ["3", "e"],
198
+ ["€", "e"],
199
+ ["|=", "f"],
200
+ ["ph", "f"],
201
+ ["9", "g"],
202
+ ["#", "h"],
203
+ ["|-|", "h"],
204
+ ["1", "i"],
205
+ ["!", "i"],
206
+ ["|", "i"],
207
+ ["_|", "j"],
208
+ ["¿", "j"],
209
+ ["|<", "k"],
210
+ ["1<", "k"],
211
+ ["7", "l"],
212
+ ["|\\/|", "m"],
213
+ ["/\\/\\", "m"],
214
+ ["|\\|", "n"],
215
+ ["//", "n"],
216
+ ["()", "o"],
217
+ ["|*", "p"],
218
+ ["|o", "p"],
219
+ ["(_,)", "q"],
220
+ ["()_", "q"],
221
+ ["|2", "r"],
222
+ ["12", "r"],
223
+ ["5", "s"],
224
+ ["$", "s"],
225
+ ["z", "s"],
226
+ ["7", "t"],
227
+ ["+", "t"],
228
+ ["†", "t"],
229
+ ["|_|", "u"],
230
+ ["(_)", "u"],
231
+ ["v", "u"],
232
+ ["\\/", "v"],
233
+ ["|/", "v"],
234
+ ["\\/\\/", "w"],
235
+ ["vv", "w"],
236
+ ["><", "x"],
237
+ ["}{", "x"],
238
+ ["`/", "y"],
239
+ ["j", "y"],
240
+ ["2", "z"],
241
+ ["7_", "z"],
242
+ ]);
243
+ // Dynamic words added at runtime
244
+ this.dynamicWords = new Set();
245
+ this.logger = (options === null || options === void 0 ? void 0 : options.logger) || new ConsoleLogger();
246
+ // Validate and set configuration
247
+ if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
112
248
  this.setPlaceholder(options.defaultPlaceholder);
113
249
  }
114
250
  this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
115
251
  this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
116
252
  this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
117
- this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : true;
118
- // Load whitelist if provided
253
+ this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
254
+ // Load whitelist
119
255
  if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
120
256
  this.addToWhitelist(options.whitelistWords);
121
257
  }
122
- // Load the default English dictionary
258
+ // Load default languages
123
259
  this.loadLanguage("english");
124
- // Load Hindi by default for backward compatibility
125
260
  this.loadLanguage("hindi");
126
- // Load any additional languages specified in options
127
- if (options === null || options === void 0 ? void 0 : options.languages) {
261
+ // Load additional languages
262
+ if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
128
263
  options.languages.forEach((lang) => this.loadLanguage(lang));
129
264
  }
130
- // Load any custom dictionaries
265
+ // Load custom dictionaries
131
266
  if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
132
- Object.entries(options.customDictionaries).forEach(([langName, words]) => {
133
- this.loadCustomDictionary(langName, words);
267
+ Object.entries(options.customDictionaries).forEach(([name, words]) => {
268
+ this.loadCustomDictionary(name, words);
134
269
  });
135
270
  }
136
271
  }
137
272
  /**
138
- * Normalize text by converting leet speak to regular characters
139
- * @param text - Text to normalize
140
- * @returns Normalized text
273
+ * Normalize text by converting leet speak to regular characters.
141
274
  */
142
275
  normalizeLeetSpeak(text) {
143
276
  if (!this.enableLeetSpeak)
144
277
  return text;
145
278
  let normalized = text.toLowerCase();
146
- // Define comprehensive leet mappings
147
- const leetMappings = [
148
- // Multi-character first
149
- { pattern: /\|-\|/g, replacement: "h" },
150
- { pattern: /\[-\]/g, replacement: "h" },
151
- { pattern: /\}{\s*/g, replacement: "h" },
152
- { pattern: /\|\/\|/g, replacement: "m" },
153
- { pattern: /\/\/\\/g, replacement: "m" },
154
- { pattern: /\|\|/g, replacement: "n" },
155
- { pattern: /\/\//g, replacement: "n" },
156
- { pattern: /\|2/g, replacement: "r" },
157
- { pattern: /12/g, replacement: "r" },
158
- { pattern: /\\\/\\\//g, replacement: "w" },
159
- { pattern: /vv/g, replacement: "w" },
160
- { pattern: /><\s*/g, replacement: "x" },
161
- { pattern: /\(_\)/g, replacement: "u" },
162
- { pattern: /\|_\|/g, replacement: "u" },
163
- { pattern: /\\\//g, replacement: "v" },
164
- { pattern: /\|\//g, replacement: "v" },
165
- // Single character mappings
166
- { pattern: /@/g, replacement: "a" },
167
- { pattern: /4/g, replacement: "u" },
168
- { pattern: /\^/g, replacement: "a" },
169
- { pattern: /8/g, replacement: "b" },
170
- { pattern: /6/g, replacement: "b" },
171
- { pattern: /\(/g, replacement: "c" },
172
- { pattern: /</g, replacement: "c" },
173
- { pattern: /©/g, replacement: "c" },
174
- { pattern: /¢/g, replacement: "c" },
175
- { pattern: /0/g, replacement: "o" },
176
- { pattern: /3/g, replacement: "e" },
177
- { pattern: /€/g, replacement: "e" },
178
- { pattern: /£/g, replacement: "e" },
179
- { pattern: /9/g, replacement: "g" },
180
- { pattern: /&/g, replacement: "g" },
181
- { pattern: /#/g, replacement: "h" },
182
- { pattern: /1/g, replacement: "i" },
183
- { pattern: /!/g, replacement: "i" },
184
- { pattern: /\|/g, replacement: "i" },
185
- { pattern: /7/g, replacement: "t" },
186
- { pattern: /5/g, replacement: "s" },
187
- { pattern: /\$/g, replacement: "s" },
188
- { pattern: /\+/g, replacement: "t" },
189
- { pattern: /2/g, replacement: "z" },
190
- ];
191
- // Apply all mappings
192
- for (const mapping of leetMappings) {
193
- normalized = normalized.replace(mapping.pattern, mapping.replacement);
279
+ const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
280
+ for (const [leet, normal] of sortedMappings) {
281
+ const regex = new RegExp(this.escapeRegex(leet), "g");
282
+ normalized = normalized.replace(regex, normal);
194
283
  }
195
284
  return normalized;
196
285
  }
197
- escapeRegex(str) {
198
- if (!str || typeof str !== "string") {
199
- return "";
200
- }
201
- return str.replace(/[\\^$.*+?()[\]{}|\-]/g, function (match) {
202
- return "\\" + match;
203
- });
204
- }
205
286
  /**
206
- * Generate word variations with common prefixes and suffixes
287
+ * Properly escape regex special characters
207
288
  */
208
- generateWordVariations(word) {
209
- const variations = new Set([word]);
210
- // Add suffix variations
211
- for (const suffix of this.commonSuffixes) {
212
- variations.add(word + suffix);
213
- // Handle words ending in 'e'
214
- if (word.endsWith("e") && !suffix.startsWith("e")) {
215
- variations.add(word.slice(0, -1) + suffix);
216
- }
217
- // Handle consonant doubling
218
- if (word.length > 2 && /[bcdfghjklmnpqrstvwxyz]/.test(word.slice(-1))) {
219
- variations.add(word + word.slice(-1) + suffix);
220
- }
221
- }
222
- // Add prefix variations
223
- for (const prefix of this.commonPrefixes) {
224
- variations.add(prefix + word);
225
- }
226
- return Array.from(variations);
289
+ escapeRegex(str) {
290
+ return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
227
291
  }
228
292
  /**
229
- * Check if text contains word boundaries around a match
293
+ * Check if a position has word boundaries (for strict mode)
230
294
  */
231
295
  hasWordBoundaries(text, start, end) {
232
296
  if (!this.strictMode)
233
297
  return true;
234
298
  const beforeChar = start > 0 ? text[start - 1] : " ";
235
299
  const afterChar = end < text.length ? text[end] : " ";
236
- return (this.wordBoundaryChars.test(beforeChar) &&
237
- this.wordBoundaryChars.test(afterChar));
300
+ const wordBoundaryRegex = /[\s\p{P}\p{S}]/u;
301
+ return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
238
302
  }
239
303
  /**
240
- * Calculate severity based on detected words
304
+ * Helper method to verify whole-word matching.
241
305
  */
242
- calculateSeverity(detectedWords) {
243
- if (detectedWords.length === 0)
244
- return ProfanitySeverity.MILD;
245
- // This is a simplified severity calculation
246
- // You can enhance this based on your specific word categorization
247
- const totalWords = detectedWords.length;
248
- const uniqueWords = new Set(detectedWords).size;
249
- if (totalWords >= 5 || uniqueWords >= 3)
250
- return ProfanitySeverity.EXTREME;
251
- if (totalWords >= 3 || uniqueWords >= 2)
252
- return ProfanitySeverity.SEVERE;
253
- if (totalWords >= 2)
254
- return ProfanitySeverity.MODERATE;
255
- return ProfanitySeverity.MILD;
256
- }
257
- /**
258
- * Load a built-in language dictionary
259
- * @param language - The language to load
260
- * @returns boolean - True if loaded successfully, false otherwise
261
- */
262
- loadLanguage(language) {
263
- if (this.loadedLanguages.has(language.toLowerCase())) {
264
- return true;
306
+ isWholeWord(text, start, end) {
307
+ // Check left boundary
308
+ if (start === 0) {
309
+ // ok
265
310
  }
266
- const langKey = language.toLowerCase();
267
- if (this.availableLanguages[langKey] &&
268
- this.availableLanguages[langKey].length > 0) {
269
- const words = this.availableLanguages[langKey];
270
- // Add words and their variations to the profanity set
271
- for (const word of words) {
272
- if (!word || typeof word !== "string")
273
- continue;
274
- const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
275
- this.profanitySet.add(normalizedWord);
276
- // Store normalized leet version mapping
277
- const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
278
- if (leetNormalized !== normalizedWord) {
279
- this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
280
- }
281
- // Generate and add variations
282
- const variations = this.generateWordVariations(normalizedWord);
283
- for (const variation of variations) {
284
- this.profanitySet.add(variation);
285
- const leetVariation = this.normalizeLeetSpeak(variation);
286
- if (leetVariation !== variation) {
287
- this.normalizedProfanityMap.set(leetVariation, variation);
288
- }
289
- }
290
- }
291
- this.loadedLanguages.add(langKey);
292
- console.log(`AllProfanity: Added ${words.length} ${language} words to the profanity list.`);
293
- return true;
294
- }
295
- else {
296
- console.warn(`AllProfanity: Language '${language}' not found or empty in available dictionaries.`);
311
+ else if (/\w/.test(text[start - 1])) {
297
312
  return false;
298
313
  }
299
- }
300
- /**
301
- * Load multiple languages at once
302
- * @param languages - Array of language names to load
303
- * @returns number - Number of successfully loaded languages
304
- */
305
- loadLanguages(languages) {
306
- let successCount = 0;
307
- languages.forEach((lang) => {
308
- if (this.loadLanguage(lang)) {
309
- successCount++;
310
- }
311
- });
312
- return successCount;
313
- }
314
- /**
315
- * Load all Indian languages at once
316
- * @returns number - Number of Indian languages loaded
317
- */
318
- loadIndianLanguages() {
319
- const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
320
- return this.loadLanguages(indianLanguages);
321
- }
322
- /**
323
- * Load a custom dictionary with a given name
324
- * @param name - Name to identify this dictionary
325
- * @param words - Array of profanity words
326
- */
327
- loadCustomDictionary(name, words) {
328
- if (!words || words.length === 0) {
329
- console.warn(`AllProfanity: Custom dictionary '${name}' has no words.`);
330
- return;
314
+ // Check right boundary
315
+ if (end === text.length) {
316
+ // ok
331
317
  }
332
- // Add to available languages for future reference
333
- this.availableLanguages[name.toLowerCase()] = words;
334
- // Process and add words
335
- for (const word of words) {
336
- if (!word || typeof word !== "string")
337
- continue;
338
- const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
339
- this.profanitySet.add(normalizedWord);
340
- // Store normalized leet version mapping
341
- const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
342
- if (leetNormalized !== normalizedWord) {
343
- this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
344
- }
345
- // Generate and add variations
346
- const variations = this.generateWordVariations(normalizedWord);
347
- for (const variation of variations) {
348
- this.profanitySet.add(variation);
349
- const leetVariation = this.normalizeLeetSpeak(variation);
350
- if (leetVariation !== variation) {
351
- this.normalizedProfanityMap.set(leetVariation, variation);
352
- }
353
- }
318
+ else if (/\w/.test(text[end])) {
319
+ return false;
354
320
  }
355
- this.loadedLanguages.add(name.toLowerCase());
356
- console.log(`AllProfanity: Added ${words.length} words from custom '${name}' dictionary.`);
321
+ return true;
357
322
  }
358
323
  /**
359
- * Add words to whitelist (words that should never be flagged as profanity)
360
- * @param words - Array of words to whitelist
324
+ * Check if a match is whitelisted (by actual matched substring and dictionary word)
361
325
  */
362
- addToWhitelist(words) {
363
- for (const word of words) {
364
- if (word && typeof word === "string") {
365
- this.whitelistSet.add(this.caseSensitive ? word : word.toLowerCase());
366
- }
326
+ isWhitelistedMatch(word, matchedText) {
327
+ if (this.caseSensitive) {
328
+ return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
329
+ }
330
+ else {
331
+ return (this.whitelistSet.has(word.toLowerCase()) ||
332
+ this.whitelistSet.has(matchedText.toLowerCase()));
367
333
  }
368
334
  }
369
335
  /**
370
- * Remove words from whitelist
371
- * @param words - Array of words to remove from whitelist
336
+ * Remove overlapping matches, keep only the longest at each start position
372
337
  */
373
- removeFromWhitelist(words) {
374
- for (const word of words) {
375
- if (word && typeof word === "string") {
376
- this.whitelistSet.delete(this.caseSensitive ? word : word.toLowerCase());
338
+ deduplicateMatches(matches) {
339
+ const sorted = [...matches].sort((a, b) => {
340
+ if (a.start !== b.start)
341
+ return a.start - b.start;
342
+ return b.end - a.end;
343
+ });
344
+ const result = [];
345
+ let lastEnd = -1;
346
+ for (const match of sorted) {
347
+ if (match.start >= lastEnd) {
348
+ result.push(match);
349
+ lastEnd = match.end;
377
350
  }
378
351
  }
352
+ return result;
379
353
  }
380
354
  /**
381
- * Advanced profanity detection with detailed results
382
- * @param text - The text to analyze
383
- * @returns ProfanityDetectionResult - Detailed detection results
355
+ * Advanced profanity detection using efficient trie-based algorithm
384
356
  */
385
357
  detect(text) {
386
- if (!text || typeof text !== "string") {
358
+ const validatedText = validateString(text, "text");
359
+ if (validatedText.length === 0) {
387
360
  return {
388
361
  hasProfanity: false,
389
362
  detectedWords: [],
390
- cleanedText: text || "",
363
+ cleanedText: validatedText,
391
364
  severity: ProfanitySeverity.MILD,
392
365
  positions: [],
393
366
  };
394
367
  }
395
- const normalizedText = this.caseSensitive ? text : text.toLowerCase();
396
- const leetNormalizedText = this.normalizeLeetSpeak(normalizedText);
397
- const detectedWords = [];
398
- const positions = [];
399
- // Check for whole word matches first
400
- for (const profanity of this.profanitySet) {
401
- if (this.whitelistSet.has(profanity))
402
- continue;
403
- try {
404
- // Create regex for word boundary detection
405
- const escapedWord = this.escapeRegex(profanity);
406
- const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
407
- let match;
408
- while ((match = wordRegex.exec(normalizedText)) !== null) {
409
- if (this.hasWordBoundaries(normalizedText, match.index, match.index + match[0].length)) {
410
- detectedWords.push(match[0]);
411
- positions.push({
412
- word: match[0],
413
- start: match.index,
414
- end: match.index + match[0].length,
415
- });
416
- }
417
- }
418
- }
419
- catch (error) {
420
- // Fallback to simple string search if regex fails
421
- const index = normalizedText.indexOf(profanity);
422
- if (index !== -1) {
423
- detectedWords.push(profanity);
424
- positions.push({
425
- word: profanity,
426
- start: index,
427
- end: index + profanity.length,
428
- });
429
- }
368
+ const matches = [];
369
+ const normalizedText = this.caseSensitive
370
+ ? validatedText
371
+ : validatedText.toLowerCase();
372
+ this.findMatches(normalizedText, validatedText, matches);
373
+ // Leet speak detection (normalize and search, map back to original)
374
+ if (this.enableLeetSpeak) {
375
+ const leetNormalized = this.normalizeLeetSpeak(normalizedText);
376
+ if (leetNormalized !== normalizedText) {
377
+ this.findMatches(leetNormalized, validatedText, matches);
430
378
  }
431
379
  }
432
- // Check leet speak normalized text
433
- if (this.enableLeetSpeak && leetNormalizedText !== normalizedText) {
434
- for (const profanity of this.profanitySet) {
435
- if (this.whitelistSet.has(profanity))
380
+ const uniqueMatches = this.deduplicateMatches(matches);
381
+ const detectedWords = uniqueMatches.map((m) => m.originalWord);
382
+ const severity = this.calculateSeverity(uniqueMatches);
383
+ const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
384
+ return {
385
+ hasProfanity: uniqueMatches.length > 0,
386
+ detectedWords,
387
+ cleanedText,
388
+ severity,
389
+ positions: uniqueMatches.map((m) => ({
390
+ word: m.originalWord,
391
+ start: m.start,
392
+ end: m.end,
393
+ })),
394
+ };
395
+ }
396
+ /**
397
+ * Main matching function, with whole-word logic.
398
+ */
399
+ findMatches(searchText, originalText, matches) {
400
+ for (let i = 0; i < searchText.length; i++) {
401
+ const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
402
+ for (const match of matchResults) {
403
+ const start = i + match.start;
404
+ const end = i + match.end;
405
+ // Only match whole words if !detectPartialWords
406
+ if (!this.detectPartialWords &&
407
+ !this.isWholeWord(originalText, start, end)) {
436
408
  continue;
437
- try {
438
- const escapedWord = this.escapeRegex(profanity);
439
- const wordRegex = new RegExp(`\\b${escapedWord}\\b`, this.caseSensitive ? "g" : "gi");
440
- let match;
441
- while ((match = wordRegex.exec(leetNormalizedText)) !== null) {
442
- if (this.hasWordBoundaries(leetNormalizedText, match.index, match.index + match[0].length)) {
443
- // Find the original text that corresponds to this match
444
- const originalMatch = normalizedText.substring(match.index, match.index + match[0].length);
445
- if (!detectedWords.includes(originalMatch)) {
446
- detectedWords.push(originalMatch);
447
- positions.push({
448
- word: originalMatch,
449
- start: match.index,
450
- end: match.index + match[0].length,
451
- });
452
- }
453
- }
454
- }
455
409
  }
456
- catch (error) {
457
- // Fallback to simple string search
458
- if (leetNormalizedText.includes(profanity)) {
459
- const index = leetNormalizedText.indexOf(profanity);
460
- const originalMatch = normalizedText.substring(index, index + profanity.length);
461
- if (!detectedWords.includes(originalMatch)) {
462
- detectedWords.push(originalMatch);
463
- positions.push({
464
- word: originalMatch,
465
- start: index,
466
- end: index + profanity.length,
467
- });
468
- }
469
- }
470
- }
471
- }
472
- }
473
- // Partial word detection (if enabled)
474
- if (this.detectPartialWords) {
475
- for (const profanity of this.profanitySet) {
476
- if (this.whitelistSet.has(profanity) || profanity.length < 4)
410
+ // Use actual matched text for whitelist check
411
+ const matchedText = originalText.substring(start, end);
412
+ if (this.isWhitelistedMatch(match.word, matchedText)) {
477
413
  continue;
478
- if (normalizedText.includes(profanity) ||
479
- leetNormalizedText.includes(profanity)) {
480
- const index = normalizedText.indexOf(profanity);
481
- if (index !== -1 &&
482
- !detectedWords.some((w) => normalizedText.indexOf(w) === index)) {
483
- detectedWords.push(profanity);
484
- positions.push({
485
- word: profanity,
486
- start: index,
487
- end: index + profanity.length,
488
- });
489
- }
414
+ }
415
+ if (this.hasWordBoundaries(originalText, start, end)) {
416
+ matches.push({
417
+ word: match.word,
418
+ start,
419
+ end,
420
+ originalWord: matchedText,
421
+ });
490
422
  }
491
423
  }
492
424
  }
493
- // REMOVED: cleanedText = this.clean(text) - this was causing circular dependency
494
- // We'll generate the cleaned text directly here instead
495
- let cleanedText = text;
496
- if (detectedWords.length > 0) {
497
- // Sort positions by start index in descending order to avoid index shifting
498
- const sortedPositions = positions.sort((a, b) => b.start - a.start);
499
- for (const pos of sortedPositions) {
500
- const originalWord = text.substring(pos.start, pos.end);
501
- const replacement = this.defaultPlaceholder.repeat(originalWord.length);
502
- cleanedText =
503
- cleanedText.substring(0, pos.start) +
504
- replacement +
505
- cleanedText.substring(pos.end);
506
- }
425
+ }
426
+ /**
427
+ * Generate cleaned text by replacing profane words (non-overlapping only)
428
+ */
429
+ generateCleanedText(originalText, matches) {
430
+ if (matches.length === 0)
431
+ return originalText;
432
+ let result = originalText;
433
+ // Process matches in reverse order to maintain indices and avoid overlap
434
+ const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
435
+ for (const match of sortedMatches) {
436
+ const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
437
+ result =
438
+ result.substring(0, match.start) +
439
+ replacement +
440
+ result.substring(match.end);
507
441
  }
508
- const severity = this.calculateSeverity(detectedWords);
509
- return {
510
- hasProfanity: detectedWords.length > 0,
511
- detectedWords: [...new Set(detectedWords)],
512
- cleanedText,
513
- severity,
514
- positions,
515
- };
442
+ return result;
516
443
  }
517
444
  /**
518
- * Check if a string contains profanity (simple boolean check)
519
- * @param string - The string to check
520
- * @returns boolean - True if profanity found, false otherwise
445
+ * Simple boolean check for profanity
521
446
  */
522
- check(string) {
523
- return this.detect(string).hasProfanity;
447
+ check(text) {
448
+ return this.detect(text).hasProfanity;
524
449
  }
525
450
  /**
526
- * Clean a string by replacing profanities with placeholders
527
- * @param string - The string to clean
528
- * @param placeholder - Optional custom placeholder
529
- * @returns string - The cleaned string
451
+ * Clean text with custom placeholder
530
452
  */
531
- clean(string, placeholder) {
532
- if (!string || typeof string !== "string")
533
- return string || "";
534
- const placeholderChar = placeholder || this.defaultPlaceholder;
535
- const detection = this.detect(string);
536
- // If detect() already provided cleanedText and no custom placeholder, use it
537
- if (!placeholder && detection.cleanedText !== string) {
453
+ clean(text, placeholder) {
454
+ const detection = this.detect(text);
455
+ if (!placeholder || placeholder === this.defaultPlaceholder) {
538
456
  return detection.cleanedText;
539
457
  }
540
- // Otherwise, build cleaned text with custom placeholder
541
- let result = string;
542
- const sortedPositions = detection.positions.sort((a, b) => b.start - a.start);
458
+ // Use custom placeholder
459
+ let result = text;
460
+ const sortedPositions = [
461
+ ...this.deduplicateMatches(detection.positions.map((p) => ({
462
+ word: p.word,
463
+ start: p.start,
464
+ end: p.end,
465
+ originalWord: text.substring(p.start, p.end),
466
+ }))),
467
+ ].sort((a, b) => b.start - a.start);
543
468
  for (const pos of sortedPositions) {
544
- const originalWord = string.substring(pos.start, pos.end);
545
- const replacement = placeholderChar.repeat(originalWord.length);
469
+ const originalWord = text.substring(pos.start, pos.end);
470
+ const replacement = placeholder.repeat(originalWord.length);
546
471
  result =
547
472
  result.substring(0, pos.start) +
548
473
  replacement +
@@ -551,107 +476,218 @@ export class AllProfanity {
551
476
  return result;
552
477
  }
553
478
  /**
554
- * Clean a string by replacing each profane word with a single placeholder
555
- * @param string - The string to clean
556
- * @param placeholder - The placeholder to use (defaults to '***')
557
- * @returns string - The cleaned string
558
- */
559
- cleanWithWord(string, placeholder = "***") {
560
- if (!string || typeof string !== "string")
561
- return string || "";
562
- // Build a regex that matches any profane word with word boundaries, unicode-aware
563
- const words = Array.from(this.profanitySet)
564
- .map((w) => w.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) // escape regex
565
- .sort((a, b) => b.length - a.length); // longer words first to avoid partial matches
566
- if (words.length === 0)
567
- return string;
568
- // Unicode safe word boundary: (?<=^|[^\p{L}\p{N}_])WORD(?=[^\p{L}\p{N}_]|$)
569
- // This ensures we only match whole words, not inside other words.
570
- const regex = new RegExp(`(?<=^|[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\])(` +
571
- words.join("|") +
572
- `)(?=[\\s\\.,;:!\\?\\-_+=\\[\\]{}()"'\\/\\\\]|$)`, this.caseSensitive ? "gu" : "giu");
573
- // Replace all matches with the placeholder.
574
- return string.replace(regex, placeholder);
575
- }
576
- /**
577
- * Get the current list of profanity words
578
- * @returns string[] - Array of all profanity words
479
+ * Clean text by replacing each profane word with a single placeholder (word-level)
579
480
  */
580
- list() {
581
- return Array.from(this.profanitySet);
481
+ cleanWithPlaceholder(text, placeholder = "***") {
482
+ const detection = this.detect(text);
483
+ if (detection.positions.length === 0)
484
+ return text;
485
+ let result = text;
486
+ // Sort matches so later matches don't affect earlier ones
487
+ const sortedPositions = [
488
+ ...this.deduplicateMatches(detection.positions.map((p) => ({
489
+ word: p.word,
490
+ start: p.start,
491
+ end: p.end,
492
+ originalWord: text.substring(p.start, p.end),
493
+ }))),
494
+ ].sort((a, b) => b.start - a.start);
495
+ for (const pos of sortedPositions) {
496
+ // Only replace whole words!
497
+ if (!this.isWholeWord(result, pos.start, pos.end))
498
+ continue;
499
+ result =
500
+ result.substring(0, pos.start) +
501
+ placeholder +
502
+ result.substring(pos.end);
503
+ }
504
+ return result;
582
505
  }
583
506
  /**
584
507
  * Add word(s) to the profanity list
585
- * @param word - String or array of strings to add
586
508
  */
587
509
  add(word) {
588
510
  const words = Array.isArray(word) ? word : [word];
589
- for (const w of words) {
590
- if (!w || typeof w !== "string")
591
- continue;
592
- const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
593
- this.profanitySet.add(normalizedWord);
594
- // Add leet speak mapping
595
- const leetNormalized = this.normalizeLeetSpeak(normalizedWord);
596
- if (leetNormalized !== normalizedWord) {
597
- this.normalizedProfanityMap.set(leetNormalized, normalizedWord);
598
- }
599
- // Add variations
600
- const variations = this.generateWordVariations(normalizedWord);
601
- for (const variation of variations) {
602
- this.profanitySet.add(variation);
603
- }
511
+ const validatedWords = validateStringArray(words, "words to add");
512
+ for (const w of validatedWords) {
513
+ this.dynamicWords.add(w);
514
+ this.addWordToTrie(w);
604
515
  }
605
516
  }
606
517
  /**
607
518
  * Remove word(s) from the profanity list
608
- * @param word - String or array of strings to remove
609
519
  */
610
520
  remove(word) {
611
521
  const words = Array.isArray(word) ? word : [word];
612
- for (const w of words) {
613
- if (!w || typeof w !== "string")
614
- continue;
522
+ const validatedWords = validateStringArray(words, "words to remove");
523
+ for (const w of validatedWords) {
615
524
  const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
616
- this.profanitySet.delete(normalizedWord);
617
- // Remove variations
618
- const variations = this.generateWordVariations(normalizedWord);
619
- for (const variation of variations) {
620
- this.profanitySet.delete(variation);
525
+ this.profanityTrie.removeWord(normalizedWord);
526
+ this.dynamicWords.delete(w);
527
+ }
528
+ }
529
+ /**
530
+ * Add words to whitelist
531
+ */
532
+ addToWhitelist(words) {
533
+ const validatedWords = validateStringArray(words, "whitelist words");
534
+ for (const word of validatedWords) {
535
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
536
+ this.whitelistSet.add(normalizedWord);
537
+ }
538
+ }
539
+ /**
540
+ * Remove words from whitelist
541
+ */
542
+ removeFromWhitelist(words) {
543
+ const validatedWords = validateStringArray(words, "whitelist words");
544
+ for (const word of validatedWords) {
545
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
546
+ this.whitelistSet.delete(normalizedWord);
547
+ }
548
+ }
549
+ /**
550
+ * Helper for whitelist checking with correct normalization
551
+ */
552
+ isWhitelisted(word) {
553
+ const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
554
+ return this.whitelistSet.has(normalizedWord);
555
+ }
556
+ /**
557
+ * Load a built-in language dictionary
558
+ */
559
+ loadLanguage(language) {
560
+ if (!language || typeof language !== "string") {
561
+ this.logger.warn(`Invalid language parameter: ${language}`);
562
+ return false;
563
+ }
564
+ const langKey = language.toLowerCase().trim();
565
+ if (this.loadedLanguages.has(langKey)) {
566
+ return true;
567
+ }
568
+ const words = this.availableLanguages[langKey];
569
+ if (!words || words.length === 0) {
570
+ this.logger.warn(`Language '${language}' not found or empty`);
571
+ return false;
572
+ }
573
+ try {
574
+ let addedCount = 0;
575
+ for (const word of words) {
576
+ if (this.addWordToTrie(word)) {
577
+ addedCount++;
578
+ }
579
+ }
580
+ this.loadedLanguages.add(langKey);
581
+ this.logger.info(`Loaded ${addedCount} words from ${language} dictionary`);
582
+ return true;
583
+ }
584
+ catch (error) {
585
+ this.logger.error(`Failed to load language ${language}: ${error}`);
586
+ return false;
587
+ }
588
+ }
589
+ /**
590
+ * Load multiple languages at once
591
+ */
592
+ loadLanguages(languages) {
593
+ const validatedLanguages = validateStringArray(languages, "languages");
594
+ return validatedLanguages.reduce((count, lang) => {
595
+ return this.loadLanguage(lang) ? count + 1 : count;
596
+ }, 0);
597
+ }
598
+ /**
599
+ * Load all Indian languages
600
+ */
601
+ loadIndianLanguages() {
602
+ const indianLanguages = ["hindi", "bengali", "tamil", "telugu"];
603
+ return this.loadLanguages(indianLanguages);
604
+ }
605
+ /**
606
+ * Load a custom dictionary
607
+ */
608
+ loadCustomDictionary(name, words) {
609
+ validateString(name, "dictionary name");
610
+ const validatedWords = validateStringArray(words, "custom dictionary words");
611
+ if (validatedWords.length === 0) {
612
+ this.logger.warn(`Custom dictionary '${name}' contains no valid words`);
613
+ return;
614
+ }
615
+ try {
616
+ let addedCount = 0;
617
+ for (const word of validatedWords) {
618
+ if (this.addWordToTrie(word)) {
619
+ addedCount++;
620
+ }
621
621
  }
622
+ // Store for future reference
623
+ this.availableLanguages[name.toLowerCase()] = validatedWords;
624
+ this.loadedLanguages.add(name.toLowerCase());
625
+ this.logger.info(`Loaded ${addedCount} words from custom dictionary '${name}'`);
626
+ }
627
+ catch (error) {
628
+ this.logger.error(`Failed to load custom dictionary ${name}: ${error}`);
622
629
  }
623
630
  }
624
631
  /**
625
- * Clear the filter list and reset to default
632
+ * Add a single word to the trie structure
633
+ */
634
+ addWordToTrie(word) {
635
+ if (!word || typeof word !== "string" || word.trim().length === 0) {
636
+ return false;
637
+ }
638
+ const normalizedWord = this.caseSensitive
639
+ ? word.trim()
640
+ : word.trim().toLowerCase();
641
+ // Skip if whitelisted
642
+ if (this.isWhitelisted(normalizedWord)) {
643
+ return false;
644
+ }
645
+ // Add to trie
646
+ this.profanityTrie.addWord(normalizedWord);
647
+ return true;
648
+ }
649
+ /**
650
+ * Calculate the overall severity level from the total number of matches and the number of unique matched words
651
+ */
652
+ calculateSeverity(matches) {
653
+ if (matches.length === 0)
654
+ return ProfanitySeverity.MILD;
655
+ const uniqueWords = new Set(matches.map((m) => m.word)).size;
656
+ const totalMatches = matches.length;
657
+ if (totalMatches >= 5 || uniqueWords >= 4)
658
+ return ProfanitySeverity.EXTREME;
659
+ if (totalMatches >= 3 || uniqueWords >= 3)
660
+ return ProfanitySeverity.SEVERE;
661
+ if (totalMatches >= 2 || uniqueWords >= 2)
662
+ return ProfanitySeverity.MODERATE;
663
+ return ProfanitySeverity.MILD;
664
+ }
665
+ /**
666
+ * Clear all loaded dictionaries
626
667
  */
627
668
  clearList() {
628
- this.profanitySet.clear();
629
- this.normalizedProfanityMap.clear();
669
+ this.profanityTrie.clear();
630
670
  this.loadedLanguages.clear();
671
+ this.dynamicWords.clear();
631
672
  }
632
673
  /**
633
- * Change the character used as placeholder
634
- * @param placeholder - Single character to use as placeholder
674
+ * Set placeholder character
635
675
  */
636
676
  setPlaceholder(placeholder) {
637
- if (placeholder.length !== 1) {
638
- console.warn("AllProfanity: Placeholder should be a single character. Using first character.");
639
- this.defaultPlaceholder = placeholder.charAt(0);
640
- }
641
- else {
642
- this.defaultPlaceholder = placeholder;
677
+ validateString(placeholder, "placeholder");
678
+ if (placeholder.length === 0) {
679
+ throw new Error("Placeholder cannot be empty");
643
680
  }
681
+ this.defaultPlaceholder = placeholder.charAt(0);
644
682
  }
645
683
  /**
646
- * Get the list of currently loaded languages
647
- * @returns string[] - Array of loaded language names
684
+ * Get loaded languages
648
685
  */
649
686
  getLoadedLanguages() {
650
687
  return Array.from(this.loadedLanguages);
651
688
  }
652
689
  /**
653
- * Get the list of available language dictionaries
654
- * @returns string[] - Array of available language names
690
+ * Get available languages
655
691
  */
656
692
  getAvailableLanguages() {
657
693
  return Object.keys(this.availableLanguages);
@@ -671,17 +707,37 @@ export class AllProfanity {
671
707
  };
672
708
  }
673
709
  /**
674
- * Update configuration
710
+ * Rebuilds the profanity trie from loaded language dictionaries and dynamic words.
711
+ */
712
+ rebuildTrie() {
713
+ this.profanityTrie.clear();
714
+ // Re-add all loaded language words
715
+ for (const lang of this.loadedLanguages) {
716
+ const words = this.availableLanguages[lang] || [];
717
+ for (const word of words) {
718
+ this.addWordToTrie(word);
719
+ }
720
+ }
721
+ // Re-add dynamic words
722
+ for (const word of this.dynamicWords) {
723
+ this.addWordToTrie(word);
724
+ }
725
+ }
726
+ /**
727
+ * Update configuration. Rebuild trie if needed.
675
728
  */
676
729
  updateConfig(options) {
730
+ let rebuildNeeded = false;
677
731
  if (options.defaultPlaceholder !== undefined) {
678
732
  this.setPlaceholder(options.defaultPlaceholder);
679
733
  }
680
734
  if (options.enableLeetSpeak !== undefined) {
681
735
  this.enableLeetSpeak = options.enableLeetSpeak;
682
736
  }
683
- if (options.caseSensitive !== undefined) {
737
+ if (options.caseSensitive !== undefined &&
738
+ options.caseSensitive !== this.caseSensitive) {
684
739
  this.caseSensitive = options.caseSensitive;
740
+ rebuildNeeded = true;
685
741
  }
686
742
  if (options.strictMode !== undefined) {
687
743
  this.strictMode = options.strictMode;
@@ -692,9 +748,12 @@ export class AllProfanity {
692
748
  if (options.whitelistWords) {
693
749
  this.addToWhitelist(options.whitelistWords);
694
750
  }
751
+ if (rebuildNeeded) {
752
+ this.rebuildTrie();
753
+ }
695
754
  }
696
755
  }
697
- // Create and export a singleton instance with default settings
756
+ // Create and export a singleton instance
698
757
  const allProfanity = new AllProfanity();
699
758
  export default allProfanity;
700
759
  //# sourceMappingURL=index.js.map