allprofanity 1.0.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,24 +1,29 @@
1
1
  # AllProfanity
2
2
 
3
- A comprehensive multi-language profanity filter for JavaScript/TypeScript applications with built-in support for English, Hindi, Hinglish, Bengali, Tamil, Telugu, French, German, and Spanish content.
3
+ A comprehensive, zero-dependency, multi-language profanity filter for JavaScript/TypeScript applications with built-in support for English, Hindi, Hinglish, Bengali, Tamil, Telugu, French, German, and Spanish content.
4
4
 
5
5
  [![npm version](https://img.shields.io/npm/v/allprofanity.svg)](https://www.npmjs.com/package/allprofanity)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
7
 
8
8
  ## Features
9
9
 
10
- - **Multi-language Support**: Pre-loaded with English profanities (from leo-profanity) and extensive dictionaries for Hindi/Hinglish, Bengali, Tamil, Telugu, French, German, and Spanish
11
- - **Multiple Scripts**: Detects profanity in both Latin/Roman and native scripts (Devanagari, Bengali, Tamil, Telugu)
12
- - **Case Insensitive**: Works regardless of letter case
10
+ - **Multi-language Support**: Built-in dictionaries for English, Hindi/Hinglish, Bengali, Tamil, Telugu, French, German, and Spanish.
11
+ - **Multiple Scripts**: Detects profanity in both Latin/Roman and native scripts (Devanagari, Bengali, Tamil, Telugu).
12
+ - **Case Insensitive (configurable)**: Matching is case-insensitive by default and can be configured to be case-sensitive.
13
+ - **Leet Speak Detection**: Optionally detects leet speak and obfuscated profanities.
13
14
  - **Flexible Cleaning Options**:
14
- - Character-level replacement (each character of a profane word becomes a placeholder)
15
- - Word-level replacement (entire profane word becomes a single placeholder)
16
- - **Customizable**:
17
- - Dynamically add/remove words from the filter
18
- - Set custom placeholder characters or strings
19
- - **Zero Dependencies**: Only depends on leo-profanity as the base filter
20
- - **TypeScript Support**: Full TypeScript type definitions included
21
- - **Extensible**: Designed with multi-language support in mind, making it easy to add more languages in the future
15
+ - Character-level replacement (each character of a profane word becomes a placeholder).
16
+ - Word-level replacement (entire profane word becomes a single placeholder).
17
+ - **Customizable & Extensible**:
18
+ - Dynamically add/remove words or whole dictionaries.
19
+ - Set custom placeholder characters or strings.
20
+ - Supports custom language packs.
21
+ - Whitelist words to avoid false positives.
22
+ - Strict mode and partial word detection options.
23
+ - **Severity Levels**: Detects severity of profanities (MILD, MODERATE, SEVERE, EXTREME).
24
+ - **Zero External Dependencies**: Fully built from scratch for maximum performance and control.
25
+ - **TypeScript Support**: Full TypeScript type definitions included.
26
+ - **Exportable Dictionaries**: Language word lists are exportable for direct use or extension.
22
27
 
23
28
  ## Installation
24
29
 
@@ -42,11 +47,11 @@ profanity.check('यह एक चूतिया परीक्षण है
42
47
  profanity.check('Ye ek chutiya test hai.'); // true (Hinglish example)
43
48
 
44
49
  // Clean profanity (character by character replacement)
45
- profanity.clean('This is a fucking test.');
50
+ profanity.clean('This is a fucking test.');
46
51
  // => "This is a ****ing test."
47
52
 
48
53
  // Clean profanity (whole word replacement)
49
- profanity.cleanWithWord('This is a fucking test.');
54
+ profanity.cleanWithWord('This is a fucking test.');
50
55
  // => "This is a *** test."
51
56
  ```
52
57
 
@@ -61,6 +66,21 @@ profanity.check('This contains bullshit.'); // true
61
66
  profanity.check('This is clean.'); // false
62
67
  ```
63
68
 
69
+ ### `detect(string: string): ProfanityDetectionResult`
70
+
71
+ Advanced detection with details about profanities found, severity, cleaned text, and word positions.
72
+
73
+ ```javascript
74
+ const result = profanity.detect('This contains bullshit.');
75
+ // result: {
76
+ // hasProfanity: true,
77
+ // detectedWords: [...],
78
+ // cleanedText: ...,
79
+ // severity: ...,
80
+ // positions: [...]
81
+ // }
82
+ ```
83
+
64
84
  ### `clean(string: string, placeholder?: string): string`
65
85
 
66
86
  Cleans a string by replacing each character of profane words with a placeholder character.
@@ -139,114 +159,140 @@ Sets the default placeholder character for the `clean` method.
139
159
  profanity.setPlaceholder('#');
140
160
  ```
141
161
 
142
- ## Word Boundary Detection
162
+ ### `getLoadedLanguages(): string[]`
143
163
 
144
- The library is designed to handle word boundaries correctly, reducing false positives:
164
+ Returns the list of currently loaded languages.
145
165
 
146
166
  ```javascript
147
- profanity.check('He is an associate professor.'); // false, even though 'ass' is a profane word
148
- profanity.check('I'm an analyst at this company.'); // false, even though 'anal' is a profane word
149
- profanity.check('This is ass and that's bad.'); // true
167
+ const loaded = profanity.getLoadedLanguages();
168
+ // => ['english', 'hindi', ...]
150
169
  ```
151
170
 
152
- ## Language Support
153
-
154
- ### Current Languages
171
+ ### `getAvailableLanguages(): string[]`
155
172
 
156
- #### English
173
+ Returns the list of all available built-in languages.
157
174
 
158
- Built on top of the leo-profanity library, AllProfanity includes comprehensive English profanity detection.
175
+ ```javascript
176
+ const available = profanity.getAvailableLanguages();
177
+ // => ['english', 'hindi', 'bengali', 'tamil', 'telugu', 'french', 'german', 'spanish']
178
+ ```
159
179
 
160
- #### Hindi/Hinglish Support
180
+ ### `loadLanguage(language: string): boolean`
161
181
 
162
- The library comes pre-loaded with an extensive list of Hindi profanities in both Devanagari and Roman scripts, as well as common Hinglish abbreviations and variations.
182
+ Loads a built-in language dictionary by name.
163
183
 
164
184
  ```javascript
165
- // Hindi in Devanagari script
166
- profanity.check('इस वाक्य में लंड शब्द है।'); // true
185
+ profanity.loadLanguage('bengali');
186
+ profanity.loadLanguage('french');
187
+ ```
167
188
 
168
- // Hindi in Roman script
169
- profanity.check('Is vakya mein lund shabd hai.'); // true
189
+ ### `loadLanguages(languages: string[]): number`
170
190
 
171
- // Hinglish abbreviations
172
- profanity.check('Usne bc kaha.'); // true
191
+ Loads multiple languages at once.
192
+
193
+ ```javascript
194
+ profanity.loadLanguages(['tamil', 'german', 'spanish']);
173
195
  ```
174
196
 
175
- #### Indian Languages
197
+ ### `loadIndianLanguages(): number`
176
198
 
177
- AllProfanity supports multiple Indian languages including Bengali, Tamil, and Telugu in both their native scripts and Roman transliterations.
199
+ Loads Hindi, Bengali, Tamil, and Telugu dictionaries.
178
200
 
179
201
  ```javascript
180
- // Bengali in Bengali script
181
- profanity.check('এই বাক্যে বাল শব্দ আছে।'); // true
202
+ profanity.loadIndianLanguages();
203
+ ```
182
204
 
183
- // Tamil in Tamil script
184
- profanity.check('இந்த வாக்கியத்தில் கூதி உள்ளது.'); // true
205
+ ### `loadCustomDictionary(name: string, words: string[]): void`
185
206
 
186
- // Telugu in Telugu script
187
- profanity.check('ఈ వాక్యంలో పూకు పదం ఉంది.'); // true
207
+ Loads a custom dictionary under the given name.
188
208
 
189
- // Loading all Indian languages at once
190
- import { AllProfanity } from 'allprofanity';
191
- const filter = new AllProfanity();
192
- filter.loadIndianLanguages(); // Loads Hindi, Bengali, Tamil, and Telugu
209
+ ```javascript
210
+ profanity.loadCustomDictionary('myLanguage', ['word1', 'word2']);
211
+ profanity.loadLanguage('myLanguage');
193
212
  ```
194
213
 
195
- #### European Languages
214
+ ### `addToWhitelist(words: string[]): void` / `removeFromWhitelist(words: string[]): void`
196
215
 
197
- AllProfanity also supports several European languages including French, German, and Spanish.
216
+ Add or remove words from the whitelist (words never flagged as profanity).
198
217
 
199
218
  ```javascript
200
- // French example
201
- profanity.check('Cette phrase contient le mot merde.'); // true
219
+ profanity.addToWhitelist(['anal', 'ass']);
220
+ profanity.removeFromWhitelist(['anal']);
221
+ ```
222
+
223
+ ### `getConfig(): Partial<AllProfanityOptions>`
224
+
225
+ Returns the current configuration.
226
+
227
+ ### `updateConfig(options: Partial<AllProfanityOptions>): void`
202
228
 
203
- // German example
204
- profanity.check('Dieser Satz enthält das Wort scheisse.'); // true
229
+ Update configuration (enable/disable leet speak, case sensitivity, etc.).
205
230
 
206
- // Spanish example
207
- profanity.check('Esta frase contiene la palabra mierda.'); // true
231
+ ## Word Boundary Detection
232
+
233
+ The library handles word boundaries and reduces false positives:
234
+
235
+ ```javascript
236
+ profanity.check('He is an associate professor.'); // false, even though 'ass' is a profane word
237
+ profanity.check('I\'m an analyst at this company.'); // false, even though 'anal' is a profane word
238
+ profanity.check('This is ass and that\'s bad.'); // true
208
239
  ```
209
240
 
210
- ### Loading Additional Languages
241
+ ## Language Support
242
+
243
+ ### Current Languages
211
244
 
212
- By default, only English and Hindi are loaded. You can load additional languages as needed:
245
+ - **English** (imported from `./languages/english-words.js`)
246
+ - **Hindi/Hinglish** (`./languages/hindi-words.js`)
247
+ - **Bengali** (`./languages/bengali-words.js`)
248
+ - **Tamil** (`./languages/tamil-words.js`)
249
+ - **Telugu** (`./languages/telugu-words.js`)
250
+ - **French** (`./languages/french-words.js`)
251
+ - **German** (`./languages/german-words.js`)
252
+ - **Spanish** (`./languages/spanish-words.js`)
253
+
254
+ > **Note:** All dictionaries are exported for direct access/import.
255
+
256
+ #### Usage Examples
213
257
 
214
258
  ```javascript
215
- // Load individual languages
216
- profanity.loadLanguage('bengali');
217
- profanity.loadLanguage('tamil');
218
- profanity.loadLanguage('french');
259
+ profanity.check('इस वाक्य में लंड शब्द है।'); // true (Hindi)
260
+ profanity.check('Is vakya mein lund shabd hai.'); // true (Hinglish)
261
+ profanity.check('এই বাক্যে বাল শব্দ আছে।'); // true (Bengali)
262
+ profanity.check('இந்த வாக்கியத்தில் கூதி உள்ளது.'); // true (Tamil)
263
+ profanity.check('Cette phrase contient le mot merde.'); // true (French)
264
+ ```
219
265
 
220
- // Load multiple languages at once
221
- profanity.loadLanguages(['telugu', 'german', 'spanish']);
266
+ ### Mixed Language Content
222
267
 
223
- // Get available languages
224
- const availableLanguages = profanity.getAvailableLanguages();
225
- // => ['hindi', 'bengali', 'tamil', 'telugu', 'french', 'german', 'spanish']
268
+ AllProfanity can detect profanities from multiple languages in a single string:
226
269
 
227
- // Get currently loaded languages
228
- const loadedLanguages = profanity.getLoadedLanguages();
229
- // => ['english', 'hindi', ...]
270
+ ```javascript
271
+ profanity.check('This English sentence has chutiya which is bad.'); // true
272
+ profanity.check('I\'m saying मादरचोद and bullshit in one sentence.'); // true
230
273
  ```
231
274
 
232
- ### Future Language Support
275
+ ### Loading Additional or Custom Languages
233
276
 
234
- AllProfanity is designed with extensibility in mind. If you'd like to contribute language packs, please see the Contributing section below.
277
+ By default, only English and Hindi are loaded. You can load additional languages as needed:
235
278
 
236
- ## Mixed Language Content
279
+ ```javascript
280
+ profanity.loadLanguage('bengali');
281
+ profanity.loadLanguages(['tamil', 'french']);
282
+ ```
237
283
 
238
- AllProfanity effectively handles mixed-language content containing profanities from different languages:
284
+ You can also load custom dictionaries:
239
285
 
240
286
  ```javascript
241
- profanity.check('This English sentence has chutiya which is bad.'); // true
242
- profanity.check('I'm saying मादरचोद and bullshit in one sentence.'); // true
287
+ profanity.loadCustomDictionary('swedish', ['fulord1', 'fulord2']);
288
+ profanity.loadLanguage('swedish');
243
289
  ```
244
290
 
245
291
  ## Customizing The Library
246
292
 
247
293
  ### Adding Custom Profanity Lists
248
294
 
249
- You can add your own profanity lists to extend support for other languages or add additional words to existing languages:
295
+ You can add your own profanity words to extend support for other languages or add additional words to existing languages:
250
296
 
251
297
  ```javascript
252
298
  // Add custom profanity words
@@ -255,31 +301,26 @@ profanity.add([
255
301
  'customword2',
256
302
  'customword3'
257
303
  ]);
258
-
259
- // Now it will detect Spanish profanity
260
- profanity.check('Este es un ejemplo de mierda.'); // true
261
304
  ```
262
305
 
263
- ### Creating a Custom-Configured Instance
306
+ ## Creating a Custom-Configured Instance
264
307
 
265
- If you need multiple differently-configured instances of the filter, you can import the AllProfanity class directly:
308
+ If you need multiple differently-configured filters, import the `AllProfanity` class directly:
266
309
 
267
310
  ```javascript
268
311
  import { AllProfanity } from 'allprofanity';
269
312
 
270
- // Create custom instances
271
- const kidSafeFilter = new AllProfanity({ includeModerate: true });
272
- const adultFilter = new AllProfanity({ includeModerate: false });
313
+ const kidSafeFilter = new AllProfanity({ enableLeetSpeak: true, strictMode: true });
314
+ const adultFilter = new AllProfanity({ enableLeetSpeak: false, detectPartialWords: false });
273
315
  ```
274
316
 
275
317
  ## Advanced Use Cases
276
318
 
277
319
  ### Performance Optimization
278
320
 
279
- For applications processing large volumes of text:
321
+ For high-throughput applications:
280
322
 
281
323
  ```javascript
282
- // Pre-compile your most used strings for faster checking
283
324
  const badWordsList = profanity.list();
284
325
  const preCompiledRegex = new RegExp('\\b(' + badWordsList.join('|') + ')\\b', 'i');
285
326
 
@@ -322,7 +363,7 @@ AllProfanity works in all modern browsers and Node.js environments.
322
363
 
323
364
  ## Roadmap
324
365
 
325
- - Add support for more languages (Spanish, French, German, Arabic, etc.)
366
+ - Add support for more languages (Arabic, Chinese, Russian, etc.)
326
367
  - Contextual profanity detection
327
368
  - Severity levels for different categories of profanity
328
369
  - Phonetic matching for evasion attempts
@@ -348,5 +389,8 @@ To add support for a new language:
348
389
 
349
390
  ## Acknowledgements
350
391
 
351
- - Built on top of [leo-profanity](https://github.com/jojoee/leo-profanity)
392
+ - Inspired by [leo-profanity](https://github.com/jojoee/leo-profanity), but fully rebuilt for extensibility and multi-language support.
352
393
 
394
+ ```diff
395
+ - Note: As of v2+, AllProfanity is zero-dependency and does not use leo-profanity internally.
396
+ ```
package/dist/index.d.ts CHANGED
@@ -1,10 +1,11 @@
1
- export { default as hindiBadWords } from "./languages/hindi-words";
2
- export { default as frenchBadWords } from "./languages/french-words";
3
- export { default as germanBadWords } from "./languages/german-words";
4
- export { default as spanishBadWords } from "./languages/spanish-words";
5
- export { default as bengaliBadWords } from "./languages/bengali-words";
6
- export { default as tamilBadWords } from "./languages/tamil-words";
7
- export { default as teluguBadWords } from "./languages/telugu-words";
1
+ export { default as englishBadWords } from "./languages/english-words.js";
2
+ export { default as hindiBadWords } from "./languages/hindi-words.js";
3
+ export { default as frenchBadWords } from "./languages/french-words.js";
4
+ export { default as germanBadWords } from "./languages/german-words.js";
5
+ export { default as spanishBadWords } from "./languages/spanish-words.js";
6
+ export { default as bengaliBadWords } from "./languages/bengali-words.js";
7
+ export { default as tamilBadWords } from "./languages/tamil-words.js";
8
+ export { default as teluguBadWords } from "./languages/telugu-words.js";
8
9
  /**
9
10
  * Configuration options for AllProfanity
10
11
  */
@@ -12,21 +13,78 @@ export interface AllProfanityOptions {
12
13
  languages?: string[];
13
14
  customDictionaries?: Record<string, string[]>;
14
15
  defaultPlaceholder?: string;
16
+ enableLeetSpeak?: boolean;
17
+ caseSensitive?: boolean;
18
+ whitelistWords?: string[];
19
+ strictMode?: boolean;
20
+ detectPartialWords?: boolean;
15
21
  }
16
22
  /**
17
- * AllProfanity - Extended profanity filter with multi-language support
18
- * Based on leo-profanity with additional language capabilities
23
+ * Severity levels for profanity detection
24
+ */
25
+ export declare enum ProfanitySeverity {
26
+ MILD = 1,
27
+ MODERATE = 2,
28
+ SEVERE = 3,
29
+ EXTREME = 4
30
+ }
31
+ /**
32
+ * Detection result interface
33
+ */
34
+ export interface ProfanityDetectionResult {
35
+ hasProfanity: boolean;
36
+ detectedWords: string[];
37
+ cleanedText: string;
38
+ severity: ProfanitySeverity;
39
+ positions: Array<{
40
+ word: string;
41
+ start: number;
42
+ end: number;
43
+ }>;
44
+ }
45
+ /**
46
+ * Advanced AllProfanity - Custom profanity filter with multi-language support and leet speak detection
47
+ * No external dependencies - built from scratch for maximum performance and control
19
48
  */
20
49
  export declare class AllProfanity {
21
- private filter;
50
+ private profanitySet;
51
+ private normalizedProfanityMap;
22
52
  private defaultPlaceholder;
23
53
  private loadedLanguages;
54
+ private whitelistSet;
55
+ private enableLeetSpeak;
56
+ private caseSensitive;
57
+ private strictMode;
58
+ private detectPartialWords;
59
+ private readonly leetMap;
60
+ private readonly wordBoundaryChars;
61
+ private readonly commonSuffixes;
62
+ private readonly commonPrefixes;
24
63
  private availableLanguages;
25
64
  /**
26
65
  * Create a new AllProfanity instance
27
66
  * @param options - Configuration options
28
67
  */
29
68
  constructor(options?: AllProfanityOptions);
69
+ /**
70
+ * Normalize text by converting leet speak to regular characters
71
+ * @param text - Text to normalize
72
+ * @returns Normalized text
73
+ */
74
+ private normalizeLeetSpeak;
75
+ private escapeRegex;
76
+ /**
77
+ * Generate word variations with common prefixes and suffixes
78
+ */
79
+ private generateWordVariations;
80
+ /**
81
+ * Check if text contains word boundaries around a match
82
+ */
83
+ private hasWordBoundaries;
84
+ /**
85
+ * Calculate severity based on detected words
86
+ */
87
+ private calculateSeverity;
30
88
  /**
31
89
  * Load a built-in language dictionary
32
90
  * @param language - The language to load
@@ -51,17 +109,23 @@ export declare class AllProfanity {
51
109
  */
52
110
  loadCustomDictionary(name: string, words: string[]): void;
53
111
  /**
54
- * Get the list of currently loaded languages
55
- * @returns string[] - Array of loaded language names
112
+ * Add words to whitelist (words that should never be flagged as profanity)
113
+ * @param words - Array of words to whitelist
56
114
  */
57
- getLoadedLanguages(): string[];
115
+ addToWhitelist(words: string[]): void;
58
116
  /**
59
- * Get the list of available language dictionaries
60
- * @returns string[] - Array of available language names
117
+ * Remove words from whitelist
118
+ * @param words - Array of words to remove from whitelist
61
119
  */
62
- getAvailableLanguages(): string[];
120
+ removeFromWhitelist(words: string[]): void;
63
121
  /**
64
- * Check if a string contains profanity
122
+ * Advanced profanity detection with detailed results
123
+ * @param text - The text to analyze
124
+ * @returns ProfanityDetectionResult - Detailed detection results
125
+ */
126
+ detect(text: string): ProfanityDetectionResult;
127
+ /**
128
+ * Check if a string contains profanity (simple boolean check)
65
129
  * @param string - The string to check
66
130
  * @returns boolean - True if profanity found, false otherwise
67
131
  */
@@ -69,7 +133,7 @@ export declare class AllProfanity {
69
133
  /**
70
134
  * Clean a string by replacing profanities with placeholders
71
135
  * @param string - The string to clean
72
- * @param placeholder - Optional custom placeholder (defaults to '*')
136
+ * @param placeholder - Optional custom placeholder
73
137
  * @returns string - The cleaned string
74
138
  */
75
139
  clean(string: string, placeholder?: string): string;
@@ -104,6 +168,24 @@ export declare class AllProfanity {
104
168
  * @param placeholder - Single character to use as placeholder
105
169
  */
106
170
  setPlaceholder(placeholder: string): void;
171
+ /**
172
+ * Get the list of currently loaded languages
173
+ * @returns string[] - Array of loaded language names
174
+ */
175
+ getLoadedLanguages(): string[];
176
+ /**
177
+ * Get the list of available language dictionaries
178
+ * @returns string[] - Array of available language names
179
+ */
180
+ getAvailableLanguages(): string[];
181
+ /**
182
+ * Get current configuration
183
+ */
184
+ getConfig(): Partial<AllProfanityOptions>;
185
+ /**
186
+ * Update configuration
187
+ */
188
+ updateConfig(options: Partial<AllProfanityOptions>): void;
107
189
  }
108
190
  declare const allProfanity: AllProfanity;
109
191
  export default allProfanity;