allprofanity 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -25
- package/allprofanity.config.example.json +6 -0
- package/bin/init.js +1 -1
- package/bin/mcp.js +6 -0
- package/config.schema.json +44 -0
- package/dist/algos/aho-corasick.d.ts +11 -1
- package/dist/algos/aho-corasick.js +31 -6
- package/dist/algos/aho-corasick.js.map +1 -1
- package/dist/algos/bloom-filter.d.ts +2 -2
- package/dist/algos/bloom-filter.js +6 -6
- package/dist/algos/bloom-filter.js.map +1 -1
- package/dist/index.d.ts +896 -48
- package/dist/index.js +1438 -177
- package/dist/index.js.map +1 -1
- package/dist/languages/hindi-words.js +2 -2
- package/dist/languages/hindi-words.js.map +1 -1
- package/dist/mcp/server.d.ts +30 -0
- package/dist/mcp/server.js +364 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/stdio.d.ts +1 -0
- package/dist/mcp/stdio.js +72 -0
- package/dist/mcp/stdio.js.map +1 -0
- package/examples-config/README.md +113 -0
- package/examples-config/chat-app.json +24 -0
- package/examples-config/content-moderation.json +42 -0
- package/examples-config/family-friendly-max.json +33 -0
- package/examples-config/high-throughput-api.json +29 -0
- package/examples-config/low-latency-minimal.json +24 -0
- package/examples-config/medical-professional.json +42 -0
- package/examples-config/multilingual-global.json +33 -0
- package/package.json +17 -7
package/dist/index.d.ts
CHANGED
|
@@ -8,81 +8,398 @@ export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
|
8
8
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
9
9
|
export { default as brazilianBadWords } from "./languages/brazilian-words.js";
|
|
10
10
|
/**
|
|
11
|
-
* Logger interface for
|
|
11
|
+
* Logger interface for AllProfanity library logging operations.
|
|
12
|
+
*
|
|
13
|
+
* @interface Logger
|
|
14
|
+
* @description Provides a contract for logging implementations used by the AllProfanity library.
|
|
15
|
+
* Implement this interface to provide custom logging behavior (e.g., logging to files, external services).
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* class CustomLogger implements Logger {
|
|
20
|
+
* info(message: string): void {
|
|
21
|
+
* // Custom info logging logic
|
|
22
|
+
* }
|
|
23
|
+
* warn(message: string): void {
|
|
24
|
+
* // Custom warning logging logic
|
|
25
|
+
* }
|
|
26
|
+
* error(message: string): void {
|
|
27
|
+
* // Custom error logging logic
|
|
28
|
+
* }
|
|
29
|
+
* }
|
|
30
|
+
* const filter = new AllProfanity({ logger: new CustomLogger() });
|
|
31
|
+
* ```
|
|
12
32
|
*/
|
|
13
33
|
export interface Logger {
|
|
14
34
|
/**
|
|
15
|
-
* Log informational messages.
|
|
16
|
-
*
|
|
35
|
+
* Log informational messages about normal operations.
|
|
36
|
+
*
|
|
37
|
+
* @param message - The informational message to log
|
|
38
|
+
* @returns void
|
|
17
39
|
*/
|
|
18
40
|
info(message: string): void;
|
|
19
41
|
/**
|
|
20
|
-
* Log warning messages.
|
|
21
|
-
*
|
|
42
|
+
* Log warning messages about potential issues or deprecated usage.
|
|
43
|
+
*
|
|
44
|
+
* @param message - The warning message to log
|
|
45
|
+
* @returns void
|
|
22
46
|
*/
|
|
23
47
|
warn(message: string): void;
|
|
24
48
|
/**
|
|
25
|
-
* Log error messages.
|
|
26
|
-
*
|
|
49
|
+
* Log error messages about failures or critical issues.
|
|
50
|
+
*
|
|
51
|
+
* @param message - The error message to log
|
|
52
|
+
* @returns void
|
|
27
53
|
*/
|
|
28
54
|
error(message: string): void;
|
|
29
55
|
}
|
|
30
56
|
/**
|
|
31
|
-
* Configuration options for AllProfanity.
|
|
57
|
+
* Configuration options for initializing an AllProfanity instance.
|
|
58
|
+
*
|
|
59
|
+
* @interface AllProfanityOptions
|
|
60
|
+
* @description Comprehensive configuration object for customizing profanity detection behavior,
|
|
61
|
+
* algorithm selection, performance optimizations, and logging.
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* ```typescript
|
|
65
|
+
* const filter = new AllProfanity({
|
|
66
|
+
* languages: ['english', 'french'],
|
|
67
|
+
* enableLeetSpeak: true,
|
|
68
|
+
* strictMode: true,
|
|
69
|
+
* algorithm: {
|
|
70
|
+
* matching: 'hybrid',
|
|
71
|
+
* useBloomFilter: true
|
|
72
|
+
* },
|
|
73
|
+
* performance: {
|
|
74
|
+
* enableCaching: true,
|
|
75
|
+
* cacheSize: 500
|
|
76
|
+
* }
|
|
77
|
+
* });
|
|
78
|
+
* ```
|
|
32
79
|
*/
|
|
33
80
|
export interface AllProfanityOptions {
|
|
81
|
+
/**
|
|
82
|
+
* Array of language keys to load (e.g., 'english', 'hindi', 'french').
|
|
83
|
+
* Available languages: english, hindi, french, german, spanish, bengali, tamil, telugu, brazilian.
|
|
84
|
+
*
|
|
85
|
+
* @default ['english', 'hindi'] (loaded by default in constructor)
|
|
86
|
+
*/
|
|
34
87
|
languages?: string[];
|
|
88
|
+
/**
|
|
89
|
+
* Custom dictionaries to load in addition to built-in languages.
|
|
90
|
+
* Key is the dictionary name, value is an array of words.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* customDictionaries: {
|
|
95
|
+
* 'gaming': ['noob', 'trash'],
|
|
96
|
+
* 'custom': ['word1', 'word2']
|
|
97
|
+
* }
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
35
100
|
customDictionaries?: Record<string, string[]>;
|
|
101
|
+
/**
|
|
102
|
+
* Single character to use as replacement placeholder for profane characters.
|
|
103
|
+
*
|
|
104
|
+
* @default "*"
|
|
105
|
+
*/
|
|
36
106
|
defaultPlaceholder?: string;
|
|
107
|
+
/**
|
|
108
|
+
* Enable detection and normalization of leet speak variations (e.g., "h3ll0" -> "hello").
|
|
109
|
+
*
|
|
110
|
+
* @default true
|
|
111
|
+
*/
|
|
37
112
|
enableLeetSpeak?: boolean;
|
|
113
|
+
/**
|
|
114
|
+
* Enable case-sensitive matching. When false, all matching is done in lowercase.
|
|
115
|
+
*
|
|
116
|
+
* @default false
|
|
117
|
+
*/
|
|
38
118
|
caseSensitive?: boolean;
|
|
119
|
+
/**
|
|
120
|
+
* Array of words to whitelist (never flag as profanity even if in dictionaries).
|
|
121
|
+
*
|
|
122
|
+
* @example ['hello', 'class', 'assignment']
|
|
123
|
+
*/
|
|
39
124
|
whitelistWords?: string[];
|
|
125
|
+
/**
|
|
126
|
+
* Strict mode requires profanity to be surrounded by word boundaries (spaces, punctuation).
|
|
127
|
+
* When false, profanity embedded in other words may be detected.
|
|
128
|
+
*
|
|
129
|
+
* @default false
|
|
130
|
+
*/
|
|
40
131
|
strictMode?: boolean;
|
|
132
|
+
/**
|
|
133
|
+
* Allow detection of profanity as partial matches within larger words.
|
|
134
|
+
* When true, "badword" will be detected in "mybadwordhere".
|
|
135
|
+
*
|
|
136
|
+
* @default false
|
|
137
|
+
*/
|
|
41
138
|
detectPartialWords?: boolean;
|
|
139
|
+
/**
|
|
140
|
+
* Custom logger implementation for handling log messages.
|
|
141
|
+
* If not provided, defaults to ConsoleLogger unless silent mode is enabled.
|
|
142
|
+
*/
|
|
42
143
|
logger?: Logger;
|
|
144
|
+
/**
|
|
145
|
+
* Silent mode suppresses all logging output.
|
|
146
|
+
* When true, uses SilentLogger to discard all log messages.
|
|
147
|
+
*
|
|
148
|
+
* @default false
|
|
149
|
+
*/
|
|
150
|
+
silent?: boolean;
|
|
151
|
+
/**
|
|
152
|
+
* Advanced algorithm configuration for pattern matching strategies.
|
|
153
|
+
*/
|
|
43
154
|
algorithm?: {
|
|
155
|
+
/**
|
|
156
|
+
* Primary matching algorithm to use.
|
|
157
|
+
* - 'trie': Fast prefix tree matching (default, best for most use cases)
|
|
158
|
+
* - 'aho-corasick': Multi-pattern matching (best for large dictionaries)
|
|
159
|
+
* - 'hybrid': Combines Aho-Corasick with Bloom Filter (best for extreme performance)
|
|
160
|
+
*
|
|
161
|
+
* @default "trie"
|
|
162
|
+
*/
|
|
44
163
|
matching?: "trie" | "aho-corasick" | "hybrid";
|
|
164
|
+
/**
|
|
165
|
+
* Enable Aho-Corasick automaton for multi-pattern matching.
|
|
166
|
+
* Automatically enabled when matching is set to 'aho-corasick' or 'hybrid'.
|
|
167
|
+
*
|
|
168
|
+
* @default false
|
|
169
|
+
*/
|
|
45
170
|
useAhoCorasick?: boolean;
|
|
171
|
+
/**
|
|
172
|
+
* Enable Bloom Filter for probabilistic quick rejection of non-profane text.
|
|
173
|
+
* Automatically enabled when matching is set to 'hybrid'.
|
|
174
|
+
*
|
|
175
|
+
* @default false
|
|
176
|
+
*/
|
|
46
177
|
useBloomFilter?: boolean;
|
|
178
|
+
/**
|
|
179
|
+
* Enable context analysis to reduce false positives based on surrounding words.
|
|
180
|
+
*
|
|
181
|
+
* @default false
|
|
182
|
+
*/
|
|
47
183
|
useContextAnalysis?: boolean;
|
|
48
184
|
};
|
|
185
|
+
/**
|
|
186
|
+
* Bloom Filter configuration for probabilistic matching optimization.
|
|
187
|
+
*/
|
|
49
188
|
bloomFilter?: {
|
|
189
|
+
/**
|
|
190
|
+
* Enable Bloom Filter.
|
|
191
|
+
*
|
|
192
|
+
* @default false
|
|
193
|
+
*/
|
|
50
194
|
enabled?: boolean;
|
|
195
|
+
/**
|
|
196
|
+
* Expected number of items to be stored in the Bloom Filter.
|
|
197
|
+
* Higher values increase memory usage but reduce false positive rate.
|
|
198
|
+
*
|
|
199
|
+
* @default 10000
|
|
200
|
+
*/
|
|
51
201
|
expectedItems?: number;
|
|
202
|
+
/**
|
|
203
|
+
* Target false positive rate (probability that the Bloom Filter reports a non-dictionary word as possibly present, so the text is not quick-rejected).
|
|
204
|
+
* Lower values increase memory usage but improve accuracy.
|
|
205
|
+
*
|
|
206
|
+
* @default 0.01 (1%)
|
|
207
|
+
*/
|
|
52
208
|
falsePositiveRate?: number;
|
|
53
209
|
};
|
|
210
|
+
/**
|
|
211
|
+
* Aho-Corasick automaton configuration for multi-pattern matching.
|
|
212
|
+
*/
|
|
54
213
|
ahoCorasick?: {
|
|
214
|
+
/**
|
|
215
|
+
* Enable Aho-Corasick automaton.
|
|
216
|
+
*
|
|
217
|
+
* @default false
|
|
218
|
+
*/
|
|
55
219
|
enabled?: boolean;
|
|
220
|
+
/**
|
|
221
|
+
* Pre-build the automaton during initialization.
|
|
222
|
+
* When false, automaton is built lazily on first use.
|
|
223
|
+
*
|
|
224
|
+
* @default false
|
|
225
|
+
*/
|
|
56
226
|
prebuild?: boolean;
|
|
57
227
|
};
|
|
228
|
+
/**
|
|
229
|
+
* Context analysis configuration for reducing false positives.
|
|
230
|
+
*/
|
|
58
231
|
contextAnalysis?: {
|
|
232
|
+
/**
|
|
233
|
+
* Enable context-aware profanity detection.
|
|
234
|
+
*
|
|
235
|
+
* @default false
|
|
236
|
+
*/
|
|
59
237
|
enabled?: boolean;
|
|
238
|
+
/**
|
|
239
|
+
* Number of words before and after the detected word to analyze for context.
|
|
240
|
+
*
|
|
241
|
+
* @default 5
|
|
242
|
+
*/
|
|
60
243
|
contextWindow?: number;
|
|
244
|
+
/**
|
|
245
|
+
* Languages to use for context analysis (e.g., ['en', 'es']).
|
|
246
|
+
*
|
|
247
|
+
* @default ['en']
|
|
248
|
+
*/
|
|
61
249
|
languages?: string[];
|
|
250
|
+
/**
|
|
251
|
+
* Minimum confidence score (0-1) required to flag as profanity.
|
|
252
|
+
* Higher values reduce false positives but may miss some profanity.
|
|
253
|
+
*
|
|
254
|
+
* @default 0.5
|
|
255
|
+
*/
|
|
62
256
|
scoreThreshold?: number;
|
|
63
257
|
};
|
|
258
|
+
/**
|
|
259
|
+
* Evasion-protection configuration. All passes are enabled by default and
|
|
260
|
+
* only run when their trigger characters are present in the text, so they
|
|
261
|
+
* add near-zero cost on ordinary input.
|
|
262
|
+
*/
|
|
263
|
+
evasionProtection?: {
|
|
264
|
+
/**
|
|
265
|
+
* Fold unicode evasion: fullwidth forms (ｆｕｃｋ), Cyrillic/Greek
|
|
266
|
+
* homoglyphs (fυck), diacritics (fück) and invisible characters
|
|
267
|
+
* (zero-width spaces, soft hyphens) injected inside words.
|
|
268
|
+
*
|
|
269
|
+
* @default true
|
|
270
|
+
*/
|
|
271
|
+
unicode?: boolean;
|
|
272
|
+
/**
|
|
273
|
+
* Collapse stretched characters ("fuuuuck" -> "fuck"). Only triggers when
|
|
274
|
+
* a run of 3+ identical characters exists.
|
|
275
|
+
*
|
|
276
|
+
* @default true
|
|
277
|
+
*/
|
|
278
|
+
repeatedCharacters?: boolean;
|
|
279
|
+
/**
|
|
280
|
+
* Resolve masked characters as single-character wildcards ("f*ck",
|
|
281
|
+
* "f#ck", "f@ck"). A masked token only matches when the visible letters
|
|
282
|
+
* align exactly with a dictionary word.
|
|
283
|
+
*
|
|
284
|
+
* @default true
|
|
285
|
+
*/
|
|
286
|
+
maskedCharacters?: boolean;
|
|
287
|
+
/**
|
|
288
|
+
* Detect words spelled out with uniform single separators
|
|
289
|
+
* ("f u c k", "f.u.c.k"). The joined letters must equal a dictionary
|
|
290
|
+
* word exactly, which keeps initialisms like "U S A" clean.
|
|
291
|
+
*
|
|
292
|
+
* @default true
|
|
293
|
+
*/
|
|
294
|
+
separatedLetters?: boolean;
|
|
295
|
+
/**
|
|
296
|
+
* Detect unambiguous profanity stems embedded inside larger tokens
|
|
297
|
+
* ("sisfuck", "totalshitshow"). Applies only to a curated list of
|
|
298
|
+
* strong words that rarely occur inside legitimate vocabulary, with built-in
|
|
299
|
+
* exceptions (Scunthorpe, mishit, snigger, ...), so "classic", "bass"
|
|
300
|
+
* and "Hitchcock" stay clean.
|
|
301
|
+
*
|
|
302
|
+
* @default true
|
|
303
|
+
*/
|
|
304
|
+
embeddedWords?: boolean;
|
|
305
|
+
};
|
|
306
|
+
/**
|
|
307
|
+
* Performance optimization configuration.
|
|
308
|
+
*/
|
|
64
309
|
performance?: {
|
|
310
|
+
/**
|
|
311
|
+
* Maximum number of results to cache in LRU cache.
|
|
312
|
+
*
|
|
313
|
+
* @default 1000
|
|
314
|
+
*/
|
|
65
315
|
cacheSize?: number;
|
|
316
|
+
/**
|
|
317
|
+
* Enable result caching to speed up repeated queries.
|
|
318
|
+
* Stores detection results for previously seen text.
|
|
319
|
+
*
|
|
320
|
+
* @default false
|
|
321
|
+
*/
|
|
66
322
|
enableCaching?: boolean;
|
|
67
323
|
};
|
|
68
324
|
}
|
|
69
325
|
/**
|
|
70
|
-
* Severity levels for profanity detection.
|
|
326
|
+
* Severity levels for profanity detection results.
|
|
327
|
+
*
|
|
328
|
+
* @enum {number}
|
|
329
|
+
* @description Categorizes the severity of detected profanity based on the number
|
|
330
|
+
* of unique words and total matches found in the text.
|
|
331
|
+
*
|
|
332
|
+
* @readonly
|
|
333
|
+
* @example
|
|
334
|
+
* ```typescript
|
|
335
|
+
* const result = filter.detect("some text");
|
|
336
|
+
* if (result.severity === ProfanitySeverity.EXTREME) {
|
|
337
|
+
* // Handle extreme profanity
|
|
338
|
+
* }
|
|
339
|
+
* ```
|
|
71
340
|
*/
|
|
72
341
|
export declare enum ProfanitySeverity {
|
|
342
|
+
/** No profanity detected */
|
|
343
|
+
NONE = 0,
|
|
344
|
+
/** Mild profanity: 1 unique word or 1 total match */
|
|
73
345
|
MILD = 1,
|
|
346
|
+
/** Moderate profanity: 2 unique words or 2 total matches */
|
|
74
347
|
MODERATE = 2,
|
|
348
|
+
/** Severe profanity: 3 unique words or 3-4 total matches */
|
|
75
349
|
SEVERE = 3,
|
|
350
|
+
/** Extreme profanity: 4+ unique words or 5+ total matches */
|
|
76
351
|
EXTREME = 4
|
|
77
352
|
}
|
|
78
353
|
/**
|
|
79
|
-
*
|
|
354
|
+
* Result object returned from profanity detection operations.
|
|
355
|
+
*
|
|
356
|
+
* @interface ProfanityDetectionResult
|
|
357
|
+
* @description Contains comprehensive information about detected profanity including
|
|
358
|
+
* what was found, where it was found, how severe it is, and a cleaned version of the text.
|
|
359
|
+
*
|
|
360
|
+
* @example
|
|
361
|
+
* ```typescript
|
|
362
|
+
* const result = filter.detect("This is a bad word");
|
|
363
|
+
* console.log(result.hasProfanity); // true
|
|
364
|
+
* console.log(result.detectedWords); // ['bad word']
|
|
365
|
+
* console.log(result.cleanedText); // 'This is a *** ****'
|
|
366
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
367
|
+
* console.log(result.positions); // [{ word: 'bad word', start: 10, end: 18 }]
|
|
368
|
+
* ```
|
|
80
369
|
*/
|
|
81
370
|
export interface ProfanityDetectionResult {
|
|
371
|
+
/**
|
|
372
|
+
* Whether any profanity was detected in the text.
|
|
373
|
+
*
|
|
374
|
+
* @type {boolean}
|
|
375
|
+
*/
|
|
82
376
|
hasProfanity: boolean;
|
|
377
|
+
/**
|
|
378
|
+
* Array of detected profane words/phrases as they appeared in the original text.
|
|
379
|
+
* Includes case and formatting from the original text.
|
|
380
|
+
*
|
|
381
|
+
* @type {string[]}
|
|
382
|
+
*/
|
|
83
383
|
detectedWords: string[];
|
|
384
|
+
/**
|
|
385
|
+
* The text with all profanity replaced by placeholder characters.
|
|
386
|
+
* Each profane character is replaced with the configured placeholder (default: '*').
|
|
387
|
+
*
|
|
388
|
+
* @type {string}
|
|
389
|
+
*/
|
|
84
390
|
cleanedText: string;
|
|
391
|
+
/**
|
|
392
|
+
* Severity level of detected profanity.
|
|
393
|
+
*
|
|
394
|
+
* @type {ProfanitySeverity}
|
|
395
|
+
*/
|
|
85
396
|
severity: ProfanitySeverity;
|
|
397
|
+
/**
|
|
398
|
+
* Precise positions of each detected profane word in the original text.
|
|
399
|
+
* Useful for highlighting or further processing.
|
|
400
|
+
*
|
|
401
|
+
* @type {Array<{ word: string; start: number; end: number }>}
|
|
402
|
+
*/
|
|
86
403
|
positions: Array<{
|
|
87
404
|
word: string;
|
|
88
405
|
start: number;
|
|
@@ -90,7 +407,99 @@ export interface ProfanityDetectionResult {
|
|
|
90
407
|
}>;
|
|
91
408
|
}
|
|
92
409
|
/**
|
|
93
|
-
*
|
|
410
|
+
* AllProfanity - Professional-grade multilingual profanity detection and filtering library.
|
|
411
|
+
*
|
|
412
|
+
* @class AllProfanity
|
|
413
|
+
* @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
|
|
414
|
+
* with advanced features including leet speak detection, context analysis, multiple matching algorithms,
|
|
415
|
+
* and customizable filtering options.
|
|
416
|
+
*
|
|
417
|
+
* @remarks
|
|
418
|
+
* ### Features:
|
|
419
|
+
* - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
|
|
420
|
+
* - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
|
|
421
|
+
* - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
|
|
422
|
+
* - **Context Analysis**: Reduces false positives using surrounding word context
|
|
423
|
+
* - **Performance**: Built-in caching and optimized data structures
|
|
424
|
+
* - **Flexible**: Custom dictionaries, whitelisting, severity levels
|
|
425
|
+
*
|
|
426
|
+
* ### Default Behavior:
|
|
427
|
+
* - Loads English and Hindi dictionaries by default
|
|
428
|
+
* - Case-insensitive matching
|
|
429
|
+
* - Leet speak detection enabled
|
|
430
|
+
* - Uses Trie algorithm (fastest for most cases)
|
|
431
|
+
*
|
|
432
|
+
* @example
|
|
433
|
+
* ```typescript
|
|
434
|
+
* // Basic usage with default instance
|
|
435
|
+
* import allProfanity from 'allprofanity';
|
|
436
|
+
*
|
|
437
|
+
* const result = allProfanity.detect("This is some bad text");
|
|
438
|
+
* console.log(result.hasProfanity); // true
|
|
439
|
+
* console.log(result.cleanedText); // "This is some *** text"
|
|
440
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
441
|
+
* ```
|
|
442
|
+
*
|
|
443
|
+
* @example
|
|
444
|
+
* ```typescript
|
|
445
|
+
* // Advanced usage with custom configuration
|
|
446
|
+
* import { AllProfanity, ProfanitySeverity } from 'allprofanity';
|
|
447
|
+
*
|
|
448
|
+
* const filter = new AllProfanity({
|
|
449
|
+
* languages: ['english', 'french', 'spanish'],
|
|
450
|
+
* enableLeetSpeak: true,
|
|
451
|
+
* strictMode: true,
|
|
452
|
+
* algorithm: {
|
|
453
|
+
* matching: 'hybrid',
|
|
454
|
+
* useBloomFilter: true
|
|
455
|
+
* },
|
|
456
|
+
* performance: {
|
|
457
|
+
* enableCaching: true,
|
|
458
|
+
* cacheSize: 500
|
|
459
|
+
* },
|
|
460
|
+
* whitelistWords: ['class', 'assignment']
|
|
461
|
+
* });
|
|
462
|
+
*
|
|
463
|
+
* const text = "This text has some b@d w0rds";
|
|
464
|
+
* const result = filter.detect(text);
|
|
465
|
+
*
|
|
466
|
+
* if (result.hasProfanity) {
|
|
467
|
+
* console.log(`Found ${result.detectedWords.length} profane words`);
|
|
468
|
+
* console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
|
|
469
|
+
* console.log(`Cleaned: ${result.cleanedText}`);
|
|
470
|
+
* }
|
|
471
|
+
* ```
|
|
472
|
+
*
|
|
473
|
+
* @example
|
|
474
|
+
* ```typescript
|
|
475
|
+
* // Using individual methods
|
|
476
|
+
* const filter = new AllProfanity();
|
|
477
|
+
*
|
|
478
|
+
* // Simple check
|
|
479
|
+
* if (filter.check("some text")) {
|
|
480
|
+
* console.log("Contains profanity!");
|
|
481
|
+
* }
|
|
482
|
+
*
|
|
483
|
+
* // Clean with custom placeholder
|
|
484
|
+
* const cleaned = filter.clean("bad words here", "#");
|
|
485
|
+
*
|
|
486
|
+
* // Load additional languages
|
|
487
|
+
* filter.loadLanguage('german');
|
|
488
|
+
* filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
|
|
489
|
+
*
|
|
490
|
+
* // Add custom words
|
|
491
|
+
* filter.add(['customword1', 'customword2']);
|
|
492
|
+
*
|
|
493
|
+
* // Remove words
|
|
494
|
+
* filter.remove(['someword']);
|
|
495
|
+
*
|
|
496
|
+
* // Whitelist words
|
|
497
|
+
* filter.addToWhitelist(['class', 'assignment']);
|
|
498
|
+
* ```
|
|
499
|
+
*
|
|
500
|
+
* @see {@link AllProfanityOptions} for all configuration options
|
|
501
|
+
* @see {@link ProfanityDetectionResult} for detection result format
|
|
502
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
94
503
|
*/
|
|
95
504
|
export declare class AllProfanity {
|
|
96
505
|
private readonly profanityTrie;
|
|
@@ -102,17 +511,60 @@ export declare class AllProfanity {
|
|
|
102
511
|
private caseSensitive;
|
|
103
512
|
private strictMode;
|
|
104
513
|
private detectPartialWords;
|
|
514
|
+
private evasionUnicode;
|
|
515
|
+
private evasionRepeatedChars;
|
|
516
|
+
private evasionMaskedChars;
|
|
517
|
+
private evasionSeparatedLetters;
|
|
518
|
+
private evasionEmbeddedWords;
|
|
105
519
|
private readonly availableLanguages;
|
|
106
520
|
private readonly leetMappings;
|
|
107
521
|
private readonly dynamicWords;
|
|
108
522
|
private ahoCorasickAutomaton;
|
|
109
523
|
private bloomFilter;
|
|
110
524
|
private contextAnalyzer;
|
|
525
|
+
private contextScoreThreshold;
|
|
111
526
|
private matchingAlgorithm;
|
|
112
527
|
private resultCache;
|
|
528
|
+
private cacheMaxSize;
|
|
529
|
+
private leetTokensByFirstChar;
|
|
113
530
|
/**
|
|
114
|
-
*
|
|
115
|
-
*
|
|
531
|
+
* Creates a new AllProfanity instance with the specified configuration.
|
|
532
|
+
*
|
|
533
|
+
* @constructor
|
|
534
|
+
* @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
|
|
535
|
+
*
|
|
536
|
+
* @remarks
|
|
537
|
+
* ### Default Initialization:
|
|
538
|
+
* - Loads English and Hindi dictionaries automatically
|
|
539
|
+
* - Enables leet speak detection
|
|
540
|
+
* - Case-insensitive matching
|
|
541
|
+
* - Uses Trie algorithm for pattern matching
|
|
542
|
+
*
|
|
543
|
+
* ### Performance Considerations:
|
|
544
|
+
* - Initial load time depends on number of languages loaded
|
|
545
|
+
* - Aho-Corasick automaton (if enabled) is built during construction when `ahoCorasick.prebuild` is true, otherwise lazily on first use
|
|
546
|
+
* - Bloom Filter (if enabled) is populated during construction
|
|
547
|
+
*
|
|
548
|
+
* @throws {TypeError} If invalid options are provided
|
|
549
|
+
*
|
|
550
|
+
* @example
|
|
551
|
+
* ```typescript
|
|
552
|
+
* // Default instance
|
|
553
|
+
* const filter = new AllProfanity();
|
|
554
|
+
*
|
|
555
|
+
* // Custom configuration
|
|
556
|
+
* const filter = new AllProfanity({
|
|
557
|
+
* languages: ['english', 'french'],
|
|
558
|
+
* strictMode: true,
|
|
559
|
+
* defaultPlaceholder: '#',
|
|
560
|
+
* algorithm: { matching: 'hybrid' }
|
|
561
|
+
* });
|
|
562
|
+
*
|
|
563
|
+
* // Silent mode (no logging)
|
|
564
|
+
* const filter = new AllProfanity({ silent: true });
|
|
565
|
+
* ```
|
|
566
|
+
*
|
|
567
|
+
* @see {@link AllProfanityOptions} for all available configuration options
|
|
116
568
|
*/
|
|
117
569
|
constructor(options?: AllProfanityOptions);
|
|
118
570
|
/**
|
|
@@ -120,17 +572,56 @@ export declare class AllProfanity {
|
|
|
120
572
|
*/
|
|
121
573
|
private initializeAdvancedAlgorithms;
|
|
122
574
|
/**
|
|
123
|
-
* Normalize leet speak to regular characters
|
|
124
|
-
*
|
|
125
|
-
*
|
|
575
|
+
* Normalize leet speak to regular characters, keeping a map from each
|
|
576
|
+
* normalized character back to its source range in the input text.
|
|
577
|
+
*
|
|
578
|
+
* For normalized index i, starts[i]/ends[i] give the [start, end) range in
|
|
579
|
+
* the input that produced that character. A match [s, e) in the normalized
|
|
580
|
+
* string therefore spans [starts[s], ends[e - 1]) in the input. This is what
|
|
581
|
+
* keeps positions correct when length-changing mappings like "ph" -> "f"
|
|
582
|
+
* apply.
|
|
583
|
+
*/
|
|
584
|
+
private normalizeLeetSpeakWithMap;
|
|
585
|
+
/**
|
|
586
|
+
* Fold unicode evasion tactics into ASCII with a position map: fullwidth
|
|
587
|
+
* forms, Cyrillic/Greek homoglyphs, Latin diacritics, and invisible
|
|
588
|
+
* characters injected inside words. Non-Latin scripts (Devanagari, Tamil,
|
|
589
|
+
* etc.) pass through untouched. Returns null when nothing changed.
|
|
590
|
+
*/
|
|
591
|
+
private unicodeNormalizeWithMap;
|
|
592
|
+
/**
|
|
593
|
+
* Collapse runs of repeated characters ("fuuuuck" -> "fuck") with a
|
|
594
|
+
* position map. Only triggers when a run of 3+ identical characters
|
|
595
|
+
* exists, so ordinary doubled letters never pay for this pass.
|
|
596
|
+
* Returns null when not triggered.
|
|
126
597
|
*/
|
|
127
|
-
private
|
|
598
|
+
private collapseRepeatsWithMap;
|
|
128
599
|
/**
|
|
129
|
-
*
|
|
130
|
-
*
|
|
131
|
-
*
|
|
600
|
+
* Build the list of (text, position-map) variants to scan: the base text
|
|
601
|
+
* plus unicode-folded, leet-normalized and repeat-collapsed variants, each
|
|
602
|
+
* included only when its normalization actually changed something.
|
|
132
603
|
*/
|
|
133
|
-
private
|
|
604
|
+
private buildScanPasses;
|
|
605
|
+
/**
|
|
606
|
+
* Find dictionary words hidden behind masked characters ("f*ck", "f#ck").
|
|
607
|
+
* Each mask matches exactly one character and the token's visible letters
|
|
608
|
+
* must align with a dictionary word, so "c#" or "5% off" never flag.
|
|
609
|
+
*/
|
|
610
|
+
private findMaskedMatches;
|
|
611
|
+
/**
|
|
612
|
+
* Find words spelled out with a uniform single separator ("f u c k",
|
|
613
|
+
* "f.u.c.k"). The joined letters must equal a dictionary word exactly:
|
|
614
|
+
* runs like "U S A" or letters inside spelled-out sentences never flag.
|
|
615
|
+
*/
|
|
616
|
+
private findSeparatedMatches;
|
|
617
|
+
/**
|
|
618
|
+
* Find unambiguous profanity stems embedded inside larger tokens
|
|
619
|
+
* ("sisfuck", "totalshitshow"). Only stems from EMBEDDED_STRONG_STEMS that
|
|
620
|
+
* are currently in the dictionary are considered, and tokens listed in
|
|
621
|
+
* EMBEDDED_SAFE_WORDS or the whitelist never flag. The whole containing
|
|
622
|
+
* token is reported so cleaning masks all of it.
|
|
623
|
+
*/
|
|
624
|
+
private findEmbeddedMatches;
|
|
134
625
|
/**
|
|
135
626
|
* Check if a match is bounded by word boundaries (strict mode).
|
|
136
627
|
* @param text - The text.
|
|
@@ -154,6 +645,11 @@ export declare class AllProfanity {
|
|
|
154
645
|
* @returns True if whitelisted, false otherwise.
|
|
155
646
|
*/
|
|
156
647
|
private isWhitelistedMatch;
|
|
648
|
+
/**
|
|
649
|
+
* In partial-word mode, check whether the word CONTAINING the match is
|
|
650
|
+
* whitelisted: with "classic" whitelisted, the embedded "ass" must not flag.
|
|
651
|
+
*/
|
|
652
|
+
private isWhitelistedContainingWord;
|
|
157
653
|
/**
|
|
158
654
|
* Remove overlapping matches, keeping only the longest at each start position.
|
|
159
655
|
* @param matches - Array of match results.
|
|
@@ -165,7 +661,13 @@ export declare class AllProfanity {
|
|
|
165
661
|
*/
|
|
166
662
|
private findMatchesWithAhoCorasick;
|
|
167
663
|
/**
|
|
168
|
-
*
|
|
664
|
+
* Check whether the Bloom Filter can quickly rule out any profanity in the
|
|
665
|
+
* text. Only safe for ASCII whole-word matching: partial matches and
|
|
666
|
+
* non-ASCII scripts can match inside tokens, so they bypass the prefilter.
|
|
667
|
+
*/
|
|
668
|
+
private bloomQuickReject;
|
|
669
|
+
/**
|
|
670
|
+
* Hybrid approach: Bloom Filter for quick rejection, Aho-Corasick for matching
|
|
169
671
|
*/
|
|
170
672
|
private findMatchesHybrid;
|
|
171
673
|
/**
|
|
@@ -173,9 +675,61 @@ export declare class AllProfanity {
|
|
|
173
675
|
*/
|
|
174
676
|
private applyContextAnalysis;
|
|
175
677
|
/**
|
|
176
|
-
*
|
|
177
|
-
*
|
|
178
|
-
|
|
678
|
+
* Drop all cached detection results. Must be called whenever the word lists
|
|
679
|
+
* or any option that affects detection output changes.
|
|
680
|
+
*/
|
|
681
|
+
private invalidateCache;
|
|
682
|
+
/**
|
|
683
|
+
* Detects profanity in the provided text and returns comprehensive analysis.
|
|
684
|
+
*
|
|
685
|
+
* @param {string} text - The text to analyze for profanity
|
|
686
|
+
* @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
|
|
687
|
+
*
|
|
688
|
+
* @throws {TypeError} If text is not a string
|
|
689
|
+
*
|
|
690
|
+
* @remarks
|
|
691
|
+
* ### Performance:
|
|
692
|
+
* - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
|
|
693
|
+
* - With Bloom Filter: O(n) average case (faster early rejection)
|
|
694
|
+
* - With Caching: O(1) for repeated identical text
|
|
695
|
+
*
|
|
696
|
+
* ### Features:
|
|
697
|
+
* - Detects leet speak variations (if enabled): "h3ll0" → "hello"
|
|
698
|
+
* - Respects word boundaries (strict mode) or detects partial matches
|
|
699
|
+
* - Returns exact positions for highlighting/masking
|
|
700
|
+
* - Calculates severity based on match count and uniqueness
|
|
701
|
+
*
|
|
702
|
+
* ### Caching:
|
|
703
|
+
* - Results are cached if `performance.enableCaching` is true
|
|
704
|
+
* - Cache uses LRU eviction when size limit is reached
|
|
705
|
+
*
|
|
706
|
+
* @example
|
|
707
|
+
* ```typescript
|
|
708
|
+
* const filter = new AllProfanity();
|
|
709
|
+
* const result = filter.detect("This has bad words");
|
|
710
|
+
*
|
|
711
|
+
* console.log(result.hasProfanity); // true
|
|
712
|
+
* console.log(result.detectedWords); // ['bad']
|
|
713
|
+
* console.log(result.cleanedText); // 'This has *** words'
|
|
714
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
715
|
+
* console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
|
|
716
|
+
* ```
|
|
717
|
+
*
|
|
718
|
+
* @example
|
|
719
|
+
* ```typescript
|
|
720
|
+
* // With leet speak detection
|
|
721
|
+
* const filter = new AllProfanity({ enableLeetSpeak: true });
|
|
722
|
+
* const result = filter.detect("st0p b3ing b@d");
|
|
723
|
+
*
|
|
724
|
+
* if (result.hasProfanity) {
|
|
725
|
+
* result.positions.forEach(pos => {
|
|
726
|
+
* console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
|
|
727
|
+
* });
|
|
728
|
+
* }
|
|
729
|
+
* ```
|
|
730
|
+
*
|
|
731
|
+
* @see {@link ProfanityDetectionResult} for result structure
|
|
732
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
179
733
|
*/
|
|
180
734
|
detect(text: string): ProfanityDetectionResult;
|
|
181
735
|
/**
|
|
@@ -193,33 +747,219 @@ export declare class AllProfanity {
|
|
|
193
747
|
*/
|
|
194
748
|
private generateCleanedText;
|
|
195
749
|
/**
|
|
196
|
-
*
|
|
197
|
-
*
|
|
198
|
-
* @
|
|
750
|
+
* Quick boolean check for profanity presence in text.
|
|
751
|
+
*
|
|
752
|
+
* @param {string} text - The text to check for profanity
|
|
753
|
+
* @returns {boolean} True if profanity is detected, false otherwise
|
|
754
|
+
*
|
|
755
|
+
* @throws {TypeError} If text is not a string
|
|
756
|
+
*
|
|
757
|
+
* @remarks
|
|
758
|
+
* - Convenience method that internally calls `detect()` and returns only the boolean result
|
|
759
|
+
* - For detailed information about matches, use `detect()` instead
|
|
760
|
+
* - Results are cached if caching is enabled (same cache as `detect()`)
|
|
761
|
+
*
|
|
762
|
+
* @example
|
|
763
|
+
* ```typescript
|
|
764
|
+
* const filter = new AllProfanity();
|
|
765
|
+
*
|
|
766
|
+
* if (filter.check("This has bad words")) {
|
|
767
|
+
* console.log("Profanity detected!");
|
|
768
|
+
* }
|
|
769
|
+
*
|
|
770
|
+
* // Quick validation
|
|
771
|
+
* const isClean = !filter.check(userInput);
|
|
772
|
+
* ```
|
|
773
|
+
*
|
|
774
|
+
* @see {@link detect} for detailed profanity analysis
|
|
199
775
|
*/
|
|
200
776
|
check(text: string): boolean;
|
|
201
777
|
/**
|
|
202
|
-
*
|
|
203
|
-
*
|
|
204
|
-
|
|
205
|
-
|
|
778
|
+
* Trie scan that stops at the first match surviving the whole-word,
|
|
779
|
+
* whitelist and boundary checks. Powers the fast path in check().
|
|
780
|
+
*/
|
|
781
|
+
private hasMatchInPass;
|
|
782
|
+
/**
|
|
783
|
+
* Cleans text by replacing profanity with a placeholder character.
|
|
784
|
+
*
|
|
785
|
+
* @param {string} text - The text to clean
|
|
786
|
+
* @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
|
|
787
|
+
* @returns {string} The cleaned text with profanity replaced
|
|
788
|
+
*
|
|
789
|
+
* @throws {TypeError} If text is not a string
|
|
790
|
+
*
|
|
791
|
+
* @remarks
|
|
792
|
+
* ### Character-level Replacement:
|
|
793
|
+
* - Each profane character is replaced individually
|
|
794
|
+
* - "bad" with placeholder "*" becomes "***"
|
|
795
|
+
* - Preserves text length and structure
|
|
796
|
+
*
|
|
797
|
+
* ### Placeholder Behavior:
|
|
798
|
+
* - If no placeholder provided, uses the instance's default placeholder
|
|
799
|
+
* - If placeholder provided, uses only the first character
|
|
800
|
+
* - Empty placeholder throws error
|
|
801
|
+
*
|
|
802
|
+
* @example
|
|
803
|
+
* ```typescript
|
|
804
|
+
* const filter = new AllProfanity();
|
|
805
|
+
*
|
|
806
|
+
* // Using default placeholder (*)
|
|
807
|
+
* const cleaned = filter.clean("This has bad words");
|
|
808
|
+
* console.log(cleaned); // "This has *** *****"
|
|
809
|
+
*
|
|
810
|
+
* // Using custom placeholder
|
|
811
|
+
* const cleaned2 = filter.clean("This has bad words", "#");
|
|
812
|
+
* console.log(cleaned2); // "This has ### #####"
|
|
813
|
+
* ```
|
|
814
|
+
*
|
|
815
|
+
* @example
|
|
816
|
+
* ```typescript
|
|
817
|
+
* // Clean user-generated content for display
|
|
818
|
+
* const userComment = "Some inappropriate words here";
|
|
819
|
+
* const safeComment = filter.clean(userComment);
|
|
820
|
+
* displayComment(safeComment);
|
|
821
|
+
* ```
|
|
822
|
+
*
|
|
823
|
+
* @see {@link cleanWithPlaceholder} for word-level replacement
|
|
824
|
+
* @see {@link setPlaceholder} to change default placeholder
|
|
206
825
|
*/
|
|
207
826
|
clean(text: string, placeholder?: string): string;
|
|
208
827
|
/**
|
|
209
|
-
*
|
|
210
|
-
*
|
|
211
|
-
* @param
|
|
212
|
-
* @
|
|
828
|
+
* Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
|
|
829
|
+
*
|
|
830
|
+
* @param {string} text - The text to clean
|
|
831
|
+
* @param {string} [placeholder="***"] - The placeholder string to use for each profane word
|
|
832
|
+
* @returns {string} The cleaned text with each profane word replaced by the placeholder
|
|
833
|
+
*
|
|
834
|
+
* @throws {TypeError} If text is not a string
|
|
835
|
+
*
|
|
836
|
+
* @remarks
|
|
837
|
+
* ### Word-level Replacement:
|
|
838
|
+
* - Each profane word is replaced with the entire placeholder string (not character-by-character)
|
|
839
|
+
* - "bad words" with placeholder "***" becomes "*** ***"
|
|
840
|
+
* - Does NOT preserve original text length
|
|
841
|
+
*
|
|
842
|
+
* ### Difference from `clean()`:
|
|
843
|
+
* - `clean()`: Character-level replacement - "words" becomes "*****" (preserves length)
|
|
844
|
+
* - `cleanWithPlaceholder()`: Word-level replacement - "words" becomes "***" (fixed placeholder)
|
|
845
|
+
*
|
|
846
|
+
* @example
|
|
847
|
+
* ```typescript
|
|
848
|
+
* const filter = new AllProfanity();
|
|
849
|
+
*
|
|
850
|
+
* const text = "This has bad words"; // cleaned below with the default placeholder (***)
|
|
851
|
+
* const cleaned = filter.cleanWithPlaceholder(text);
|
|
852
|
+
* console.log(cleaned); // "This has *** ***"
|
|
853
|
+
*
|
|
854
|
+
* // Custom placeholder
|
|
855
|
+
* const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
|
|
856
|
+
* console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
|
|
857
|
+
* ```
|
|
858
|
+
*
|
|
859
|
+
* @example
|
|
860
|
+
* ```typescript
|
|
861
|
+
* // Censoring chat messages
|
|
862
|
+
* const message = "You are a badword and stupid";
|
|
863
|
+
* const censored = filter.cleanWithPlaceholder(message, "[***]");
|
|
864
|
+
* // Result: "You are a [***] and [***]"
|
|
865
|
+
* ```
|
|
866
|
+
*
|
|
867
|
+
* @see {@link clean} for character-level replacement
|
|
213
868
|
*/
|
|
214
869
|
cleanWithPlaceholder(text: string, placeholder?: string): string;
|
|
215
870
|
/**
|
|
216
|
-
*
|
|
217
|
-
*
|
|
871
|
+
* Dynamically adds one or more words to the profanity filter at runtime.
|
|
872
|
+
*
|
|
873
|
+
* @param {string | string[]} word - A single word or array of words to add to the filter
|
|
874
|
+
* @returns {void}
|
|
875
|
+
*
|
|
876
|
+
* @remarks
|
|
877
|
+
* ### Behavior:
|
|
878
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
879
|
+
* - Automatically normalizes words based on caseSensitive setting
|
|
880
|
+
* - Skips whitelisted words
|
|
881
|
+
* - Validates and filters out non-string or empty values
|
|
882
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
883
|
+
*
|
|
884
|
+
* ### Use Cases:
|
|
885
|
+
* - Adding context-specific profanity
|
|
886
|
+
* - Building dynamic word lists from user reports
|
|
887
|
+
* - Customizing filters for specific communities/applications
|
|
888
|
+
*
|
|
889
|
+
* @example
|
|
890
|
+
* ```typescript
|
|
891
|
+
* const filter = new AllProfanity();
|
|
892
|
+
*
|
|
893
|
+
* // Add single word
|
|
894
|
+
* filter.add('newbadword');
|
|
895
|
+
*
|
|
896
|
+
* // Add multiple words
|
|
897
|
+
* filter.add(['word1', 'word2', 'word3']);
|
|
898
|
+
*
|
|
899
|
+
* // Now these words will be detected
|
|
900
|
+
* filter.check('newbadword'); // true
|
|
901
|
+
* ```
|
|
902
|
+
*
|
|
903
|
+
* @example
|
|
904
|
+
* ```typescript
|
|
905
|
+
* // Add game-specific slang dynamically
|
|
906
|
+
* const filter = new AllProfanity();
|
|
907
|
+
* const gamingSlang = ['noob', 'trash', 'tryhard'];
|
|
908
|
+
* filter.add(gamingSlang);
|
|
909
|
+
*
|
|
910
|
+
* const message = "You're such a noob";
|
|
911
|
+
* console.log(filter.check(message)); // true
|
|
912
|
+
* ```
|
|
913
|
+
*
|
|
914
|
+
* @see {@link remove} to remove words
|
|
915
|
+
* @see {@link loadCustomDictionary} for loading named dictionaries
|
|
218
916
|
*/
|
|
219
917
|
add(word: string | string[]): void;
|
|
220
918
|
/**
|
|
221
|
-
*
|
|
222
|
-
*
|
|
919
|
+
* Dynamically removes one or more words from the profanity filter at runtime.
|
|
920
|
+
*
|
|
921
|
+
* @param {string | string[]} word - A single word or array of words to remove from the filter
|
|
922
|
+
* @returns {void}
|
|
923
|
+
*
|
|
924
|
+
* @remarks
|
|
925
|
+
* ### Behavior:
|
|
926
|
+
* - Removes words from all active data structures (Trie, dynamic words set)
|
|
927
|
+
* - Normalizes words based on caseSensitive setting before removal
|
|
928
|
+
* - Only removes dynamically added words, not words from loaded language dictionaries
|
|
929
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
930
|
+
*
|
|
931
|
+
* ### Important Notes:
|
|
932
|
+
* - Cannot remove words from built-in language dictionaries
|
|
933
|
+
* - To exclude dictionary words, use `addToWhitelist()` instead
|
|
934
|
+
* - Validates and filters out non-string or empty values
|
|
935
|
+
*
|
|
936
|
+
* @example
|
|
937
|
+
* ```typescript
|
|
938
|
+
* const filter = new AllProfanity();
|
|
939
|
+
*
|
|
940
|
+
* // Add then remove a word
|
|
941
|
+
* filter.add('tempword');
|
|
942
|
+
* filter.check('tempword'); // true
|
|
943
|
+
*
|
|
944
|
+
* filter.remove('tempword');
|
|
945
|
+
* filter.check('tempword'); // false
|
|
946
|
+
*
|
|
947
|
+
* // Remove multiple words
|
|
948
|
+
* filter.remove(['word1', 'word2']);
|
|
949
|
+
* ```
|
|
950
|
+
*
|
|
951
|
+
* @example
|
|
952
|
+
* ```typescript
|
|
953
|
+
* // Managing custom word list
|
|
954
|
+
* const filter = new AllProfanity();
|
|
955
|
+
* filter.add(['custom1', 'custom2', 'custom3']);
|
|
956
|
+
*
|
|
957
|
+
* // Later, remove one that's no longer needed
|
|
958
|
+
* filter.remove('custom2');
|
|
959
|
+
* ```
|
|
960
|
+
*
|
|
961
|
+
* @see {@link add} to add words
|
|
962
|
+
* @see {@link addToWhitelist} to exclude dictionary words without removing them
|
|
223
963
|
*/
|
|
224
964
|
remove(word: string | string[]): void;
|
|
225
965
|
/**
|
|
@@ -239,9 +979,60 @@ export declare class AllProfanity {
|
|
|
239
979
|
*/
|
|
240
980
|
private isWhitelisted;
|
|
241
981
|
/**
|
|
242
|
-
*
|
|
243
|
-
*
|
|
244
|
-
* @
|
|
982
|
+
* Loads a built-in language dictionary into the profanity filter.
|
|
983
|
+
*
|
|
984
|
+
* @param {string} language - The language key to load (case-insensitive)
|
|
985
|
+
* @returns {boolean} True if the language was loaded successfully or was already loaded, false if the language was not found
|
|
986
|
+
*
|
|
987
|
+
* @remarks
|
|
988
|
+
* ### Available Languages:
|
|
989
|
+
* - `'english'` - English profanity words
|
|
990
|
+
* - `'hindi'` - Hindi profanity words
|
|
991
|
+
* - `'french'` - French profanity words
|
|
992
|
+
* - `'german'` - German profanity words
|
|
993
|
+
* - `'spanish'` - Spanish profanity words
|
|
994
|
+
* - `'bengali'` - Bengali profanity words
|
|
995
|
+
* - `'tamil'` - Tamil profanity words
|
|
996
|
+
* - `'telugu'` - Telugu profanity words
|
|
997
|
+
* - `'brazilian'` - Brazilian Portuguese profanity words
|
|
998
|
+
*
|
|
999
|
+
* ### Behavior:
|
|
1000
|
+
* - Language keys are case-insensitive
|
|
1001
|
+
* - Loading is idempotent - calling multiple times for same language is safe
|
|
1002
|
+
* - Returns true if language loaded successfully or was already loaded
|
|
1003
|
+
* - Returns false if language not found
|
|
1004
|
+
* - Logs success/failure messages (unless silent mode enabled)
|
|
1005
|
+
* - Words are added to all active data structures
|
|
1006
|
+
*
|
|
1007
|
+
* ### Default Languages:
|
|
1008
|
+
* English and Hindi are loaded automatically in the constructor
|
|
1009
|
+
*
|
|
1010
|
+
* @example
|
|
1011
|
+
* ```typescript
|
|
1012
|
+
* const filter = new AllProfanity();
|
|
1013
|
+
*
|
|
1014
|
+
* // Load additional languages
|
|
1015
|
+
* filter.loadLanguage('french');
|
|
1016
|
+
* filter.loadLanguage('spanish');
|
|
1017
|
+
*
|
|
1018
|
+
* // Case-insensitive
|
|
1019
|
+
* filter.loadLanguage('GERMAN'); // Works
|
|
1020
|
+
*
|
|
1021
|
+
* // Check if loaded
|
|
1022
|
+
* console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
|
|
1023
|
+
* ```
|
|
1024
|
+
*
|
|
1025
|
+
* @example
|
|
1026
|
+
* ```typescript
|
|
1027
|
+
* // Load all Indian languages at once
|
|
1028
|
+
* const filter = new AllProfanity();
|
|
1029
|
+
* filter.loadIndianLanguages();
|
|
1030
|
+
* ```
|
|
1031
|
+
*
|
|
1032
|
+
* @see {@link loadLanguages} to load multiple languages at once
|
|
1033
|
+
* @see {@link loadIndianLanguages} for convenience method
|
|
1034
|
+
* @see {@link getAvailableLanguages} to see all available languages
|
|
1035
|
+
* @see {@link getLoadedLanguages} to see currently loaded languages
|
|
245
1036
|
*/
|
|
246
1037
|
loadLanguage(language: string): boolean;
|
|
247
1038
|
/**
|
|
@@ -256,9 +1047,64 @@ export declare class AllProfanity {
|
|
|
256
1047
|
*/
|
|
257
1048
|
loadIndianLanguages(): number;
|
|
258
1049
|
/**
|
|
259
|
-
*
|
|
260
|
-
*
|
|
261
|
-
* @param
|
|
1050
|
+
* Loads a custom dictionary of profane words with a specific name.
|
|
1051
|
+
*
|
|
1052
|
+
* @param {string} name - Unique name/identifier for this custom dictionary
|
|
1053
|
+
* @param {string[]} words - Array of profane words to add to the dictionary
|
|
1054
|
+
* @returns {void}
|
|
1055
|
+
*
|
|
1056
|
+
* @throws {TypeError} If name is not a string or words is not an array
|
|
1057
|
+
*
|
|
1058
|
+
* @remarks
|
|
1059
|
+
* ### Behavior:
|
|
1060
|
+
* - Creates a new named dictionary or overwrites existing one with same name
|
|
1061
|
+
* - Validates and filters out non-string and empty values from words array
|
|
1062
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1063
|
+
* - Dictionary name is converted to lowercase for storage
|
|
1064
|
+
* - Logs count of loaded words (unless silent mode enabled)
|
|
1065
|
+
*
|
|
1066
|
+
* ### Use Cases:
|
|
1067
|
+
* - Domain-specific profanity (gaming, medical, legal, etc.)
|
|
1068
|
+
* - Organization-specific word lists
|
|
1069
|
+
* - Temporary or context-dependent filters
|
|
1070
|
+
* - Testing and development
|
|
1071
|
+
*
|
|
1072
|
+
* @example
|
|
1073
|
+
* ```typescript
|
|
1074
|
+
* const filter = new AllProfanity();
|
|
1075
|
+
*
|
|
1076
|
+
* // Load gaming-specific slang
|
|
1077
|
+
* filter.loadCustomDictionary('gaming', [
|
|
1078
|
+
* 'noob',
|
|
1079
|
+
* 'scrub',
|
|
1080
|
+
* 'tryhard',
|
|
1081
|
+
* 'trash'
|
|
1082
|
+
* ]);
|
|
1083
|
+
*
|
|
1084
|
+
* // Load company-specific terms
|
|
1085
|
+
* filter.loadCustomDictionary('company', [
|
|
1086
|
+
* 'competitor1',
|
|
1087
|
+
* 'bannedTerm1',
|
|
1088
|
+
* 'inappropriateJargon'
|
|
1089
|
+
* ]);
|
|
1090
|
+
*
|
|
1091
|
+
* console.log(filter.check('You are such a noob')); // true
|
|
1092
|
+
* ```
|
|
1093
|
+
*
|
|
1094
|
+
* @example
|
|
1095
|
+
* ```typescript
|
|
1096
|
+
* // Load from external source
|
|
1097
|
+
* const filter = new AllProfanity();
|
|
1098
|
+
*
|
|
1099
|
+
* async function loadExternalDictionary() {
|
|
1100
|
+
* const response = await fetch('https://example.com/custom-words.json');
|
|
1101
|
+
* const customWords = await response.json();
|
|
1102
|
+
* filter.loadCustomDictionary('external', customWords);
|
|
1103
|
+
* }
|
|
1104
|
+
* ```
|
|
1105
|
+
*
|
|
1106
|
+
* @see {@link add} for adding individual words dynamically
|
|
1107
|
+
* @see {@link loadLanguage} for loading built-in language dictionaries
|
|
262
1108
|
*/
|
|
263
1109
|
loadCustomDictionary(name: string, words: string[]): void;
|
|
264
1110
|
/**
|
|
@@ -298,9 +1144,10 @@ export declare class AllProfanity {
|
|
|
298
1144
|
*/
|
|
299
1145
|
getConfig(): Partial<AllProfanityOptions>;
|
|
300
1146
|
/**
|
|
301
|
-
* Rebuild
|
|
1147
|
+
* Rebuild all matching structures (trie, Aho-Corasick automaton, Bloom
|
|
1148
|
+
* Filter) from loaded dictionaries and dynamic words.
|
|
302
1149
|
*/
|
|
303
|
-
private
|
|
1150
|
+
private rebuildIndexes;
|
|
304
1151
|
/**
|
|
305
1152
|
* Update configuration options for the profanity filter.
|
|
306
1153
|
* @param options - Partial configuration object.
|
|
@@ -315,6 +1162,7 @@ export declare class AllProfanity {
|
|
|
315
1162
|
}
|
|
316
1163
|
/**
|
|
317
1164
|
* Singleton instance of AllProfanity with default configuration.
|
|
1165
|
+
* Silent so that importing the library never writes to the console.
|
|
318
1166
|
*/
|
|
319
1167
|
declare const allProfanity: AllProfanity;
|
|
320
1168
|
export default allProfanity;
|