bekindprofanityfilter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTORS.md +106 -0
- package/LICENSE +22 -0
- package/README.md +1015 -0
- package/allprofanity.config.example.json +35 -0
- package/bin/init.js +49 -0
- package/config.schema.json +163 -0
- package/dist/algos/aho-corasick.d.ts +75 -0
- package/dist/algos/aho-corasick.js +238 -0
- package/dist/algos/aho-corasick.js.map +1 -0
- package/dist/algos/bloom-filter.d.ts +103 -0
- package/dist/algos/bloom-filter.js +208 -0
- package/dist/algos/bloom-filter.js.map +1 -0
- package/dist/algos/context-patterns.d.ts +102 -0
- package/dist/algos/context-patterns.js +484 -0
- package/dist/algos/context-patterns.js.map +1 -0
- package/dist/index.d.ts +1332 -0
- package/dist/index.js +2631 -0
- package/dist/index.js.map +1 -0
- package/dist/innocence-scoring.d.ts +23 -0
- package/dist/innocence-scoring.js +118 -0
- package/dist/innocence-scoring.js.map +1 -0
- package/dist/language-detector.d.ts +162 -0
- package/dist/language-detector.js +952 -0
- package/dist/language-detector.js.map +1 -0
- package/dist/language-dicts.d.ts +60 -0
- package/dist/language-dicts.js +2718 -0
- package/dist/language-dicts.js.map +1 -0
- package/dist/languages/arabic-words.d.ts +10 -0
- package/dist/languages/arabic-words.js +1649 -0
- package/dist/languages/arabic-words.js.map +1 -0
- package/dist/languages/bengali-words.d.ts +10 -0
- package/dist/languages/bengali-words.js +1696 -0
- package/dist/languages/bengali-words.js.map +1 -0
- package/dist/languages/brazilian-words.d.ts +10 -0
- package/dist/languages/brazilian-words.js +2122 -0
- package/dist/languages/brazilian-words.js.map +1 -0
- package/dist/languages/chinese-words.d.ts +10 -0
- package/dist/languages/chinese-words.js +2728 -0
- package/dist/languages/chinese-words.js.map +1 -0
- package/dist/languages/english-primary-all-languages.d.ts +23 -0
- package/dist/languages/english-primary-all-languages.js +36894 -0
- package/dist/languages/english-primary-all-languages.js.map +1 -0
- package/dist/languages/english-words.d.ts +5 -0
- package/dist/languages/english-words.js +5156 -0
- package/dist/languages/english-words.js.map +1 -0
- package/dist/languages/french-words.d.ts +10 -0
- package/dist/languages/french-words.js +2326 -0
- package/dist/languages/french-words.js.map +1 -0
- package/dist/languages/german-words.d.ts +10 -0
- package/dist/languages/german-words.js +2633 -0
- package/dist/languages/german-words.js.map +1 -0
- package/dist/languages/hindi-words.d.ts +10 -0
- package/dist/languages/hindi-words.js +2341 -0
- package/dist/languages/hindi-words.js.map +1 -0
- package/dist/languages/innocent-words.d.ts +41 -0
- package/dist/languages/innocent-words.js +109 -0
- package/dist/languages/innocent-words.js.map +1 -0
- package/dist/languages/italian-words.d.ts +10 -0
- package/dist/languages/italian-words.js +2287 -0
- package/dist/languages/italian-words.js.map +1 -0
- package/dist/languages/japanese-words.d.ts +11 -0
- package/dist/languages/japanese-words.js +2557 -0
- package/dist/languages/japanese-words.js.map +1 -0
- package/dist/languages/korean-words.d.ts +10 -0
- package/dist/languages/korean-words.js +2509 -0
- package/dist/languages/korean-words.js.map +1 -0
- package/dist/languages/russian-words.d.ts +10 -0
- package/dist/languages/russian-words.js +2175 -0
- package/dist/languages/russian-words.js.map +1 -0
- package/dist/languages/spanish-words.d.ts +11 -0
- package/dist/languages/spanish-words.js +2536 -0
- package/dist/languages/spanish-words.js.map +1 -0
- package/dist/languages/tamil-words.d.ts +10 -0
- package/dist/languages/tamil-words.js +1722 -0
- package/dist/languages/tamil-words.js.map +1 -0
- package/dist/languages/telugu-words.d.ts +10 -0
- package/dist/languages/telugu-words.js +1739 -0
- package/dist/languages/telugu-words.js.map +1 -0
- package/dist/romanization-detector.d.ts +50 -0
- package/dist/romanization-detector.js +779 -0
- package/dist/romanization-detector.js.map +1 -0
- package/package.json +79 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,1332 @@
|
|
|
1
|
+
export { default as allLanguagesBadWords } from "./languages/english-primary-all-languages.js";
|
|
2
|
+
/**
|
|
3
|
+
* Logger interface for BeKind library logging operations.
|
|
4
|
+
*
|
|
5
|
+
* @interface Logger
|
|
6
|
+
* @description Provides a contract for logging implementations used by the BeKind library.
|
|
7
|
+
* Implement this interface to provide custom logging behavior (e.g., logging to files, external services).
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* class CustomLogger implements Logger {
|
|
12
|
+
* info(message: string): void {
|
|
13
|
+
* // Custom info logging logic
|
|
14
|
+
* }
|
|
15
|
+
* warn(message: string): void {
|
|
16
|
+
* // Custom warning logging logic
|
|
17
|
+
* }
|
|
18
|
+
* error(message: string): void {
|
|
19
|
+
* // Custom error logging logic
|
|
20
|
+
* }
|
|
21
|
+
* }
|
|
22
|
+
* const filter = new BeKind({ logger: new CustomLogger() });
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export interface Logger {
|
|
26
|
+
/**
|
|
27
|
+
* Log informational messages about normal operations.
|
|
28
|
+
*
|
|
29
|
+
* @param message - The informational message to log
|
|
30
|
+
* @returns void
|
|
31
|
+
*/
|
|
32
|
+
info(message: string): void;
|
|
33
|
+
/**
|
|
34
|
+
* Log warning messages about potential issues or deprecated usage.
|
|
35
|
+
*
|
|
36
|
+
* @param message - The warning message to log
|
|
37
|
+
* @returns void
|
|
38
|
+
*/
|
|
39
|
+
warn(message: string): void;
|
|
40
|
+
/**
|
|
41
|
+
* Log error messages about failures or critical issues.
|
|
42
|
+
*
|
|
43
|
+
* @param message - The error message to log
|
|
44
|
+
* @returns void
|
|
45
|
+
*/
|
|
46
|
+
error(message: string): void;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Configuration options for initializing an BeKind instance.
|
|
50
|
+
*
|
|
51
|
+
* @interface BeKindOptions
|
|
52
|
+
* @description Comprehensive configuration object for customizing profanity detection behavior,
|
|
53
|
+
* algorithm selection, performance optimizations, and logging.
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```typescript
|
|
57
|
+
* const filter = new BeKind({
|
|
58
|
+
* languages: ['english', 'french'],
|
|
59
|
+
* enableLeetSpeak: true,
|
|
60
|
+
* strictMode: true,
|
|
61
|
+
* algorithm: {
|
|
62
|
+
* matching: 'hybrid',
|
|
63
|
+
* useBloomFilter: true
|
|
64
|
+
* },
|
|
65
|
+
* performance: {
|
|
66
|
+
* enableCaching: true,
|
|
67
|
+
* cacheSize: 500
|
|
68
|
+
* }
|
|
69
|
+
* });
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
export interface BeKindOptions {
|
|
73
|
+
/**
|
|
74
|
+
* Array of language keys to load (e.g., 'english', 'hindi', 'french').
|
|
75
|
+
* Available languages: english, hindi, french, german, spanish, bengali, tamil, telugu, brazilian.
|
|
76
|
+
*
|
|
77
|
+
* @default ['english', 'hindi'] (loaded by default in constructor)
|
|
78
|
+
*/
|
|
79
|
+
languages?: string[];
|
|
80
|
+
/**
|
|
81
|
+
* Custom dictionaries to load in addition to built-in languages.
|
|
82
|
+
* Key is the dictionary name, value is an array of words.
|
|
83
|
+
*
|
|
84
|
+
* @example
|
|
85
|
+
* ```typescript
|
|
86
|
+
* customDictionaries: {
|
|
87
|
+
* 'gaming': ['noob', 'trash'],
|
|
88
|
+
* 'custom': ['word1', 'word2']
|
|
89
|
+
* }
|
|
90
|
+
* ```
|
|
91
|
+
*/
|
|
92
|
+
customDictionaries?: Record<string, string[]>;
|
|
93
|
+
/**
|
|
94
|
+
* Single character to use as replacement placeholder for profane characters.
|
|
95
|
+
*
|
|
96
|
+
* @default "*"
|
|
97
|
+
*/
|
|
98
|
+
defaultPlaceholder?: string;
|
|
99
|
+
/**
|
|
100
|
+
* Enable detection and normalization of leet speak variations (e.g., "h3ll0" -> "hello").
|
|
101
|
+
*
|
|
102
|
+
* @default true
|
|
103
|
+
*/
|
|
104
|
+
enableLeetSpeak?: boolean;
|
|
105
|
+
/**
|
|
106
|
+
* Enable case-sensitive matching. When false, all matching is done in lowercase.
|
|
107
|
+
*
|
|
108
|
+
* @default false
|
|
109
|
+
*/
|
|
110
|
+
caseSensitive?: boolean;
|
|
111
|
+
/**
|
|
112
|
+
* Array of words to whitelist (never flag as profanity even if in dictionaries).
|
|
113
|
+
*
|
|
114
|
+
* @example ['hello', 'class', 'assignment']
|
|
115
|
+
*/
|
|
116
|
+
whitelistWords?: string[];
|
|
117
|
+
/**
|
|
118
|
+
* Strict mode requires profanity to be surrounded by word boundaries (spaces, punctuation).
|
|
119
|
+
* When false, profanity embedded in other words may be detected.
|
|
120
|
+
*
|
|
121
|
+
* @default false
|
|
122
|
+
*/
|
|
123
|
+
strictMode?: boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Allow detection of profanity as partial matches within larger words.
|
|
126
|
+
* When true, "badword" will be detected in "mybadwordhere".
|
|
127
|
+
*
|
|
128
|
+
* @default false
|
|
129
|
+
*/
|
|
130
|
+
detectPartialWords?: boolean;
|
|
131
|
+
/**
|
|
132
|
+
* Enable embedded profanity detection with certainty decay.
|
|
133
|
+
* When true, profane substrings inside larger words are detected (e.g., "bitch" in "lbitch")
|
|
134
|
+
* with decayed certainty based on extra characters and length ratio.
|
|
135
|
+
* Only reports matches where decayed certainty >= 2.
|
|
136
|
+
*
|
|
137
|
+
* Formula: decayed_c = base_c * (0.9 ^ extra_chars) * (profane_len / total_word_len)
|
|
138
|
+
*
|
|
139
|
+
* @default false
|
|
140
|
+
*/
|
|
141
|
+
embeddedProfanityDetection?: boolean;
|
|
142
|
+
/**
|
|
143
|
+
* Allow the trie to skip over separator characters (spaces, @, ., -, _, *, etc.)
|
|
144
|
+
* during matching. Catches evasion patterns like "fu ck", "cun t", "fu@ck@cu@nt@bi@tch".
|
|
145
|
+
*
|
|
146
|
+
* When set to a number, specifies the max consecutive separators to skip per gap
|
|
147
|
+
* (e.g., 5 means "f uck" is caught but "f uck" with 6 spaces is not).
|
|
148
|
+
* When true, defaults to 5. When false, disabled.
|
|
149
|
+
*
|
|
150
|
+
* @default true (max 5 separators per gap)
|
|
151
|
+
*/
|
|
152
|
+
separatorTolerance?: boolean | number;
|
|
153
|
+
/**
|
|
154
|
+
* Custom logger implementation for handling log messages.
|
|
155
|
+
* If not provided, defaults to ConsoleLogger unless silent mode is enabled.
|
|
156
|
+
*/
|
|
157
|
+
logger?: Logger;
|
|
158
|
+
/**
|
|
159
|
+
* Silent mode suppresses all logging output.
|
|
160
|
+
* When true, uses SilentLogger to discard all log messages.
|
|
161
|
+
*
|
|
162
|
+
* @default false
|
|
163
|
+
*/
|
|
164
|
+
silent?: boolean;
|
|
165
|
+
/**
|
|
166
|
+
* Sensitive mode flags AMBIVALENT words as profanity too.
|
|
167
|
+
* When true, cross-language collisions that were dampened by innocence
|
|
168
|
+
* scoring (e.g. "bitte" = German "please" dampened from French "bite")
|
|
169
|
+
* still count as profanity in check()/detect().hasProfanity.
|
|
170
|
+
* When false (default), only PROFANE-scored words trigger hasProfanity —
|
|
171
|
+
* dampened words are ignored, reducing false positives.
|
|
172
|
+
*
|
|
173
|
+
* @default false
|
|
174
|
+
*/
|
|
175
|
+
sensitiveMode?: boolean;
|
|
176
|
+
/**
|
|
177
|
+
* Advanced algorithm configuration for pattern matching strategies.
|
|
178
|
+
*/
|
|
179
|
+
algorithm?: {
|
|
180
|
+
/**
|
|
181
|
+
* Primary matching algorithm to use.
|
|
182
|
+
* - 'trie': Fast prefix tree matching (default, best for most use cases)
|
|
183
|
+
* - 'aho-corasick': Multi-pattern matching (best for large dictionaries)
|
|
184
|
+
* - 'hybrid': Combines Aho-Corasick with Bloom Filter (best for extreme performance)
|
|
185
|
+
*
|
|
186
|
+
* @default "trie"
|
|
187
|
+
*/
|
|
188
|
+
matching?: "trie" | "aho-corasick" | "hybrid";
|
|
189
|
+
/**
|
|
190
|
+
* Enable Aho-Corasick automaton for multi-pattern matching.
|
|
191
|
+
* Automatically enabled when matching is set to 'aho-corasick' or 'hybrid'.
|
|
192
|
+
*
|
|
193
|
+
* @default false
|
|
194
|
+
*/
|
|
195
|
+
useAhoCorasick?: boolean;
|
|
196
|
+
/**
|
|
197
|
+
* Enable Bloom Filter for probabilistic quick rejection of non-profane text.
|
|
198
|
+
* Automatically enabled when matching is set to 'hybrid'.
|
|
199
|
+
*
|
|
200
|
+
* @default false
|
|
201
|
+
*/
|
|
202
|
+
useBloomFilter?: boolean;
|
|
203
|
+
/**
|
|
204
|
+
* Enable context analysis to reduce false positives based on surrounding words.
|
|
205
|
+
*
|
|
206
|
+
* @default false
|
|
207
|
+
*/
|
|
208
|
+
useContextAnalysis?: boolean;
|
|
209
|
+
};
|
|
210
|
+
/**
|
|
211
|
+
* Bloom Filter configuration for probabilistic matching optimization.
|
|
212
|
+
*/
|
|
213
|
+
bloomFilter?: {
|
|
214
|
+
/**
|
|
215
|
+
* Enable Bloom Filter.
|
|
216
|
+
*
|
|
217
|
+
* @default false
|
|
218
|
+
*/
|
|
219
|
+
enabled?: boolean;
|
|
220
|
+
/**
|
|
221
|
+
* Expected number of items to be stored in the Bloom Filter.
|
|
222
|
+
* Higher values increase memory usage but reduce false positive rate.
|
|
223
|
+
*
|
|
224
|
+
* @default 10000
|
|
225
|
+
*/
|
|
226
|
+
expectedItems?: number;
|
|
227
|
+
/**
|
|
228
|
+
* Target false positive rate (probability of incorrectly identifying non-profanity as profanity).
|
|
229
|
+
* Lower values increase memory usage but improve accuracy.
|
|
230
|
+
*
|
|
231
|
+
* @default 0.01 (1%)
|
|
232
|
+
*/
|
|
233
|
+
falsePositiveRate?: number;
|
|
234
|
+
};
|
|
235
|
+
/**
|
|
236
|
+
* Aho-Corasick automaton configuration for multi-pattern matching.
|
|
237
|
+
*/
|
|
238
|
+
ahoCorasick?: {
|
|
239
|
+
/**
|
|
240
|
+
* Enable Aho-Corasick automaton.
|
|
241
|
+
*
|
|
242
|
+
* @default false
|
|
243
|
+
*/
|
|
244
|
+
enabled?: boolean;
|
|
245
|
+
/**
|
|
246
|
+
* Pre-build the automaton during initialization.
|
|
247
|
+
* When false, automaton is built lazily on first use.
|
|
248
|
+
*
|
|
249
|
+
* @default false
|
|
250
|
+
*/
|
|
251
|
+
prebuild?: boolean;
|
|
252
|
+
};
|
|
253
|
+
/**
|
|
254
|
+
* Context analysis configuration for reducing false positives.
|
|
255
|
+
*/
|
|
256
|
+
contextAnalysis?: {
|
|
257
|
+
/**
|
|
258
|
+
* Enable context-aware profanity detection.
|
|
259
|
+
*
|
|
260
|
+
* @default false
|
|
261
|
+
*/
|
|
262
|
+
enabled?: boolean;
|
|
263
|
+
/**
|
|
264
|
+
* Number of words before and after the detected word to analyze for context.
|
|
265
|
+
*
|
|
266
|
+
* @default 5
|
|
267
|
+
*/
|
|
268
|
+
contextWindow?: number;
|
|
269
|
+
/**
|
|
270
|
+
* Languages to use for context analysis (e.g., ['en', 'es']).
|
|
271
|
+
*
|
|
272
|
+
* @default ['en']
|
|
273
|
+
*/
|
|
274
|
+
languages?: string[];
|
|
275
|
+
/**
|
|
276
|
+
* Minimum confidence score (0-1) required to flag as profanity.
|
|
277
|
+
* Higher values reduce false positives but may miss some profanity.
|
|
278
|
+
*
|
|
279
|
+
* @default 0.5
|
|
280
|
+
*/
|
|
281
|
+
scoreThreshold?: number;
|
|
282
|
+
};
|
|
283
|
+
/**
|
|
284
|
+
* Performance optimization configuration.
|
|
285
|
+
*/
|
|
286
|
+
performance?: {
|
|
287
|
+
/**
|
|
288
|
+
* Maximum number of results to cache in LRU cache.
|
|
289
|
+
*
|
|
290
|
+
* @default 1000
|
|
291
|
+
*/
|
|
292
|
+
cacheSize?: number;
|
|
293
|
+
/**
|
|
294
|
+
* Enable result caching to speed up repeated queries.
|
|
295
|
+
* Stores detection results for previously seen text.
|
|
296
|
+
*
|
|
297
|
+
* @default false
|
|
298
|
+
*/
|
|
299
|
+
enableCaching?: boolean;
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Severity levels for profanity detection results.
|
|
304
|
+
*
|
|
305
|
+
* @enum {number}
|
|
306
|
+
* @description Categorizes the severity of detected profanity based on the number
|
|
307
|
+
* of unique words and total matches found in the text.
|
|
308
|
+
*
|
|
309
|
+
* @readonly
|
|
310
|
+
* @example
|
|
311
|
+
* ```typescript
|
|
312
|
+
* const result = filter.detect("some text");
|
|
313
|
+
* if (result.severity === ProfanitySeverity.EXTREME) {
|
|
314
|
+
* // Handle extreme profanity
|
|
315
|
+
* }
|
|
316
|
+
* ```
|
|
317
|
+
*/
|
|
318
|
+
export declare enum ProfanitySeverity {
|
|
319
|
+
/** Mild profanity: 1 unique word or 1 total match */
|
|
320
|
+
MILD = 1,
|
|
321
|
+
/** Moderate profanity: 2 unique words or 2 total matches */
|
|
322
|
+
MODERATE = 2,
|
|
323
|
+
/** Severe profanity: 3 unique words or 3 total matches */
|
|
324
|
+
SEVERE = 3,
|
|
325
|
+
/** Extreme profanity: 4+ unique words or 5+ total matches */
|
|
326
|
+
EXTREME = 4
|
|
327
|
+
}
|
|
328
|
+
/**
|
|
329
|
+
* Per-word severity classification for individual detected words.
|
|
330
|
+
*
|
|
331
|
+
* @enum {number}
|
|
332
|
+
*/
|
|
333
|
+
export declare enum WordSeverity {
|
|
334
|
+
/** Ambivalent: mild/contextual profanity that may be acceptable (damn, hell, crap, suck) */
|
|
335
|
+
AMBIVALENT = 1,
|
|
336
|
+
/** Profane: should be flagged — strong profanity, slurs, explicit content */
|
|
337
|
+
PROFANE = 2
|
|
338
|
+
}
|
|
339
|
+
/**
|
|
340
|
+
* A detected word with its individual severity classification.
|
|
341
|
+
*/
|
|
342
|
+
export interface ScoredWord {
|
|
343
|
+
/** The word as it appeared in the original text */
|
|
344
|
+
word: string;
|
|
345
|
+
/** Severity classification for this specific word */
|
|
346
|
+
severity: WordSeverity;
|
|
347
|
+
}
|
|
348
|
+
/**
|
|
349
|
+
* Result object returned from profanity detection operations.
|
|
350
|
+
*
|
|
351
|
+
* @interface ProfanityDetectionResult
|
|
352
|
+
* @description Contains comprehensive information about detected profanity including
|
|
353
|
+
* what was found, where it was found, how severe it is, and a cleaned version of the text.
|
|
354
|
+
*
|
|
355
|
+
* @example
|
|
356
|
+
* ```typescript
|
|
357
|
+
* const result = filter.detect("This is a bad word");
|
|
358
|
+
* console.log(result.hasProfanity); // true
|
|
359
|
+
* console.log(result.detectedWords); // ['bad word']
|
|
360
|
+
* console.log(result.cleanedText); // 'This is a *** ****'
|
|
361
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
362
|
+
* console.log(result.positions); // [{ word: 'bad word', start: 10, end: 18 }]
|
|
363
|
+
* ```
|
|
364
|
+
*/
|
|
365
|
+
export interface ProfanityDetectionResult {
|
|
366
|
+
/**
|
|
367
|
+
* Whether any profanity was detected in the text.
|
|
368
|
+
*
|
|
369
|
+
* @type {boolean}
|
|
370
|
+
*/
|
|
371
|
+
hasProfanity: boolean;
|
|
372
|
+
/**
|
|
373
|
+
* Array of detected profane words/phrases as they appeared in the original text.
|
|
374
|
+
* Includes case and formatting from the original text.
|
|
375
|
+
*
|
|
376
|
+
* @type {string[]}
|
|
377
|
+
*/
|
|
378
|
+
detectedWords: string[];
|
|
379
|
+
/**
|
|
380
|
+
* The text with all profanity replaced by placeholder characters.
|
|
381
|
+
* Each profane character is replaced with the configured placeholder (default: '*').
|
|
382
|
+
*
|
|
383
|
+
* @type {string}
|
|
384
|
+
*/
|
|
385
|
+
cleanedText: string;
|
|
386
|
+
/**
|
|
387
|
+
* Severity level of detected profanity.
|
|
388
|
+
*
|
|
389
|
+
* @type {ProfanitySeverity}
|
|
390
|
+
*/
|
|
391
|
+
severity: ProfanitySeverity;
|
|
392
|
+
/**
|
|
393
|
+
* Precise positions of each detected profane word in the original text.
|
|
394
|
+
* Useful for highlighting or further processing.
|
|
395
|
+
*
|
|
396
|
+
* @type {Array<{ word: string; start: number; end: number }>}
|
|
397
|
+
*/
|
|
398
|
+
positions: Array<{
|
|
399
|
+
word: string;
|
|
400
|
+
start: number;
|
|
401
|
+
end: number;
|
|
402
|
+
}>;
|
|
403
|
+
/**
|
|
404
|
+
* Whether the text contains abhorrent language (hate speech, slurs, extremist terms)
|
|
405
|
+
* that should be flagged for manual review rather than auto-moderated.
|
|
406
|
+
*
|
|
407
|
+
* @type {boolean}
|
|
408
|
+
*/
|
|
409
|
+
needsManualReview: boolean;
|
|
410
|
+
/**
|
|
411
|
+
* The specific abhorrent words that triggered the manual review flag.
|
|
412
|
+
* Empty array if needsManualReview is false.
|
|
413
|
+
*
|
|
414
|
+
* @type {string[]}
|
|
415
|
+
*/
|
|
416
|
+
flaggedAbhorrentWords: string[];
|
|
417
|
+
/**
|
|
418
|
+
* Each detected word with its individual severity classification.
|
|
419
|
+
* Severity is assigned by the library: AMBIVALENT (1) or PROFANE (2).
|
|
420
|
+
* PROFANE words cross the flag threshold (s:5 any c, s:4+ c:2+, s:3 c:3+).
|
|
421
|
+
* AMBIVALENT words are below threshold — mild or contextually acceptable.
|
|
422
|
+
*
|
|
423
|
+
* @type {ScoredWord[]}
|
|
424
|
+
*/
|
|
425
|
+
scoredWords: ScoredWord[];
|
|
426
|
+
/** Highest severity among all detected words. Null if no profanity detected. */
|
|
427
|
+
maxSeverity: WordSeverity | null;
|
|
428
|
+
/**
|
|
429
|
+
* Phrases that matched profanity across space boundaries during separator-tolerant
|
|
430
|
+
* detection. These are NOT flagged as profanity but are captured for review.
|
|
431
|
+
* Each entry includes the matched word, the surrounding context (±5 words),
|
|
432
|
+
* the base score, and the number of space boundaries crossed.
|
|
433
|
+
*/
|
|
434
|
+
suspiciousPhrases: SuspiciousPhrase[];
|
|
435
|
+
}
|
|
436
|
+
/**
|
|
437
|
+
* A phrase that matched profanity across word boundaries (spaces).
|
|
438
|
+
* Not flagged as profanity — captured for manual review or secondary scoring.
|
|
439
|
+
*/
|
|
440
|
+
export interface SuspiciousPhrase {
|
|
441
|
+
/** The profanity dictionary word that was matched */
|
|
442
|
+
word: string;
|
|
443
|
+
/** The text as it appeared in the original input (with separators) */
|
|
444
|
+
originalText: string;
|
|
445
|
+
/** Surrounding context: ±5 words around the suspicious match */
|
|
446
|
+
context: string;
|
|
447
|
+
/** Start position of the match in the original text */
|
|
448
|
+
start: number;
|
|
449
|
+
/** End position of the match in the original text */
|
|
450
|
+
end: number;
|
|
451
|
+
/** Base severity/certainty score from the word list */
|
|
452
|
+
baseScore: {
|
|
453
|
+
severity: number;
|
|
454
|
+
certainty: number;
|
|
455
|
+
};
|
|
456
|
+
/** Number of space boundaries crossed to form this match */
|
|
457
|
+
spaceBoundaries: number;
|
|
458
|
+
}
|
|
459
|
+
/**
|
|
460
|
+
* BeKind - Professional-grade multilingual profanity detection and filtering library.
|
|
461
|
+
*
|
|
462
|
+
* @class BeKind
|
|
463
|
+
* @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
|
|
464
|
+
* with advanced features including leet speak detection, context analysis, multiple matching algorithms,
|
|
465
|
+
* and customizable filtering options.
|
|
466
|
+
*
|
|
467
|
+
* @remarks
|
|
468
|
+
* ### Features:
|
|
469
|
+
* - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
|
|
470
|
+
* - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
|
|
471
|
+
* - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
|
|
472
|
+
* - **Context Analysis**: Reduces false positives using surrounding word context
|
|
473
|
+
* - **Performance**: Built-in caching and optimized data structures
|
|
474
|
+
* - **Flexible**: Custom dictionaries, whitelisting, severity levels
|
|
475
|
+
*
|
|
476
|
+
* ### Default Behavior:
|
|
477
|
+
* - Loads English and Hindi dictionaries by default
|
|
478
|
+
* - Case-insensitive matching
|
|
479
|
+
* - Leet speak detection enabled
|
|
480
|
+
* - Uses Trie algorithm (fastest for most cases)
|
|
481
|
+
*
|
|
482
|
+
* @example
|
|
483
|
+
* ```typescript
|
|
484
|
+
* // Basic usage with default instance
|
|
485
|
+
* import allProfanity from 'allprofanity';
|
|
486
|
+
*
|
|
487
|
+
* const result = allProfanity.detect("This is some bad text");
|
|
488
|
+
* console.log(result.hasProfanity); // true
|
|
489
|
+
* console.log(result.cleanedText); // "This is some *** text"
|
|
490
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
491
|
+
* ```
|
|
492
|
+
*
|
|
493
|
+
* @example
|
|
494
|
+
* ```typescript
|
|
495
|
+
* // Advanced usage with custom configuration
|
|
496
|
+
* import { BeKind, ProfanitySeverity } from 'allprofanity';
|
|
497
|
+
*
|
|
498
|
+
* const filter = new BeKind({
|
|
499
|
+
* languages: ['english', 'french', 'spanish'],
|
|
500
|
+
* enableLeetSpeak: true,
|
|
501
|
+
* strictMode: true,
|
|
502
|
+
* algorithm: {
|
|
503
|
+
* matching: 'hybrid',
|
|
504
|
+
* useBloomFilter: true
|
|
505
|
+
* },
|
|
506
|
+
* performance: {
|
|
507
|
+
* enableCaching: true,
|
|
508
|
+
* cacheSize: 500
|
|
509
|
+
* },
|
|
510
|
+
* whitelistWords: ['class', 'assignment']
|
|
511
|
+
* });
|
|
512
|
+
*
|
|
513
|
+
* const text = "This text has some b@d w0rds";
|
|
514
|
+
* const result = filter.detect(text);
|
|
515
|
+
*
|
|
516
|
+
* if (result.hasProfanity) {
|
|
517
|
+
* console.log(`Found ${result.detectedWords.length} profane words`);
|
|
518
|
+
* console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
|
|
519
|
+
* console.log(`Cleaned: ${result.cleanedText}`);
|
|
520
|
+
* }
|
|
521
|
+
* ```
|
|
522
|
+
*
|
|
523
|
+
* @example
|
|
524
|
+
* ```typescript
|
|
525
|
+
* // Using individual methods
|
|
526
|
+
* const filter = new BeKind();
|
|
527
|
+
*
|
|
528
|
+
* // Simple check
|
|
529
|
+
* if (filter.check("some text")) {
|
|
530
|
+
* console.log("Contains profanity!");
|
|
531
|
+
* }
|
|
532
|
+
*
|
|
533
|
+
* // Clean with custom placeholder
|
|
534
|
+
* const cleaned = filter.clean("bad words here", "#");
|
|
535
|
+
*
|
|
536
|
+
* // Load additional languages
|
|
537
|
+
* filter.loadLanguage('german');
|
|
538
|
+
* filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
|
|
539
|
+
*
|
|
540
|
+
* // Add custom words
|
|
541
|
+
* filter.add(['customword1', 'customword2']);
|
|
542
|
+
*
|
|
543
|
+
* // Remove words
|
|
544
|
+
* filter.remove(['someword']);
|
|
545
|
+
*
|
|
546
|
+
* // Whitelist words
|
|
547
|
+
* filter.addToWhitelist(['class', 'assignment']);
|
|
548
|
+
* ```
|
|
549
|
+
*
|
|
550
|
+
* @see {@link BeKindOptions} for all configuration options
|
|
551
|
+
* @see {@link ProfanityDetectionResult} for detection result format
|
|
552
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
553
|
+
*/
|
|
554
|
+
export declare class BeKind {
|
|
555
|
+
private readonly profanityTrie;
|
|
556
|
+
private readonly whitelistSet;
|
|
557
|
+
private readonly loadedLanguages;
|
|
558
|
+
private readonly logger;
|
|
559
|
+
private defaultPlaceholder;
|
|
560
|
+
private enableLeetSpeak;
|
|
561
|
+
private caseSensitive;
|
|
562
|
+
private strictMode;
|
|
563
|
+
private detectPartialWords;
|
|
564
|
+
private embeddedProfanityDetection;
|
|
565
|
+
private separatorTolerance;
|
|
566
|
+
private sensitiveMode;
|
|
567
|
+
/**
|
|
568
|
+
* Temporary storage for suspicious matches found during separator-tolerant detection.
|
|
569
|
+
* Populated by findSeparatorTolerantMatches() and consumed by detect().
|
|
570
|
+
*/
|
|
571
|
+
private _suspiciousMatches;
|
|
572
|
+
private readonly availableLanguages;
|
|
573
|
+
/**
|
|
574
|
+
* Word score lookup map. Maps lowercase words to their severity and certainty scores.
|
|
575
|
+
* Populated from the scored word list on construction.
|
|
576
|
+
*/
|
|
577
|
+
private readonly wordScores;
|
|
578
|
+
/**
|
|
579
|
+
* Set of abhorrent words/phrases that trigger needsManualReview.
|
|
580
|
+
* Includes hate groups, slurs, extremist terminology, and Nazi references.
|
|
581
|
+
* Stored in lowercase for case-insensitive matching.
|
|
582
|
+
*/
|
|
583
|
+
private readonly abhorrentWords;
|
|
584
|
+
private readonly leetMappings;
|
|
585
|
+
private readonly dynamicWords;
|
|
586
|
+
private ahoCorasickAutomaton;
|
|
587
|
+
private bloomFilter;
|
|
588
|
+
private contextAnalyzer;
|
|
589
|
+
private matchingAlgorithm;
|
|
590
|
+
private resultCache;
|
|
591
|
+
/**
|
|
592
|
+
* Creates a new BeKind instance with the specified configuration.
|
|
593
|
+
*
|
|
594
|
+
* @constructor
|
|
595
|
+
* @param {BeKindOptions} [options] - Configuration options for profanity detection behavior
|
|
596
|
+
*
|
|
597
|
+
* @remarks
|
|
598
|
+
* ### Default Initialization:
|
|
599
|
+
* - Loads English and Hindi dictionaries automatically
|
|
600
|
+
* - Enables leet speak detection
|
|
601
|
+
* - Case-insensitive matching
|
|
602
|
+
* - Uses Trie algorithm for pattern matching
|
|
603
|
+
*
|
|
604
|
+
* ### Performance Considerations:
|
|
605
|
+
* - Initial load time depends on number of languages loaded
|
|
606
|
+
* - Aho-Corasick automaton (if enabled) is built during construction
|
|
607
|
+
* - Bloom Filter (if enabled) is populated during construction
|
|
608
|
+
*
|
|
609
|
+
* @throws {TypeError} If invalid options are provided
|
|
610
|
+
*
|
|
611
|
+
* @example
|
|
612
|
+
* ```typescript
|
|
613
|
+
* // Default instance
|
|
614
|
+
* const filter = new BeKind();
|
|
615
|
+
*
|
|
616
|
+
* // Custom configuration
|
|
617
|
+
* const filter = new BeKind({
|
|
618
|
+
* languages: ['english', 'french'],
|
|
619
|
+
* strictMode: true,
|
|
620
|
+
* defaultPlaceholder: '#',
|
|
621
|
+
* algorithm: { matching: 'hybrid' }
|
|
622
|
+
* });
|
|
623
|
+
*
|
|
624
|
+
* // Silent mode (no logging)
|
|
625
|
+
* const filter = new BeKind({ silent: true });
|
|
626
|
+
* ```
|
|
627
|
+
*
|
|
628
|
+
* @see {@link BeKindOptions} for all available configuration options
|
|
629
|
+
*/
|
|
630
|
+
constructor(options?: BeKindOptions);
|
|
631
|
+
/**
|
|
632
|
+
* Initialize advanced algorithms based on configuration
|
|
633
|
+
*/
|
|
634
|
+
private initializeAdvancedAlgorithms;
|
|
635
|
+
/**
|
|
636
|
+
* Leet mappings where the source is a regular letter (e.g. z→s, v→u, j→y).
|
|
637
|
+
* These are ambiguous because they can destroy legitimate words during
|
|
638
|
+
* normalization (e.g. "nazi" → "nasi"). Separated so that layered
|
|
639
|
+
* normalization can try symbol-only mappings first.
|
|
640
|
+
*/
|
|
641
|
+
private readonly letterToLetterLeetKeys;
|
|
642
|
+
/**
|
|
643
|
+
* Normalize leet speak to regular characters (full pass — all mappings).
|
|
644
|
+
* @param text - The input text.
|
|
645
|
+
* @returns Normalized text.
|
|
646
|
+
*/
|
|
647
|
+
private normalizeLeetSpeak;
|
|
648
|
+
/**
|
|
649
|
+
* Conservative leet normalization — only replaces non-letter characters
|
|
650
|
+
* (digits, symbols, punctuation) with their letter equivalents.
|
|
651
|
+
* Letter-to-letter mappings (z→s, v→u, j→y, ph→f) are skipped so that
|
|
652
|
+
* real letters are preserved, avoiding collisions like "nazi" → "nasi".
|
|
653
|
+
*/
|
|
654
|
+
private normalizeLeetSpeakSymbolsOnly;
|
|
655
|
+
/**
|
|
656
|
+
* Returns all unique leet-normalized variants of the text that differ
|
|
657
|
+
* from the base normalizedText. Runs two layers:
|
|
658
|
+
* 1. Symbol-only normalization (digits/special → letters, preserves real letters)
|
|
659
|
+
* 2. Full normalization (all mappings including letter→letter)
|
|
660
|
+
*
|
|
661
|
+
* This layered approach catches both "n4zi" (symbol-only → "nazi") and
|
|
662
|
+
* "a55" (full → "ass") without one breaking the other.
|
|
663
|
+
*/
|
|
664
|
+
private getLeetVariants;
|
|
665
|
+
/**
|
|
666
|
+
* Non-space separator characters (evasion symbols like @, ., -, etc.)
|
|
667
|
+
* These are skipped freely with no certainty penalty.
|
|
668
|
+
*/
|
|
669
|
+
private static readonly SYMBOL_SEPARATOR_SET;
|
|
670
|
+
/**
|
|
671
|
+
* Check if a character is a non-space separator (skipped freely).
|
|
672
|
+
*/
|
|
673
|
+
private static isSymbolSeparator;
|
|
674
|
+
/**
|
|
675
|
+
* Check if a character is whitespace (skipped with certainty penalty).
|
|
676
|
+
*/
|
|
677
|
+
private static isWhitespaceSeparator;
|
|
678
|
+
/**
|
|
679
|
+
* Check if a character is any kind of separator.
|
|
680
|
+
*/
|
|
681
|
+
private static isSeparator;
|
|
682
|
+
/**
|
|
683
|
+
* Certainty penalty per space boundary crossed during separator-tolerant matching.
|
|
684
|
+
* Each distinct whitespace gap reduces the matched word's certainty by this amount.
|
|
685
|
+
* e.g., "fu ck" → fuck (c:5) → c:5-2 = c:3 → still flags at s:3
|
|
686
|
+
* e.g., "No m" → nom (c:3) → c:3-2 = c:1 → drops below threshold
|
|
687
|
+
*/
|
|
688
|
+
private static readonly SPACE_CERTAINTY_PENALTY;
|
|
689
|
+
/**
|
|
690
|
+
* Extract surrounding context (±N words) around a match position in text.
|
|
691
|
+
*/
|
|
692
|
+
private extractSurroundingContext;
|
|
693
|
+
/**
|
|
694
|
+
* Escape regex special characters in a string.
|
|
695
|
+
* @param str - The string to escape.
|
|
696
|
+
* @returns The escaped string.
|
|
697
|
+
*/
|
|
698
|
+
private escapeRegex;
|
|
699
|
+
/**
|
|
700
|
+
* Check if a match is bounded by word boundaries (strict mode).
|
|
701
|
+
* @param text - The text.
|
|
702
|
+
* @param start - Start index.
|
|
703
|
+
* @param end - End index.
|
|
704
|
+
* @returns True if match is at word boundaries, false otherwise.
|
|
705
|
+
*/
|
|
706
|
+
private hasWordBoundaries;
|
|
707
|
+
/**
|
|
708
|
+
* Determine if a match is a whole word.
|
|
709
|
+
* @param text - The text.
|
|
710
|
+
* @param start - Start index.
|
|
711
|
+
* @param end - End index.
|
|
712
|
+
* @returns True if whole word, false otherwise.
|
|
713
|
+
*/
|
|
714
|
+
private static readonly CJK_RE;
|
|
715
|
+
private isWholeWord;
|
|
716
|
+
/**
|
|
717
|
+
* Returns the char-index bounds of the host word containing [start, end).
|
|
718
|
+
* Scans outward using the same Unicode-letter definition as isWholeWord.
|
|
719
|
+
*/
|
|
720
|
+
private getHostWordBounds;
|
|
721
|
+
/**
|
|
722
|
+
* When a match is embedded (not a whole word), check whether the profane
|
|
723
|
+
* substring covers a large enough fraction of its host word to be flagged
|
|
724
|
+
* anyway. This catches deliberate obfuscation like "urASSHOLEbro" where
|
|
725
|
+
* "asshole" (7 chars) = 58 % of the 12-char host word.
|
|
726
|
+
*
|
|
727
|
+
* Guards (all must pass):
|
|
728
|
+
* 1. Match length ≥ 6 chars — short words (ass/shit/anal/semen) are too common.
|
|
729
|
+
* 2. Graduated coverage threshold — shorter matches need higher coverage:
|
|
730
|
+
* - 6-char matches: ≥ 85% (only catches near-exact wraps like "ufucker")
|
|
731
|
+
* - 7+ char matches: ≥ 55% (catches obfuscation like "urASSHOLEbro")
|
|
732
|
+
* 3. Language signal — scoreWord() on the host word must show signal for
|
|
733
|
+
* the profane word's language. If the host word has no signal for that
|
|
734
|
+
* language it's a cross-language collision (e.g. "singe" = French slur
|
|
735
|
+
* inside "singer" which scores as English → skip).
|
|
736
|
+
*
|
|
737
|
+
* Examples:
|
|
738
|
+
* "asshole" (7, en) in "urASSHOLEbro" (en signal) = 58 % → flagged ✓
|
|
739
|
+
* "fucker" (6, en) in "ufucker" (en signal) = 86 % → flagged ✓
|
|
740
|
+
* "raging" (6, en) in "foraging" = 75 % → below 85% for 6-char → safe ✓
|
|
741
|
+
* "semen" (5) in "basement" → too short → safe ✓
|
|
742
|
+
* "anal" (4) in "canal" → too short → safe ✓
|
|
743
|
+
* "singe" (5, fr) in "singer" → too short → safe ✓
|
|
744
|
+
* "negro" (5, en) in "negroni" → too short → safe ✓
|
|
745
|
+
*/
|
|
746
|
+
private static readonly HIGH_COVERAGE_THRESHOLD_SHORT;
|
|
747
|
+
private static readonly HIGH_COVERAGE_THRESHOLD_LONG;
|
|
748
|
+
private static readonly HIGH_COVERAGE_MIN_MATCH_LEN;
|
|
749
|
+
private static readonly HIGH_COVERAGE_LANG_SIGNAL_MIN;
|
|
750
|
+
private isHighCoverageEmbed;
|
|
751
|
+
/**
|
|
752
|
+
* Check if a match is whitelisted.
|
|
753
|
+
* @param word - Word from dictionary.
|
|
754
|
+
* @param matchedText - Actual matched text.
|
|
755
|
+
* @returns True if whitelisted, false otherwise.
|
|
756
|
+
*/
|
|
757
|
+
private isWhitelistedMatch;
|
|
758
|
+
/**
|
|
759
|
+
* Remove overlapping matches, keeping only the longest at each start position.
|
|
760
|
+
* @param matches - Array of match results.
|
|
761
|
+
* @returns Deduplicated matches.
|
|
762
|
+
*/
|
|
763
|
+
private deduplicateMatches;
|
|
764
|
+
/**
|
|
765
|
+
* Use Aho-Corasick algorithm for pattern matching
|
|
766
|
+
*/
|
|
767
|
+
private findMatchesWithAhoCorasick;
|
|
768
|
+
/**
|
|
769
|
+
* Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
|
|
770
|
+
*/
|
|
771
|
+
private findMatchesHybrid;
|
|
772
|
+
/**
|
|
773
|
+
* Apply context analysis to filter false positives
|
|
774
|
+
*/
|
|
775
|
+
private applyContextAnalysis;
|
|
776
|
+
/**
|
|
777
|
+
* Detects profanity in the provided text and returns comprehensive analysis.
|
|
778
|
+
*
|
|
779
|
+
* @param {string} text - The text to analyze for profanity
|
|
780
|
+
* @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
|
|
781
|
+
*
|
|
782
|
+
* @throws {TypeError} If text is not a string
|
|
783
|
+
*
|
|
784
|
+
* @remarks
|
|
785
|
+
* ### Performance:
|
|
786
|
+
* - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
|
|
787
|
+
* - With Bloom Filter: O(n) average case (faster early rejection)
|
|
788
|
+
* - With Caching: O(1) for repeated identical text
|
|
789
|
+
*
|
|
790
|
+
* ### Features:
|
|
791
|
+
* - Detects leet speak variations (if enabled): "h3ll0" → "hello"
|
|
792
|
+
* - Respects word boundaries (strict mode) or detects partial matches
|
|
793
|
+
* - Returns exact positions for highlighting/masking
|
|
794
|
+
* - Calculates severity based on match count and uniqueness
|
|
795
|
+
*
|
|
796
|
+
* ### Caching:
|
|
797
|
+
* - Results are cached if `performance.enableCaching` is true
|
|
798
|
+
* - Cache uses LRU eviction when size limit is reached
|
|
799
|
+
*
|
|
800
|
+
* @example
|
|
801
|
+
* ```typescript
|
|
802
|
+
* const filter = new BeKind();
|
|
803
|
+
* const result = filter.detect("This has bad words");
|
|
804
|
+
*
|
|
805
|
+
* console.log(result.hasProfanity); // true
|
|
806
|
+
* console.log(result.detectedWords); // ['bad']
|
|
807
|
+
* console.log(result.cleanedText); // 'This has *** words'
|
|
808
|
+
* console.log(result.severity); // ProfanitySeverity.MILD
|
|
809
|
+
* console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
|
|
810
|
+
* ```
|
|
811
|
+
*
|
|
812
|
+
* @example
|
|
813
|
+
* ```typescript
|
|
814
|
+
* // With leet speak detection
|
|
815
|
+
* const filter = new BeKind({ enableLeetSpeak: true });
|
|
816
|
+
* const result = filter.detect("st0p b3ing b@d");
|
|
817
|
+
*
|
|
818
|
+
* if (result.hasProfanity) {
|
|
819
|
+
* result.positions.forEach(pos => {
|
|
820
|
+
* console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
|
|
821
|
+
* });
|
|
822
|
+
* }
|
|
823
|
+
* ```
|
|
824
|
+
*
|
|
825
|
+
* @see {@link ProfanityDetectionResult} for result structure
|
|
826
|
+
* @see {@link ProfanitySeverity} for severity levels
|
|
827
|
+
*/
|
|
828
|
+
detect(text: string): ProfanityDetectionResult;
|
|
829
|
+
/**
|
|
830
|
+
* Main matching function, with whole-word logic.
|
|
831
|
+
* @param searchText - The normalized text to search.
|
|
832
|
+
* @param originalText - The original text.
|
|
833
|
+
* @param matches - Array to collect matches.
|
|
834
|
+
*/
|
|
835
|
+
private findMatches;
|
|
836
|
+
/**
|
|
837
|
+
* Walk the trie while tolerating separator characters between letters.
|
|
838
|
+
* Catches evasion patterns: "fu ck", "c.u.n.t", "fu@ck@cu@nt@bi@tch"
|
|
839
|
+
*
|
|
840
|
+
* Symbol separators (@, ., -, etc.) are skipped freely.
|
|
841
|
+
* Space separators reduce certainty by SPACE_CERTAINTY_PENALTY per gap.
|
|
842
|
+
* Matches that drop below the flagging threshold become "suspicious" instead.
|
|
843
|
+
*/
|
|
844
|
+
private findSeparatorTolerantMatches;
|
|
845
|
+
/**
|
|
846
|
+
* Recursively walk the trie from a given node, skipping separator chars.
|
|
847
|
+
* Tracks space boundaries crossed (for certainty penalty) separately from
|
|
848
|
+
* symbol separators (which are free to skip).
|
|
849
|
+
*/
|
|
850
|
+
private walkTrieWithSeparators;
|
|
851
|
+
/**
|
|
852
|
+
* Decay constant for embedded profanity detection.
|
|
853
|
+
* Each extra character beyond the profane root reduces certainty by this factor.
|
|
854
|
+
*/
|
|
855
|
+
private static readonly EMBEDDED_DECAY_RATE;
|
|
856
|
+
/**
|
|
857
|
+
* Minimum decayed certainty to report an embedded match.
|
|
858
|
+
*/
|
|
859
|
+
private static readonly EMBEDDED_MIN_CERTAINTY;
|
|
860
|
+
/**
|
|
861
|
+
* Find profane substrings embedded inside larger words with certainty decay.
|
|
862
|
+
*
|
|
863
|
+
* Formula: decayed_c = base_c * (DECAY_RATE ^ extra_chars) * (profane_len / host_word_len)
|
|
864
|
+
*
|
|
865
|
+
* Multi-profanity bonus: if a host word contains multiple profane substrings,
|
|
866
|
+
* certainty is boosted (sum of base severities used as multiplier, capped at c:5).
|
|
867
|
+
*
|
|
868
|
+
* Unusually long words (12+ chars) containing profanity get a certainty bonus
|
|
869
|
+
* since legitimate words rarely exceed this length.
|
|
870
|
+
*/
|
|
871
|
+
private findEmbeddedMatches;
|
|
872
|
+
/**
|
|
873
|
+
* Deduplicate embedded finds: at overlapping positions, keep the longest match.
|
|
874
|
+
*/
|
|
875
|
+
private deduplicateEmbeddedFinds;
|
|
876
|
+
/**
|
|
877
|
+
* Generate cleaned text by replacing profane words.
|
|
878
|
+
* @param originalText - The original text.
|
|
879
|
+
* @param matches - Array of matches.
|
|
880
|
+
* @returns Cleaned text.
|
|
881
|
+
*/
|
|
882
|
+
private generateCleanedText;
|
|
883
|
+
/**
|
|
884
|
+
* Quick boolean check for profanity presence in text.
|
|
885
|
+
*
|
|
886
|
+
* @param {string} text - The text to check for profanity
|
|
887
|
+
* @returns {boolean} True if profanity is detected, false otherwise
|
|
888
|
+
*
|
|
889
|
+
* @throws {TypeError} If text is not a string
|
|
890
|
+
*
|
|
891
|
+
* @remarks
|
|
892
|
+
* - Convenience method that internally calls `detect()` and returns only the boolean result
|
|
893
|
+
* - For detailed information about matches, use `detect()` instead
|
|
894
|
+
* - Results are cached if caching is enabled (same cache as `detect()`)
|
|
895
|
+
*
|
|
896
|
+
* @example
|
|
897
|
+
* ```typescript
|
|
898
|
+
* const filter = new BeKind();
|
|
899
|
+
*
|
|
900
|
+
* if (filter.check("This has bad words")) {
|
|
901
|
+
* console.log("Profanity detected!");
|
|
902
|
+
* }
|
|
903
|
+
*
|
|
904
|
+
* // Quick validation
|
|
905
|
+
* const isClean = !filter.check(userInput);
|
|
906
|
+
* ```
|
|
907
|
+
*
|
|
908
|
+
* @see {@link detect} for detailed profanity analysis
|
|
909
|
+
*/
|
|
910
|
+
check(text: string): boolean;
|
|
911
|
+
/**
|
|
912
|
+
* Cleans text by replacing profanity with a placeholder character.
|
|
913
|
+
*
|
|
914
|
+
* @param {string} text - The text to clean
|
|
915
|
+
* @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
|
|
916
|
+
* @returns {string} The cleaned text with profanity replaced
|
|
917
|
+
*
|
|
918
|
+
* @throws {TypeError} If text is not a string
|
|
919
|
+
*
|
|
920
|
+
* @remarks
|
|
921
|
+
* ### Character-level Replacement:
|
|
922
|
+
* - Each profane character is replaced individually
|
|
923
|
+
* - "bad" with placeholder "*" becomes "***"
|
|
924
|
+
* - Preserves text length and structure
|
|
925
|
+
*
|
|
926
|
+
* ### Placeholder Behavior:
|
|
927
|
+
* - If no placeholder provided, uses the instance's default placeholder
|
|
928
|
+
* - If placeholder provided, uses only the first character
|
|
929
|
+
* - Empty placeholder throws error
|
|
930
|
+
*
|
|
931
|
+
* @example
|
|
932
|
+
* ```typescript
|
|
933
|
+
* const filter = new BeKind();
|
|
934
|
+
*
|
|
935
|
+
* // Using default placeholder (*)
|
|
936
|
+
* const cleaned = filter.clean("This has bad words");
|
|
937
|
+
* console.log(cleaned); // "This has *** *****"
|
|
938
|
+
*
|
|
939
|
+
* // Using custom placeholder
|
|
940
|
+
* const cleaned = filter.clean("This has bad words", "#");
|
|
941
|
+
* console.log(cleaned); // "This has ### #####"
|
|
942
|
+
* ```
|
|
943
|
+
*
|
|
944
|
+
* @example
|
|
945
|
+
* ```typescript
|
|
946
|
+
* // Clean user-generated content for display
|
|
947
|
+
* const userComment = "Some inappropriate words here";
|
|
948
|
+
* const safeComment = filter.clean(userComment);
|
|
949
|
+
* displayComment(safeComment);
|
|
950
|
+
* ```
|
|
951
|
+
*
|
|
952
|
+
* @see {@link cleanWithPlaceholder} for word-level replacement
|
|
953
|
+
* @see {@link setPlaceholder} to change default placeholder
|
|
954
|
+
*/
|
|
955
|
+
clean(text: string, placeholder?: string): string;
|
|
956
|
+
/**
|
|
957
|
+
* Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
|
|
958
|
+
*
|
|
959
|
+
* @param {string} text - The text to clean
|
|
960
|
+
* @param {string} [placeholder="***"] - The placeholder string to use for each profane word
|
|
961
|
+
* @returns {string} The cleaned text with each profane word replaced by the placeholder
|
|
962
|
+
*
|
|
963
|
+
* @throws {TypeError} If text is not a string
|
|
964
|
+
*
|
|
965
|
+
* @remarks
|
|
966
|
+
* ### Word-level Replacement:
|
|
967
|
+
* - Each profane word is replaced with the entire placeholder string (not character-by-character)
|
|
968
|
+
* - "bad words" with placeholder "***" becomes "*** ***"
|
|
969
|
+
* - Does NOT preserve original text length
|
|
970
|
+
*
|
|
971
|
+
* ### Difference from `clean()`:
|
|
972
|
+
* - `clean()`: Character-level replacement - "bad" becomes "***" (preserves length)
|
|
973
|
+
* - `cleanWithPlaceholder()`: Word-level replacement - "bad" becomes "***" (fixed placeholder)
|
|
974
|
+
*
|
|
975
|
+
* @example
|
|
976
|
+
* ```typescript
|
|
977
|
+
* const filter = new BeKind();
|
|
978
|
+
*
|
|
979
|
+
 * // Default placeholder (***)
 * const text = "This has bad words";
|
|
980
|
+
* const cleaned = filter.cleanWithPlaceholder(text);
|
|
981
|
+
* console.log(cleaned); // "This has *** ***"
|
|
982
|
+
*
|
|
983
|
+
* // Custom placeholder
|
|
984
|
+
* const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
|
|
985
|
+
* console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
|
|
986
|
+
* ```
|
|
987
|
+
*
|
|
988
|
+
* @example
|
|
989
|
+
* ```typescript
|
|
990
|
+
* // Censoring chat messages
|
|
991
|
+
* const message = "You are a badword and stupid";
|
|
992
|
+
* const censored = filter.cleanWithPlaceholder(message, "[***]");
|
|
993
|
+
* // Result: "You are a [***] and [***]"
|
|
994
|
+
* ```
|
|
995
|
+
*
|
|
996
|
+
* @see {@link clean} for character-level replacement
|
|
997
|
+
*/
|
|
998
|
+
cleanWithPlaceholder(text: string, placeholder?: string): string;
|
|
999
|
+
/**
|
|
1000
|
+
* Dynamically adds one or more words to the profanity filter at runtime.
|
|
1001
|
+
*
|
|
1002
|
+
* @param {string | string[]} word - A single word or array of words to add to the filter
|
|
1003
|
+
* @returns {void}
|
|
1004
|
+
*
|
|
1005
|
+
* @remarks
|
|
1006
|
+
* ### Behavior:
|
|
1007
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1008
|
+
* - Automatically normalizes words based on caseSensitive setting
|
|
1009
|
+
* - Skips whitelisted words
|
|
1010
|
+
* - Validates and filters out non-string or empty values
|
|
1011
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
1012
|
+
*
|
|
1013
|
+
* ### Use Cases:
|
|
1014
|
+
* - Adding context-specific profanity
|
|
1015
|
+
* - Building dynamic word lists from user reports
|
|
1016
|
+
* - Customizing filters for specific communities/applications
|
|
1017
|
+
*
|
|
1018
|
+
* @example
|
|
1019
|
+
* ```typescript
|
|
1020
|
+
* const filter = new BeKind();
|
|
1021
|
+
*
|
|
1022
|
+
* // Add single word
|
|
1023
|
+
* filter.add('newbadword');
|
|
1024
|
+
*
|
|
1025
|
+
* // Add multiple words
|
|
1026
|
+
* filter.add(['word1', 'word2', 'word3']);
|
|
1027
|
+
*
|
|
1028
|
+
* // Now these words will be detected
|
|
1029
|
+
* filter.check('newbadword'); // true
|
|
1030
|
+
* ```
|
|
1031
|
+
*
|
|
1032
|
+
* @example
|
|
1033
|
+
* ```typescript
|
|
1034
|
+
* // Add game-specific slang dynamically
|
|
1035
|
+
* const filter = new BeKind();
|
|
1036
|
+
* const gamingSlang = ['noob', 'trash', 'tryhard'];
|
|
1037
|
+
* filter.add(gamingSlang);
|
|
1038
|
+
*
|
|
1039
|
+
* const message = "You're such a noob";
|
|
1040
|
+
* console.log(filter.check(message)); // true
|
|
1041
|
+
* ```
|
|
1042
|
+
*
|
|
1043
|
+
* @see {@link remove} to remove words
|
|
1044
|
+
* @see {@link loadCustomDictionary} for loading named dictionaries
|
|
1045
|
+
*/
|
|
1046
|
+
add(word: string | string[]): void;
|
|
1047
|
+
/**
|
|
1048
|
+
* Dynamically removes one or more words from the profanity filter at runtime.
|
|
1049
|
+
*
|
|
1050
|
+
* @param {string | string[]} word - A single word or array of words to remove from the filter
|
|
1051
|
+
* @returns {void}
|
|
1052
|
+
*
|
|
1053
|
+
* @remarks
|
|
1054
|
+
* ### Behavior:
|
|
1055
|
+
* - Removes words from all active data structures (Trie, dynamic words set)
|
|
1056
|
+
* - Normalizes words based on caseSensitive setting before removal
|
|
1057
|
+
* - Only removes dynamically added words, not words from loaded language dictionaries
|
|
1058
|
+
* - Changes take effect immediately for subsequent detect/check/clean calls
|
|
1059
|
+
*
|
|
1060
|
+
* ### Important Notes:
|
|
1061
|
+
* - Cannot remove words from built-in language dictionaries
|
|
1062
|
+
* - To exclude dictionary words, use `addToWhitelist()` instead
|
|
1063
|
+
* - Validates and filters out non-string or empty values
|
|
1064
|
+
*
|
|
1065
|
+
* @example
|
|
1066
|
+
* ```typescript
|
|
1067
|
+
* const filter = new BeKind();
|
|
1068
|
+
*
|
|
1069
|
+
* // Add then remove a word
|
|
1070
|
+
* filter.add('tempword');
|
|
1071
|
+
* filter.check('tempword'); // true
|
|
1072
|
+
*
|
|
1073
|
+
* filter.remove('tempword');
|
|
1074
|
+
* filter.check('tempword'); // false
|
|
1075
|
+
*
|
|
1076
|
+
* // Remove multiple words
|
|
1077
|
+
* filter.remove(['word1', 'word2']);
|
|
1078
|
+
* ```
|
|
1079
|
+
*
|
|
1080
|
+
* @example
|
|
1081
|
+
* ```typescript
|
|
1082
|
+
* // Managing custom word list
|
|
1083
|
+
* const filter = new BeKind();
|
|
1084
|
+
* filter.add(['custom1', 'custom2', 'custom3']);
|
|
1085
|
+
*
|
|
1086
|
+
* // Later, remove one that's no longer needed
|
|
1087
|
+
* filter.remove('custom2');
|
|
1088
|
+
* ```
|
|
1089
|
+
*
|
|
1090
|
+
* @see {@link add} to add words
|
|
1091
|
+
* @see {@link addToWhitelist} to exclude dictionary words without removing them
|
|
1092
|
+
*/
|
|
1093
|
+
remove(word: string | string[]): void;
|
|
1094
|
+
/**
|
|
1095
|
+
* Add words to the whitelist.
|
|
1096
|
+
* @param words - Words to whitelist.
|
|
1097
|
+
*/
|
|
1098
|
+
addToWhitelist(words: string[]): void;
|
|
1099
|
+
/**
|
|
1100
|
+
* Remove words from the whitelist.
|
|
1101
|
+
* @param words - Words to remove from whitelist.
|
|
1102
|
+
*/
|
|
1103
|
+
removeFromWhitelist(words: string[]): void;
|
|
1104
|
+
/**
|
|
1105
|
+
* Check if a word is whitelisted.
|
|
1106
|
+
* @param word - The word to check.
|
|
1107
|
+
* @returns True if whitelisted, false otherwise.
|
|
1108
|
+
*/
|
|
1109
|
+
private isWhitelisted;
|
|
1110
|
+
/**
|
|
1111
|
+
* Loads a built-in language dictionary into the profanity filter.
|
|
1112
|
+
*
|
|
1113
|
+
* @param {string} language - The language key to load (case-insensitive)
|
|
1114
|
+
 * @returns {boolean} True if the language was loaded successfully or was already loaded, false if the language was not found
|
|
1115
|
+
*
|
|
1116
|
+
* @remarks
|
|
1117
|
+
* ### Available Languages:
|
|
1118
|
+
* - `'english'` - English profanity words
|
|
1119
|
+
* - `'hindi'` - Hindi profanity words
|
|
1120
|
+
* - `'french'` - French profanity words
|
|
1121
|
+
* - `'german'` - German profanity words
|
|
1122
|
+
* - `'spanish'` - Spanish profanity words
|
|
1123
|
+
* - `'bengali'` - Bengali profanity words
|
|
1124
|
+
* - `'tamil'` - Tamil profanity words
|
|
1125
|
+
* - `'telugu'` - Telugu profanity words
|
|
1126
|
+
* - `'brazilian'` - Brazilian Portuguese profanity words
|
|
1127
|
+
*
|
|
1128
|
+
* ### Behavior:
|
|
1129
|
+
* - Language keys are case-insensitive
|
|
1130
|
+
* - Loading is idempotent - calling multiple times for same language is safe
|
|
1131
|
+
* - Returns true if language loaded successfully or was already loaded
|
|
1132
|
+
* - Returns false if language not found
|
|
1133
|
+
* - Logs success/failure messages (unless silent mode enabled)
|
|
1134
|
+
* - Words are added to all active data structures
|
|
1135
|
+
*
|
|
1136
|
+
* ### Default Languages:
|
|
1137
|
+
* English and Hindi are loaded automatically in the constructor
|
|
1138
|
+
*
|
|
1139
|
+
* @example
|
|
1140
|
+
* ```typescript
|
|
1141
|
+
* const filter = new BeKind();
|
|
1142
|
+
*
|
|
1143
|
+
* // Load additional languages
|
|
1144
|
+
* filter.loadLanguage('french');
|
|
1145
|
+
* filter.loadLanguage('spanish');
|
|
1146
|
+
*
|
|
1147
|
+
* // Case-insensitive
|
|
1148
|
+
* filter.loadLanguage('GERMAN'); // Works
|
|
1149
|
+
*
|
|
1150
|
+
* // Check if loaded
|
|
1151
|
+
* console.log(filter.getLoadedLanguages()); // ['english', 'hindi', 'french', 'spanish', 'german']
|
|
1152
|
+
* ```
|
|
1153
|
+
*
|
|
1154
|
+
* @example
|
|
1155
|
+
* ```typescript
|
|
1156
|
+
* // Load all Indian languages at once
|
|
1157
|
+
* const filter = new BeKind();
|
|
1158
|
+
* filter.loadIndianLanguages();
|
|
1159
|
+
* ```
|
|
1160
|
+
*
|
|
1161
|
+
* @see {@link loadLanguages} to load multiple languages at once
|
|
1162
|
+
* @see {@link loadIndianLanguages} for convenience method
|
|
1163
|
+
* @see {@link getAvailableLanguages} to see all available languages
|
|
1164
|
+
* @see {@link getLoadedLanguages} to see currently loaded languages
|
|
1165
|
+
*/
|
|
1166
|
+
loadLanguage(language: string): boolean;
|
|
1167
|
+
/**
|
|
1168
|
+
* Load multiple language dictionaries.
|
|
1169
|
+
* @param languages - Array of languages to load.
|
|
1170
|
+
* @returns Number of successfully loaded languages.
|
|
1171
|
+
*/
|
|
1172
|
+
loadLanguages(languages: string[]): number;
|
|
1173
|
+
/**
|
|
1174
|
+
* Load all supported Indian languages.
|
|
1175
|
+
* @returns Number of loaded Indian languages.
|
|
1176
|
+
*/
|
|
1177
|
+
loadIndianLanguages(): number;
|
|
1178
|
+
/**
|
|
1179
|
+
* Loads a custom dictionary of profane words with a specific name.
|
|
1180
|
+
*
|
|
1181
|
+
* @param {string} name - Unique name/identifier for this custom dictionary
|
|
1182
|
+
* @param {string[]} words - Array of profane words to add to the dictionary
|
|
1183
|
+
* @returns {void}
|
|
1184
|
+
*
|
|
1185
|
+
* @throws {TypeError} If name is not a string or words is not an array
|
|
1186
|
+
*
|
|
1187
|
+
* @remarks
|
|
1188
|
+
* ### Behavior:
|
|
1189
|
+
* - Creates a new named dictionary or overwrites existing one with same name
|
|
1190
|
+
* - Validates and filters out non-string and empty values from words array
|
|
1191
|
+
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
|
|
1192
|
+
* - Dictionary name is converted to lowercase for storage
|
|
1193
|
+
* - Logs count of loaded words (unless silent mode enabled)
|
|
1194
|
+
*
|
|
1195
|
+
* ### Use Cases:
|
|
1196
|
+
* - Domain-specific profanity (gaming, medical, legal, etc.)
|
|
1197
|
+
* - Organization-specific word lists
|
|
1198
|
+
* - Temporary or context-dependent filters
|
|
1199
|
+
* - Testing and development
|
|
1200
|
+
*
|
|
1201
|
+
* @example
|
|
1202
|
+
* ```typescript
|
|
1203
|
+
* const filter = new BeKind();
|
|
1204
|
+
*
|
|
1205
|
+
* // Load gaming-specific slang
|
|
1206
|
+
* filter.loadCustomDictionary('gaming', [
|
|
1207
|
+
* 'noob',
|
|
1208
|
+
* 'scrub',
|
|
1209
|
+
* 'tryhard',
|
|
1210
|
+
* 'trash'
|
|
1211
|
+
* ]);
|
|
1212
|
+
*
|
|
1213
|
+
* // Load company-specific terms
|
|
1214
|
+
* filter.loadCustomDictionary('company', [
|
|
1215
|
+
* 'competitor1',
|
|
1216
|
+
* 'bannedTerm1',
|
|
1217
|
+
* 'inappropriateJargon'
|
|
1218
|
+
* ]);
|
|
1219
|
+
*
|
|
1220
|
+
* console.log(filter.check('You are such a noob')); // true
|
|
1221
|
+
* ```
|
|
1222
|
+
*
|
|
1223
|
+
* @example
|
|
1224
|
+
* ```typescript
|
|
1225
|
+
* // Load from external source
|
|
1226
|
+
* const filter = new BeKind();
|
|
1227
|
+
*
|
|
1228
|
+
* async function loadExternalDictionary() {
|
|
1229
|
+
* const response = await fetch('https://example.com/custom-words.json');
|
|
1230
|
+
* const customWords = await response.json();
|
|
1231
|
+
* filter.loadCustomDictionary('external', customWords);
|
|
1232
|
+
* }
|
|
1233
|
+
* ```
|
|
1234
|
+
*
|
|
1235
|
+
* @see {@link add} for adding individual words dynamically
|
|
1236
|
+
* @see {@link loadLanguage} for loading built-in language dictionaries
|
|
1237
|
+
*/
|
|
1238
|
+
loadCustomDictionary(name: string, words: string[]): void;
|
|
1239
|
+
/**
|
|
1240
|
+
* Add a single word to the trie.
|
|
1241
|
+
* @param word - The word to add.
|
|
1242
|
+
* @returns True if added, false otherwise.
|
|
1243
|
+
*/
|
|
1244
|
+
private addWordToTrie;
|
|
1245
|
+
/**
|
|
1246
|
+
* Calculate severity from matches.
|
|
1247
|
+
* @param matches - Array of matches.
|
|
1248
|
+
* @returns Severity level.
|
|
1249
|
+
*/
|
|
1250
|
+
private calculateSeverity;
|
|
1251
|
+
/**
|
|
1252
|
+
* Get the severity (s) and certainty (c) scores for a word.
|
|
1253
|
+
* Returns null if the word has no score entry.
|
|
1254
|
+
*
|
|
1255
|
+
* @param word - The word to look up
|
|
1256
|
+
* @returns The score object or null
|
|
1257
|
+
*/
|
|
1258
|
+
getWordScore(word: string): {
|
|
1259
|
+
severity: number;
|
|
1260
|
+
certainty: number;
|
|
1261
|
+
language: string;
|
|
1262
|
+
} | null;
|
|
1263
|
+
/**
 * Shared threshold logic: determines whether a severity/certainty pair
 * crosses the flag threshold. Used by shouldFlag, shouldFlagWithContext,
 * and inline threshold checks.
 *
 * @param severity - Severity score (s) of the word
 * @param certainty - Certainty score (c) of the word
 * @returns true if the severity/certainty pair crosses the flag threshold
 */
static shouldFlagWithCertainty(severity: number, certainty: number): boolean;
/**
 * Check whether a word should be flagged based on its severity/certainty scores.
 *
 * Threshold rules:
 * - Flag if s:5 (any certainty)
 * - Flag if s:4+ AND c:2+
 * - Flag if s:3 AND c:3+
 * - Allow everything else
 *
 * @param word - The word to check
 * @returns true if the word should be flagged
 */
shouldFlag(word: string): boolean;
|
|
1282
|
+
/**
|
|
1283
|
+
* Context-aware shouldFlag: for words with certainty ≤ 3, applies
|
|
1284
|
+
* certainty-delta adjustments from surrounding context before evaluating
|
|
1285
|
+
* the shouldFlag threshold. Words with certainty > 3 skip context analysis.
|
|
1286
|
+
*/
|
|
1287
|
+
private shouldFlagWithContext;
|
|
1288
|
+
/**
|
|
1289
|
+
* Clear all loaded dictionaries and dynamic words.
|
|
1290
|
+
*/
|
|
1291
|
+
clearList(): void;
|
|
1292
|
+
/**
|
|
1293
|
+
* Set the placeholder character for filtered words.
|
|
1294
|
+
* @param placeholder - The placeholder character.
|
|
1295
|
+
*/
|
|
1296
|
+
setPlaceholder(placeholder: string): void;
|
|
1297
|
+
/**
|
|
1298
|
+
* Get the list of loaded languages.
|
|
1299
|
+
* @returns Array of loaded language keys.
|
|
1300
|
+
*/
|
|
1301
|
+
getLoadedLanguages(): string[];
|
|
1302
|
+
/**
|
|
1303
|
+
* Get the list of available built-in languages.
|
|
1304
|
+
* @returns Array of available language keys.
|
|
1305
|
+
*/
|
|
1306
|
+
getAvailableLanguages(): string[];
|
|
1307
|
+
/**
|
|
1308
|
+
* Get the current configuration of the profanity filter.
|
|
1309
|
+
* @returns Partial configuration object.
|
|
1310
|
+
*/
|
|
1311
|
+
getConfig(): Partial<BeKindOptions>;
|
|
1312
|
+
/**
|
|
1313
|
+
* Rebuild the profanity trie from loaded dictionaries and dynamic words.
|
|
1314
|
+
*/
|
|
1315
|
+
private rebuildTrie;
|
|
1316
|
+
/**
|
|
1317
|
+
* Update configuration options for the profanity filter.
|
|
1318
|
+
* @param options - Partial configuration object.
|
|
1319
|
+
*/
|
|
1320
|
+
updateConfig(options: Partial<BeKindOptions>): void;
|
|
1321
|
+
/**
|
|
1322
|
+
 * Create a BeKind instance from a configuration object.
|
|
1323
|
+
* @param config - Configuration object
|
|
1324
|
+
* @returns A new BeKind instance
|
|
1325
|
+
*/
|
|
1326
|
+
static fromConfig(config: BeKindOptions | any): BeKind;
|
|
1327
|
+
}
|
|
1328
|
+
/**
 * Singleton instance of BeKind with the default configuration
 * (English and Hindi dictionaries are loaded automatically by the
 * constructor). Import this for quick use without constructing a
 * filter instance yourself; construct `new BeKind(options)` instead
 * when you need custom configuration or isolated word lists.
 */
declare const allProfanity: BeKind;
export default allProfanity;
|