allprofanity 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTORS.md +106 -0
- package/README.md +361 -26
- package/allprofanity.config.example.json +35 -0
- package/bin/init.js +49 -0
- package/config.schema.json +163 -0
- package/dist/algos/aho-corasick.d.ts +75 -0
- package/dist/algos/aho-corasick.js +238 -0
- package/dist/algos/aho-corasick.js.map +1 -0
- package/dist/algos/bloom-filter.d.ts +103 -0
- package/dist/algos/bloom-filter.js +208 -0
- package/dist/algos/bloom-filter.js.map +1 -0
- package/dist/algos/context-patterns.d.ts +88 -0
- package/dist/algos/context-patterns.js +298 -0
- package/dist/algos/context-patterns.js.map +1 -0
- package/dist/index.d.ts +161 -35
- package/dist/index.js +353 -82
- package/dist/index.js.map +1 -1
- package/dist/languages/brazilian-words.d.ts +7 -0
- package/dist/languages/brazilian-words.js +207 -0
- package/dist/languages/brazilian-words.js.map +1 -0
- package/package.json +23 -7
package/dist/index.d.ts
CHANGED
|
@@ -6,16 +6,29 @@ export { default as spanishBadWords } from "./languages/spanish-words.js";
|
|
|
6
6
|
export { default as bengaliBadWords } from "./languages/bengali-words.js";
|
|
7
7
|
export { default as tamilBadWords } from "./languages/tamil-words.js";
|
|
8
8
|
export { default as teluguBadWords } from "./languages/telugu-words.js";
|
|
9
|
+
export { default as brazilianBadWords } from "./languages/brazilian-words.js";
|
|
9
10
|
/**
|
|
10
|
-
*
|
|
11
|
+
* Logger interface for the library.
|
|
11
12
|
*/
|
|
12
13
|
export interface Logger {
|
|
14
|
+
/**
|
|
15
|
+
* Log informational messages.
|
|
16
|
+
* @param message - The message to log.
|
|
17
|
+
*/
|
|
13
18
|
info(message: string): void;
|
|
19
|
+
/**
|
|
20
|
+
* Log warning messages.
|
|
21
|
+
* @param message - The message to log.
|
|
22
|
+
*/
|
|
14
23
|
warn(message: string): void;
|
|
24
|
+
/**
|
|
25
|
+
* Log error messages.
|
|
26
|
+
* @param message - The message to log.
|
|
27
|
+
*/
|
|
15
28
|
error(message: string): void;
|
|
16
29
|
}
|
|
17
30
|
/**
|
|
18
|
-
* Configuration options for AllProfanity
|
|
31
|
+
* Configuration options for AllProfanity.
|
|
19
32
|
*/
|
|
20
33
|
export interface AllProfanityOptions {
|
|
21
34
|
languages?: string[];
|
|
@@ -27,9 +40,34 @@ export interface AllProfanityOptions {
|
|
|
27
40
|
strictMode?: boolean;
|
|
28
41
|
detectPartialWords?: boolean;
|
|
29
42
|
logger?: Logger;
|
|
43
|
+
algorithm?: {
|
|
44
|
+
matching?: "trie" | "aho-corasick" | "hybrid";
|
|
45
|
+
useAhoCorasick?: boolean;
|
|
46
|
+
useBloomFilter?: boolean;
|
|
47
|
+
useContextAnalysis?: boolean;
|
|
48
|
+
};
|
|
49
|
+
bloomFilter?: {
|
|
50
|
+
enabled?: boolean;
|
|
51
|
+
expectedItems?: number;
|
|
52
|
+
falsePositiveRate?: number;
|
|
53
|
+
};
|
|
54
|
+
ahoCorasick?: {
|
|
55
|
+
enabled?: boolean;
|
|
56
|
+
prebuild?: boolean;
|
|
57
|
+
};
|
|
58
|
+
contextAnalysis?: {
|
|
59
|
+
enabled?: boolean;
|
|
60
|
+
contextWindow?: number;
|
|
61
|
+
languages?: string[];
|
|
62
|
+
scoreThreshold?: number;
|
|
63
|
+
};
|
|
64
|
+
performance?: {
|
|
65
|
+
cacheSize?: number;
|
|
66
|
+
enableCaching?: boolean;
|
|
67
|
+
};
|
|
30
68
|
}
|
|
31
69
|
/**
|
|
32
|
-
* Severity levels for profanity detection
|
|
70
|
+
* Severity levels for profanity detection.
|
|
33
71
|
*/
|
|
34
72
|
export declare enum ProfanitySeverity {
|
|
35
73
|
MILD = 1,
|
|
@@ -38,7 +76,7 @@ export declare enum ProfanitySeverity {
|
|
|
38
76
|
EXTREME = 4
|
|
39
77
|
}
|
|
40
78
|
/**
|
|
41
|
-
* Detection result
|
|
79
|
+
* Detection result for profanity detection.
|
|
42
80
|
*/
|
|
43
81
|
export interface ProfanityDetectionResult {
|
|
44
82
|
hasProfanity: boolean;
|
|
@@ -52,8 +90,7 @@ export interface ProfanityDetectionResult {
|
|
|
52
90
|
}>;
|
|
53
91
|
}
|
|
54
92
|
/**
|
|
55
|
-
*
|
|
56
|
-
* Addresses all critical issues from the original implementation
|
|
93
|
+
* Main class for profanity detection and filtering.
|
|
57
94
|
*/
|
|
58
95
|
export declare class AllProfanity {
|
|
59
96
|
private readonly profanityTrie;
|
|
@@ -68,127 +105,216 @@ export declare class AllProfanity {
|
|
|
68
105
|
private readonly availableLanguages;
|
|
69
106
|
private readonly leetMappings;
|
|
70
107
|
private readonly dynamicWords;
|
|
108
|
+
private ahoCorasickAutomaton;
|
|
109
|
+
private bloomFilter;
|
|
110
|
+
private contextAnalyzer;
|
|
111
|
+
private matchingAlgorithm;
|
|
112
|
+
private resultCache;
|
|
113
|
+
/**
|
|
114
|
+
* Create an AllProfanity instance.
|
|
115
|
+
* @param options - Profanity filter configuration options.
|
|
116
|
+
*/
|
|
71
117
|
constructor(options?: AllProfanityOptions);
|
|
72
118
|
/**
|
|
73
|
-
*
|
|
119
|
+
* Initialize advanced algorithms based on configuration.
|
|
120
|
+
*/
|
|
121
|
+
private initializeAdvancedAlgorithms;
|
|
122
|
+
/**
|
|
123
|
+
* Normalize leet speak to regular characters.
|
|
124
|
+
* @param text - The input text.
|
|
125
|
+
* @returns Normalized text.
|
|
74
126
|
*/
|
|
75
127
|
private normalizeLeetSpeak;
|
|
76
128
|
/**
|
|
77
|
-
*
|
|
129
|
+
* Escape regex special characters in a string.
|
|
130
|
+
* @param str - The string to escape.
|
|
131
|
+
* @returns The escaped string.
|
|
78
132
|
*/
|
|
79
133
|
private escapeRegex;
|
|
80
134
|
/**
|
|
81
|
-
* Check if a
|
|
135
|
+
* Check if a match is bounded by word boundaries (strict mode).
|
|
136
|
+
* @param text - The text.
|
|
137
|
+
* @param start - Start index.
|
|
138
|
+
* @param end - End index.
|
|
139
|
+
* @returns True if match is at word boundaries, false otherwise.
|
|
82
140
|
*/
|
|
83
141
|
private hasWordBoundaries;
|
|
84
142
|
/**
|
|
85
|
-
*
|
|
143
|
+
* Determine if a match is a whole word.
|
|
144
|
+
* @param text - The text.
|
|
145
|
+
* @param start - Start index.
|
|
146
|
+
* @param end - End index.
|
|
147
|
+
* @returns True if whole word, false otherwise.
|
|
86
148
|
*/
|
|
87
149
|
private isWholeWord;
|
|
88
150
|
/**
|
|
89
|
-
* Check if a match is whitelisted
|
|
151
|
+
* Check if a match is whitelisted.
|
|
152
|
+
* @param word - Word from dictionary.
|
|
153
|
+
* @param matchedText - Actual matched text.
|
|
154
|
+
* @returns True if whitelisted, false otherwise.
|
|
90
155
|
*/
|
|
91
156
|
private isWhitelistedMatch;
|
|
92
157
|
/**
|
|
93
|
-
* Remove overlapping matches,
|
|
158
|
+
* Remove overlapping matches, keeping only the longest at each start position.
|
|
159
|
+
* @param matches - Array of match results.
|
|
160
|
+
* @returns Deduplicated matches.
|
|
94
161
|
*/
|
|
95
162
|
private deduplicateMatches;
|
|
96
163
|
/**
|
|
97
|
-
*
|
|
164
|
+
* Use the Aho-Corasick algorithm for pattern matching.
|
|
165
|
+
*/
|
|
166
|
+
private findMatchesWithAhoCorasick;
|
|
167
|
+
/**
|
|
168
|
+
* Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation.
|
|
169
|
+
*/
|
|
170
|
+
private findMatchesHybrid;
|
|
171
|
+
/**
|
|
172
|
+
* Apply context analysis to filter false positives.
|
|
173
|
+
*/
|
|
174
|
+
private applyContextAnalysis;
|
|
175
|
+
/**
|
|
176
|
+
* Detect profanity in a given text.
|
|
177
|
+
* @param text - The text to check.
|
|
178
|
+
* @returns Profanity detection result.
|
|
98
179
|
*/
|
|
99
180
|
detect(text: string): ProfanityDetectionResult;
|
|
100
181
|
/**
|
|
101
182
|
* Main matching function, with whole-word logic.
|
|
183
|
+
* @param searchText - The normalized text to search.
|
|
184
|
+
* @param originalText - The original text.
|
|
185
|
+
* @param matches - Array to collect matches.
|
|
102
186
|
*/
|
|
103
187
|
private findMatches;
|
|
104
188
|
/**
|
|
105
|
-
* Generate cleaned text by replacing profane words
|
|
189
|
+
* Generate cleaned text by replacing profane words.
|
|
190
|
+
* @param originalText - The original text.
|
|
191
|
+
* @param matches - Array of matches.
|
|
192
|
+
* @returns Cleaned text.
|
|
106
193
|
*/
|
|
107
194
|
private generateCleanedText;
|
|
108
195
|
/**
|
|
109
|
-
*
|
|
196
|
+
* Check if a string contains profanity.
|
|
197
|
+
* @param text - The text to check.
|
|
198
|
+
* @returns True if profanity is found, false otherwise.
|
|
110
199
|
*/
|
|
111
200
|
check(text: string): boolean;
|
|
112
201
|
/**
|
|
113
|
-
* Clean text with custom placeholder
|
|
202
|
+
* Clean text with a custom placeholder.
|
|
203
|
+
* @param text - The text to clean.
|
|
204
|
+
* @param placeholder - The placeholder to use.
|
|
205
|
+
* @returns Cleaned text.
|
|
114
206
|
*/
|
|
115
207
|
clean(text: string, placeholder?: string): string;
|
|
116
208
|
/**
|
|
117
|
-
* Clean text by replacing each profane word with a single placeholder (word-level)
|
|
209
|
+
* Clean text by replacing each profane word with a single placeholder (word-level).
|
|
210
|
+
* @param text - The text to clean.
|
|
211
|
+
* @param placeholder - The placeholder to use.
|
|
212
|
+
* @returns Word-level cleaned text.
|
|
118
213
|
*/
|
|
119
214
|
cleanWithPlaceholder(text: string, placeholder?: string): string;
|
|
120
215
|
/**
|
|
121
|
-
* Add word(s) to the profanity
|
|
216
|
+
* Add word(s) to the profanity filter.
|
|
217
|
+
* @param word - Word or array of words to add.
|
|
122
218
|
*/
|
|
123
219
|
add(word: string | string[]): void;
|
|
124
220
|
/**
|
|
125
|
-
* Remove word(s) from the profanity
|
|
221
|
+
* Remove word(s) from the profanity filter.
|
|
222
|
+
* @param word - Word or array of words to remove.
|
|
126
223
|
*/
|
|
127
224
|
remove(word: string | string[]): void;
|
|
128
225
|
/**
|
|
129
|
-
* Add words to whitelist
|
|
226
|
+
* Add words to the whitelist.
|
|
227
|
+
* @param words - Words to whitelist.
|
|
130
228
|
*/
|
|
131
229
|
addToWhitelist(words: string[]): void;
|
|
132
230
|
/**
|
|
133
|
-
* Remove words from whitelist
|
|
231
|
+
* Remove words from the whitelist.
|
|
232
|
+
* @param words - Words to remove from whitelist.
|
|
134
233
|
*/
|
|
135
234
|
removeFromWhitelist(words: string[]): void;
|
|
136
235
|
/**
|
|
137
|
-
*
|
|
236
|
+
* Check if a word is whitelisted.
|
|
237
|
+
* @param word - The word to check.
|
|
238
|
+
* @returns True if whitelisted, false otherwise.
|
|
138
239
|
*/
|
|
139
240
|
private isWhitelisted;
|
|
140
241
|
/**
|
|
141
|
-
* Load a built-in language dictionary
|
|
242
|
+
* Load a built-in language dictionary.
|
|
243
|
+
* @param language - The language key.
|
|
244
|
+
* @returns True if loaded, false otherwise.
|
|
142
245
|
*/
|
|
143
246
|
loadLanguage(language: string): boolean;
|
|
144
247
|
/**
|
|
145
|
-
* Load multiple
|
|
248
|
+
* Load multiple language dictionaries.
|
|
249
|
+
* @param languages - Array of languages to load.
|
|
250
|
+
* @returns Number of successfully loaded languages.
|
|
146
251
|
*/
|
|
147
252
|
loadLanguages(languages: string[]): number;
|
|
148
253
|
/**
|
|
149
|
-
* Load all Indian languages
|
|
254
|
+
* Load all supported Indian languages.
|
|
255
|
+
* @returns Number of loaded Indian languages.
|
|
150
256
|
*/
|
|
151
257
|
loadIndianLanguages(): number;
|
|
152
258
|
/**
|
|
153
|
-
* Load a custom dictionary
|
|
259
|
+
* Load a custom dictionary.
|
|
260
|
+
* @param name - Name of the dictionary.
|
|
261
|
+
* @param words - Words to add.
|
|
154
262
|
*/
|
|
155
263
|
loadCustomDictionary(name: string, words: string[]): void;
|
|
156
264
|
/**
|
|
157
|
-
* Add a single word to the trie
|
|
265
|
+
* Add a single word to the trie.
|
|
266
|
+
* @param word - The word to add.
|
|
267
|
+
* @returns True if added, false otherwise.
|
|
158
268
|
*/
|
|
159
269
|
private addWordToTrie;
|
|
160
270
|
/**
|
|
161
|
-
*
|
|
271
|
+
* Calculate severity from matches.
|
|
272
|
+
* @param matches - Array of matches.
|
|
273
|
+
* @returns Severity level.
|
|
162
274
|
*/
|
|
163
275
|
private calculateSeverity;
|
|
164
276
|
/**
|
|
165
|
-
* Clear all loaded dictionaries
|
|
277
|
+
* Clear all loaded dictionaries and dynamic words.
|
|
166
278
|
*/
|
|
167
279
|
clearList(): void;
|
|
168
280
|
/**
|
|
169
|
-
* Set placeholder character
|
|
281
|
+
* Set the placeholder character for filtered words.
|
|
282
|
+
* @param placeholder - The placeholder character.
|
|
170
283
|
*/
|
|
171
284
|
setPlaceholder(placeholder: string): void;
|
|
172
285
|
/**
|
|
173
|
-
* Get loaded languages
|
|
286
|
+
* Get the list of loaded languages.
|
|
287
|
+
* @returns Array of loaded language keys.
|
|
174
288
|
*/
|
|
175
289
|
getLoadedLanguages(): string[];
|
|
176
290
|
/**
|
|
177
|
-
* Get available languages
|
|
291
|
+
* Get the list of available built-in languages.
|
|
292
|
+
* @returns Array of available language keys.
|
|
178
293
|
*/
|
|
179
294
|
getAvailableLanguages(): string[];
|
|
180
295
|
/**
|
|
181
|
-
* Get current configuration
|
|
296
|
+
* Get the current configuration of the profanity filter.
|
|
297
|
+
* @returns Partial configuration object.
|
|
182
298
|
*/
|
|
183
299
|
getConfig(): Partial<AllProfanityOptions>;
|
|
184
300
|
/**
|
|
185
|
-
*
|
|
301
|
+
* Rebuild the profanity trie from loaded dictionaries and dynamic words.
|
|
186
302
|
*/
|
|
187
303
|
private rebuildTrie;
|
|
188
304
|
/**
|
|
189
|
-
* Update configuration
|
|
305
|
+
* Update configuration options for the profanity filter.
|
|
306
|
+
* @param options - Partial configuration object.
|
|
190
307
|
*/
|
|
191
308
|
updateConfig(options: Partial<AllProfanityOptions>): void;
|
|
309
|
+
/**
|
|
310
|
+
* Create an AllProfanity instance from a configuration object.
|
|
311
|
+
* @param config - Configuration object.
|
|
312
|
+
* @returns A new AllProfanity instance.
|
|
313
|
+
*/
|
|
314
|
+
static fromConfig(config: AllProfanityOptions | any): AllProfanity;
|
|
192
315
|
}
|
|
316
|
+
/**
|
|
317
|
+
* Singleton instance of AllProfanity with default configuration.
|
|
318
|
+
*/
|
|
193
319
|
declare const allProfanity: AllProfanity;
|
|
194
320
|
export default allProfanity;
|