glin-profanity 2.3.8 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -0
- package/dist/chunk-KNHWF6MX.js +5050 -0
- package/dist/index.cjs +2041 -272
- package/dist/index.d.cts +252 -87
- package/dist/index.d.ts +252 -87
- package/dist/index.js +50 -3306
- package/dist/ml/index.cjs +5513 -0
- package/dist/ml/index.d.cts +357 -0
- package/dist/ml/index.d.ts +357 -0
- package/dist/ml/index.js +557 -0
- package/dist/types-BgQe4FSE.d.cts +350 -0
- package/dist/types-BgQe4FSE.d.ts +350 -0
- package/package.json +38 -3
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the glin-profanity JavaScript/TypeScript package.
|
|
3
|
+
* Unified API that mirrors the Python package structure.
|
|
4
|
+
*/
|
|
5
|
+
/** Severity levels for profanity matches - unified with Python */
|
|
6
|
+
declare enum SeverityLevel {
|
|
7
|
+
EXACT = 1,
|
|
8
|
+
FUZZY = 2
|
|
9
|
+
}
|
|
10
|
+
/** Supported languages - unified list with Python */
|
|
11
|
+
type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'dutch' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
|
|
12
|
+
/** Represents a profanity match in text - unified with Python */
|
|
13
|
+
interface Match {
|
|
14
|
+
word: string;
|
|
15
|
+
index: number;
|
|
16
|
+
severity: SeverityLevel;
|
|
17
|
+
contextScore?: number;
|
|
18
|
+
reason?: string;
|
|
19
|
+
isWhitelisted?: boolean;
|
|
20
|
+
}
|
|
21
|
+
/** Result of profanity check operation - unified field names */
|
|
22
|
+
interface CheckProfanityResult {
|
|
23
|
+
containsProfanity: boolean;
|
|
24
|
+
profaneWords: string[];
|
|
25
|
+
processedText?: string;
|
|
26
|
+
severityMap?: Record<string, SeverityLevel>;
|
|
27
|
+
matches?: Match[];
|
|
28
|
+
contextScore?: number;
|
|
29
|
+
reason?: string;
|
|
30
|
+
}
|
|
31
|
+
/** Configuration for context-aware filtering - unified with Python */
|
|
32
|
+
interface ContextAwareConfig {
|
|
33
|
+
enableContextAware?: boolean;
|
|
34
|
+
contextWindow?: number;
|
|
35
|
+
confidenceThreshold?: number;
|
|
36
|
+
domainWhitelists?: Record<string, string[]>;
|
|
37
|
+
}
|
|
38
|
+
/** Leetspeak detection intensity levels */
|
|
39
|
+
type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
|
|
40
|
+
/** Main filter configuration options - unified with Python */
|
|
41
|
+
interface FilterConfig extends ContextAwareConfig {
|
|
42
|
+
languages?: Language[];
|
|
43
|
+
allLanguages?: boolean;
|
|
44
|
+
caseSensitive?: boolean;
|
|
45
|
+
wordBoundaries?: boolean;
|
|
46
|
+
customWords?: string[];
|
|
47
|
+
replaceWith?: string;
|
|
48
|
+
severityLevels?: boolean;
|
|
49
|
+
ignoreWords?: string[];
|
|
50
|
+
logProfanity?: boolean;
|
|
51
|
+
allowObfuscatedMatch?: boolean;
|
|
52
|
+
fuzzyToleranceLevel?: number;
|
|
53
|
+
/**
|
|
54
|
+
* Enable leetspeak detection (e.g., "f4ck" → "fuck").
|
|
55
|
+
* @default false
|
|
56
|
+
*/
|
|
57
|
+
detectLeetspeak?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Leetspeak detection intensity level.
|
|
60
|
+
* - `basic`: Numbers only (0→o, 1→i, 3→e, 4→a, 5→s)
|
|
61
|
+
* - `moderate`: Basic + symbols (@→a, $→s, !→i)
|
|
62
|
+
* - `aggressive`: All known substitutions
|
|
63
|
+
* @default 'moderate'
|
|
64
|
+
*/
|
|
65
|
+
leetspeakLevel?: LeetspeakLevel;
|
|
66
|
+
/**
|
|
67
|
+
* Enable Unicode normalization to detect homoglyphs and obfuscation.
|
|
68
|
+
* @default true
|
|
69
|
+
*/
|
|
70
|
+
normalizeUnicode?: boolean;
|
|
71
|
+
/**
|
|
72
|
+
* Cache profanity check results for repeated strings.
|
|
73
|
+
* @default false
|
|
74
|
+
*/
|
|
75
|
+
cacheResults?: boolean;
|
|
76
|
+
/**
|
|
77
|
+
* Maximum cache size when caching is enabled.
|
|
78
|
+
* @default 1000
|
|
79
|
+
*/
|
|
80
|
+
maxCacheSize?: number;
|
|
81
|
+
}
|
|
82
|
+
/** Result with minimum severity filtering */
|
|
83
|
+
interface FilteredProfanityResult {
|
|
84
|
+
result: CheckProfanityResult;
|
|
85
|
+
filteredWords: string[];
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Core profanity filter class.
|
|
90
|
+
* Provides comprehensive profanity detection with support for multiple languages,
|
|
91
|
+
* leetspeak detection, Unicode normalization, and context-aware filtering.
|
|
92
|
+
*
|
|
93
|
+
* @example
|
|
94
|
+
* ```typescript
|
|
95
|
+
* const filter = new Filter({
|
|
96
|
+
* languages: ['english'],
|
|
97
|
+
* detectLeetspeak: true,
|
|
98
|
+
* normalizeUnicode: true,
|
|
99
|
+
* });
|
|
100
|
+
*
|
|
101
|
+
* filter.isProfane('f4ck'); // Returns: true
|
|
102
|
+
* filter.isProfane('fυck'); // Returns: true (Greek upsilon)
|
|
103
|
+
* ```
|
|
104
|
+
*/
|
|
105
|
+
declare class Filter {
|
|
106
|
+
private words;
|
|
107
|
+
private caseSensitive;
|
|
108
|
+
private wordBoundaries;
|
|
109
|
+
private replaceWith?;
|
|
110
|
+
private severityLevels;
|
|
111
|
+
private ignoreWords;
|
|
112
|
+
private logProfanity;
|
|
113
|
+
private allowObfuscatedMatch;
|
|
114
|
+
private fuzzyToleranceLevel;
|
|
115
|
+
private enableContextAware;
|
|
116
|
+
private contextWindow;
|
|
117
|
+
private confidenceThreshold;
|
|
118
|
+
private contextAnalyzer?;
|
|
119
|
+
private primaryLanguage;
|
|
120
|
+
private detectLeetspeak;
|
|
121
|
+
private leetspeakLevel;
|
|
122
|
+
private normalizeUnicodeEnabled;
|
|
123
|
+
private cacheResults;
|
|
124
|
+
private maxCacheSize;
|
|
125
|
+
private cache;
|
|
126
|
+
/**
|
|
127
|
+
* Creates a new Filter instance with the specified configuration.
|
|
128
|
+
*
|
|
129
|
+
* @param config - Filter configuration options
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* ```typescript
|
|
133
|
+
* // Basic usage
|
|
134
|
+
* const filter = new Filter({ languages: ['english'] });
|
|
135
|
+
*
|
|
136
|
+
* // With leetspeak detection
|
|
137
|
+
* const leetFilter = new Filter({
|
|
138
|
+
* languages: ['english'],
|
|
139
|
+
* detectLeetspeak: true,
|
|
140
|
+
* leetspeakLevel: 'moderate',
|
|
141
|
+
* });
|
|
142
|
+
*
|
|
143
|
+
* // With all advanced features
|
|
144
|
+
* const advancedFilter = new Filter({
|
|
145
|
+
* languages: ['english', 'spanish'],
|
|
146
|
+
* detectLeetspeak: true,
|
|
147
|
+
* normalizeUnicode: true,
|
|
148
|
+
* cacheResults: true,
|
|
149
|
+
* enableContextAware: true,
|
|
150
|
+
* });
|
|
151
|
+
* ```
|
|
152
|
+
*/
|
|
153
|
+
constructor(config?: FilterConfig);
|
|
154
|
+
private debugLog;
|
|
155
|
+
/**
|
|
156
|
+
* Normalizes text for profanity detection using all enabled normalization methods.
|
|
157
|
+
* Applies Unicode normalization, leetspeak detection, and obfuscation handling.
|
|
158
|
+
*
|
|
159
|
+
* @param text - The input text to normalize
|
|
160
|
+
* @returns The normalized text
|
|
161
|
+
*/
|
|
162
|
+
private normalizeText;
|
|
163
|
+
/**
|
|
164
|
+
* Legacy obfuscation normalization method (for backward compatibility).
|
|
165
|
+
* @deprecated Use normalizeText() with detectLeetspeak option instead.
|
|
166
|
+
*/
|
|
167
|
+
private normalizeObfuscated;
|
|
168
|
+
/**
|
|
169
|
+
* Clears the result cache.
|
|
170
|
+
* Useful when the dictionary or configuration changes.
|
|
171
|
+
*/
|
|
172
|
+
clearCache(): void;
|
|
173
|
+
/**
|
|
174
|
+
* Gets the current cache size.
|
|
175
|
+
* @returns Number of cached results
|
|
176
|
+
*/
|
|
177
|
+
getCacheSize(): number;
|
|
178
|
+
/**
|
|
179
|
+
* Exports the current filter configuration as a JSON-serializable object.
|
|
180
|
+
* Useful for saving configuration to files or sharing between environments.
|
|
181
|
+
*
|
|
182
|
+
* @returns The current filter configuration
|
|
183
|
+
*
|
|
184
|
+
* @example
|
|
185
|
+
* ```typescript
|
|
186
|
+
* const filter = new Filter({
|
|
187
|
+
* languages: ['english', 'spanish'],
|
|
188
|
+
* detectLeetspeak: true,
|
|
189
|
+
* leetspeakLevel: 'aggressive',
|
|
190
|
+
* });
|
|
191
|
+
*
|
|
192
|
+
* const config = filter.getConfig();
|
|
193
|
+
* // Save to file: fs.writeFileSync('filter.config.json', JSON.stringify(config));
|
|
194
|
+
*
|
|
195
|
+
* // Later, restore:
|
|
196
|
+
* // const saved = JSON.parse(fs.readFileSync('filter.config.json', 'utf8'));
|
|
197
|
+
* // const restored = new Filter(saved);
|
|
198
|
+
* ```
|
|
199
|
+
*/
|
|
200
|
+
getConfig(): FilterConfig;
|
|
201
|
+
/**
|
|
202
|
+
* Returns the current word dictionary size.
|
|
203
|
+
* Useful for monitoring and debugging.
|
|
204
|
+
*
|
|
205
|
+
* @returns Number of words in the dictionary
|
|
206
|
+
*/
|
|
207
|
+
getWordCount(): number;
|
|
208
|
+
/**
|
|
209
|
+
* Adds a result to the cache, evicting oldest entries if necessary.
|
|
210
|
+
*/
|
|
211
|
+
private addToCache;
|
|
212
|
+
/**
|
|
213
|
+
* Gets a cached result if available.
|
|
214
|
+
*/
|
|
215
|
+
private getFromCache;
|
|
216
|
+
private getRegex;
|
|
217
|
+
private isFuzzyToleranceMatch;
|
|
218
|
+
private evaluateSeverity;
|
|
219
|
+
/**
|
|
220
|
+
* Checks if the given text contains profanity.
|
|
221
|
+
*
|
|
222
|
+
* @param value - The text to check
|
|
223
|
+
* @returns True if the text contains profanity
|
|
224
|
+
*
|
|
225
|
+
* @example
|
|
226
|
+
* ```typescript
|
|
227
|
+
* const filter = new Filter({ detectLeetspeak: true });
|
|
228
|
+
*
|
|
229
|
+
* filter.isProfane('hello'); // false
|
|
230
|
+
* filter.isProfane('fuck'); // true
|
|
231
|
+
* filter.isProfane('f4ck'); // true (leetspeak)
|
|
232
|
+
* filter.isProfane('fυck'); // true (Unicode homoglyph)
|
|
233
|
+
* ```
|
|
234
|
+
*/
|
|
235
|
+
isProfane(value: string): boolean;
|
|
236
|
+
matches(word: string): boolean;
|
|
237
|
+
/**
|
|
238
|
+
* Performs a comprehensive profanity check on the given text.
|
|
239
|
+
*
|
|
240
|
+
* @param text - The text to check for profanity
|
|
241
|
+
* @returns Result object containing detected profanity information
|
|
242
|
+
*
|
|
243
|
+
* @example
|
|
244
|
+
* ```typescript
|
|
245
|
+
* const filter = new Filter({
|
|
246
|
+
* languages: ['english'],
|
|
247
|
+
* detectLeetspeak: true,
|
|
248
|
+
* normalizeUnicode: true,
|
|
249
|
+
* });
|
|
250
|
+
*
|
|
251
|
+
* const result = filter.checkProfanity('This is f4ck!ng bad');
|
|
252
|
+
* console.log(result.containsProfanity); // true
|
|
253
|
+
* console.log(result.profaneWords); // ['fuck']
|
|
254
|
+
*
|
|
255
|
+
* // With caching for repeated checks
|
|
256
|
+
* const filter2 = new Filter({ cacheResults: true });
|
|
257
|
+
* filter2.checkProfanity('same text'); // Computed
|
|
258
|
+
* filter2.checkProfanity('same text'); // Retrieved from cache
|
|
259
|
+
* ```
|
|
260
|
+
*/
|
|
261
|
+
checkProfanity(text: string): CheckProfanityResult;
|
|
262
|
+
/**
|
|
263
|
+
* Checks profanity with minimum severity filtering.
|
|
264
|
+
*
|
|
265
|
+
* @param text - The text to check
|
|
266
|
+
* @param minSeverity - Minimum severity level to include in results
|
|
267
|
+
* @returns Object with filtered words and full result
|
|
268
|
+
*/
|
|
269
|
+
checkProfanityWithMinSeverity(text: string, minSeverity?: SeverityLevel): {
|
|
270
|
+
filteredWords: string[];
|
|
271
|
+
result: CheckProfanityResult;
|
|
272
|
+
};
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Type definitions for ML-based profanity detection.
|
|
277
|
+
*/
|
|
278
|
+
/**
|
|
279
|
+
* Toxicity categories detected by the TensorFlow.js model.
|
|
280
|
+
* These correspond to the labels of the Civil Comments dataset.
|
|
281
|
+
*/
|
|
282
|
+
type ToxicityLabel = 'identity_attack' | 'insult' | 'obscene' | 'severe_toxicity' | 'sexual_explicit' | 'threat' | 'toxicity';
|
|
283
|
+
/**
|
|
284
|
+
* Result from a single toxicity prediction.
|
|
285
|
+
*/
|
|
286
|
+
interface ToxicityPrediction {
|
|
287
|
+
/** The toxicity category */
|
|
288
|
+
label: ToxicityLabel;
|
|
289
|
+
/** Whether the text matches this category (null if below threshold) */
|
|
290
|
+
match: boolean | null;
|
|
291
|
+
/** Probability scores [non-toxic, toxic] */
|
|
292
|
+
probabilities: [number, number];
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* Result from ML-based toxicity analysis.
|
|
296
|
+
*/
|
|
297
|
+
interface MLAnalysisResult {
|
|
298
|
+
/** Whether any toxicity was detected */
|
|
299
|
+
isToxic: boolean;
|
|
300
|
+
/** Overall toxicity score (0-1) */
|
|
301
|
+
overallScore: number;
|
|
302
|
+
/** Predictions for each category */
|
|
303
|
+
predictions: ToxicityPrediction[];
|
|
304
|
+
/** Categories that matched */
|
|
305
|
+
matchedCategories: ToxicityLabel[];
|
|
306
|
+
/** Processing time in milliseconds */
|
|
307
|
+
processingTimeMs: number;
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Configuration for the ML toxicity detector.
|
|
311
|
+
*/
|
|
312
|
+
interface MLDetectorConfig {
|
|
313
|
+
/**
|
|
314
|
+
* Minimum confidence threshold for predictions.
|
|
315
|
+
* Predictions whose confidence falls below this threshold report `match` as `null`.
|
|
316
|
+
* @default 0.85
|
|
317
|
+
*/
|
|
318
|
+
threshold?: number;
|
|
319
|
+
/**
|
|
320
|
+
* Specific toxicity categories to check.
|
|
321
|
+
* If not specified, all categories are checked.
|
|
322
|
+
*/
|
|
323
|
+
labels?: ToxicityLabel[];
|
|
324
|
+
/**
|
|
325
|
+
* Whether to load the model immediately on instantiation.
|
|
326
|
+
* If false, model will be loaded on first use.
|
|
327
|
+
* @default false
|
|
328
|
+
*/
|
|
329
|
+
preloadModel?: boolean;
|
|
330
|
+
}
|
|
331
|
+
/**
|
|
332
|
+
* Combined result from both rule-based and ML detection.
|
|
333
|
+
*/
|
|
334
|
+
interface HybridAnalysisResult {
|
|
335
|
+
/** Rule-based detection result */
|
|
336
|
+
ruleBasedResult: {
|
|
337
|
+
containsProfanity: boolean;
|
|
338
|
+
profaneWords: string[];
|
|
339
|
+
};
|
|
340
|
+
/** ML-based detection result (null if ML not enabled) */
|
|
341
|
+
mlResult: MLAnalysisResult | null;
|
|
342
|
+
/** Combined decision */
|
|
343
|
+
isToxic: boolean;
|
|
344
|
+
/** Confidence score for the decision */
|
|
345
|
+
confidence: number;
|
|
346
|
+
/** Reason for the decision */
|
|
347
|
+
reason: string;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
export { type CheckProfanityResult as C, Filter as F, type HybridAnalysisResult as H, type Language as L, type Match as M, SeverityLevel as S, type ToxicityLabel as T, type FilterConfig as a, type FilteredProfanityResult as b, type ContextAwareConfig as c, type ToxicityPrediction as d, type MLAnalysisResult as e, type MLDetectorConfig as f };
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the glin-profanity JavaScript/TypeScript package.
|
|
3
|
+
* Unified API that mirrors the Python package structure.
|
|
4
|
+
*/
|
|
5
|
+
/** Severity levels for profanity matches - unified with Python */
|
|
6
|
+
declare enum SeverityLevel {
|
|
7
|
+
EXACT = 1,
|
|
8
|
+
FUZZY = 2
|
|
9
|
+
}
|
|
10
|
+
/** Supported languages - unified list with Python */
|
|
11
|
+
type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'dutch' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
|
|
12
|
+
/** Represents a profanity match in text - unified with Python */
|
|
13
|
+
interface Match {
|
|
14
|
+
word: string;
|
|
15
|
+
index: number;
|
|
16
|
+
severity: SeverityLevel;
|
|
17
|
+
contextScore?: number;
|
|
18
|
+
reason?: string;
|
|
19
|
+
isWhitelisted?: boolean;
|
|
20
|
+
}
|
|
21
|
+
/** Result of profanity check operation - unified field names */
|
|
22
|
+
interface CheckProfanityResult {
|
|
23
|
+
containsProfanity: boolean;
|
|
24
|
+
profaneWords: string[];
|
|
25
|
+
processedText?: string;
|
|
26
|
+
severityMap?: Record<string, SeverityLevel>;
|
|
27
|
+
matches?: Match[];
|
|
28
|
+
contextScore?: number;
|
|
29
|
+
reason?: string;
|
|
30
|
+
}
|
|
31
|
+
/** Configuration for context-aware filtering - unified with Python */
|
|
32
|
+
interface ContextAwareConfig {
|
|
33
|
+
enableContextAware?: boolean;
|
|
34
|
+
contextWindow?: number;
|
|
35
|
+
confidenceThreshold?: number;
|
|
36
|
+
domainWhitelists?: Record<string, string[]>;
|
|
37
|
+
}
|
|
38
|
+
/** Leetspeak detection intensity levels */
|
|
39
|
+
type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
|
|
40
|
+
/** Main filter configuration options - unified with Python */
|
|
41
|
+
interface FilterConfig extends ContextAwareConfig {
|
|
42
|
+
languages?: Language[];
|
|
43
|
+
allLanguages?: boolean;
|
|
44
|
+
caseSensitive?: boolean;
|
|
45
|
+
wordBoundaries?: boolean;
|
|
46
|
+
customWords?: string[];
|
|
47
|
+
replaceWith?: string;
|
|
48
|
+
severityLevels?: boolean;
|
|
49
|
+
ignoreWords?: string[];
|
|
50
|
+
logProfanity?: boolean;
|
|
51
|
+
allowObfuscatedMatch?: boolean;
|
|
52
|
+
fuzzyToleranceLevel?: number;
|
|
53
|
+
/**
|
|
54
|
+
* Enable leetspeak detection (e.g., "f4ck" → "fuck").
|
|
55
|
+
* @default false
|
|
56
|
+
*/
|
|
57
|
+
detectLeetspeak?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Leetspeak detection intensity level.
|
|
60
|
+
* - `basic`: Numbers only (0→o, 1→i, 3→e, 4→a, 5→s)
|
|
61
|
+
* - `moderate`: Basic + symbols (@→a, $→s, !→i)
|
|
62
|
+
* - `aggressive`: All known substitutions
|
|
63
|
+
* @default 'moderate'
|
|
64
|
+
*/
|
|
65
|
+
leetspeakLevel?: LeetspeakLevel;
|
|
66
|
+
/**
|
|
67
|
+
* Enable Unicode normalization to detect homoglyphs and obfuscation.
|
|
68
|
+
* @default true
|
|
69
|
+
*/
|
|
70
|
+
normalizeUnicode?: boolean;
|
|
71
|
+
/**
|
|
72
|
+
* Cache profanity check results for repeated strings.
|
|
73
|
+
* @default false
|
|
74
|
+
*/
|
|
75
|
+
cacheResults?: boolean;
|
|
76
|
+
/**
|
|
77
|
+
* Maximum cache size when caching is enabled.
|
|
78
|
+
* @default 1000
|
|
79
|
+
*/
|
|
80
|
+
maxCacheSize?: number;
|
|
81
|
+
}
|
|
82
|
+
/** Result with minimum severity filtering */
|
|
83
|
+
interface FilteredProfanityResult {
|
|
84
|
+
result: CheckProfanityResult;
|
|
85
|
+
filteredWords: string[];
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Core profanity filter class.
|
|
90
|
+
* Provides comprehensive profanity detection with support for multiple languages,
|
|
91
|
+
* leetspeak detection, Unicode normalization, and context-aware filtering.
|
|
92
|
+
*
|
|
93
|
+
* @example
|
|
94
|
+
* ```typescript
|
|
95
|
+
* const filter = new Filter({
|
|
96
|
+
* languages: ['english'],
|
|
97
|
+
* detectLeetspeak: true,
|
|
98
|
+
* normalizeUnicode: true,
|
|
99
|
+
* });
|
|
100
|
+
*
|
|
101
|
+
* filter.isProfane('f4ck'); // Returns: true
|
|
102
|
+
* filter.isProfane('fυck'); // Returns: true (Greek upsilon)
|
|
103
|
+
* ```
|
|
104
|
+
*/
|
|
105
|
+
declare class Filter {
|
|
106
|
+
private words;
|
|
107
|
+
private caseSensitive;
|
|
108
|
+
private wordBoundaries;
|
|
109
|
+
private replaceWith?;
|
|
110
|
+
private severityLevels;
|
|
111
|
+
private ignoreWords;
|
|
112
|
+
private logProfanity;
|
|
113
|
+
private allowObfuscatedMatch;
|
|
114
|
+
private fuzzyToleranceLevel;
|
|
115
|
+
private enableContextAware;
|
|
116
|
+
private contextWindow;
|
|
117
|
+
private confidenceThreshold;
|
|
118
|
+
private contextAnalyzer?;
|
|
119
|
+
private primaryLanguage;
|
|
120
|
+
private detectLeetspeak;
|
|
121
|
+
private leetspeakLevel;
|
|
122
|
+
private normalizeUnicodeEnabled;
|
|
123
|
+
private cacheResults;
|
|
124
|
+
private maxCacheSize;
|
|
125
|
+
private cache;
|
|
126
|
+
/**
|
|
127
|
+
* Creates a new Filter instance with the specified configuration.
|
|
128
|
+
*
|
|
129
|
+
* @param config - Filter configuration options
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* ```typescript
|
|
133
|
+
* // Basic usage
|
|
134
|
+
* const filter = new Filter({ languages: ['english'] });
|
|
135
|
+
*
|
|
136
|
+
* // With leetspeak detection
|
|
137
|
+
* const leetFilter = new Filter({
|
|
138
|
+
* languages: ['english'],
|
|
139
|
+
* detectLeetspeak: true,
|
|
140
|
+
* leetspeakLevel: 'moderate',
|
|
141
|
+
* });
|
|
142
|
+
*
|
|
143
|
+
* // With all advanced features
|
|
144
|
+
* const advancedFilter = new Filter({
|
|
145
|
+
* languages: ['english', 'spanish'],
|
|
146
|
+
* detectLeetspeak: true,
|
|
147
|
+
* normalizeUnicode: true,
|
|
148
|
+
* cacheResults: true,
|
|
149
|
+
* enableContextAware: true,
|
|
150
|
+
* });
|
|
151
|
+
* ```
|
|
152
|
+
*/
|
|
153
|
+
constructor(config?: FilterConfig);
|
|
154
|
+
private debugLog;
|
|
155
|
+
/**
|
|
156
|
+
* Normalizes text for profanity detection using all enabled normalization methods.
|
|
157
|
+
* Applies Unicode normalization, leetspeak detection, and obfuscation handling.
|
|
158
|
+
*
|
|
159
|
+
* @param text - The input text to normalize
|
|
160
|
+
* @returns The normalized text
|
|
161
|
+
*/
|
|
162
|
+
private normalizeText;
|
|
163
|
+
/**
|
|
164
|
+
* Legacy obfuscation normalization method (for backward compatibility).
|
|
165
|
+
* @deprecated Use normalizeText() with detectLeetspeak option instead.
|
|
166
|
+
*/
|
|
167
|
+
private normalizeObfuscated;
|
|
168
|
+
/**
|
|
169
|
+
* Clears the result cache.
|
|
170
|
+
* Useful when the dictionary or configuration changes.
|
|
171
|
+
*/
|
|
172
|
+
clearCache(): void;
|
|
173
|
+
/**
|
|
174
|
+
* Gets the current cache size.
|
|
175
|
+
* @returns Number of cached results
|
|
176
|
+
*/
|
|
177
|
+
getCacheSize(): number;
|
|
178
|
+
/**
|
|
179
|
+
* Exports the current filter configuration as a JSON-serializable object.
|
|
180
|
+
* Useful for saving configuration to files or sharing between environments.
|
|
181
|
+
*
|
|
182
|
+
* @returns The current filter configuration
|
|
183
|
+
*
|
|
184
|
+
* @example
|
|
185
|
+
* ```typescript
|
|
186
|
+
* const filter = new Filter({
|
|
187
|
+
* languages: ['english', 'spanish'],
|
|
188
|
+
* detectLeetspeak: true,
|
|
189
|
+
* leetspeakLevel: 'aggressive',
|
|
190
|
+
* });
|
|
191
|
+
*
|
|
192
|
+
* const config = filter.getConfig();
|
|
193
|
+
* // Save to file: fs.writeFileSync('filter.config.json', JSON.stringify(config));
|
|
194
|
+
*
|
|
195
|
+
* // Later, restore:
|
|
196
|
+
* // const saved = JSON.parse(fs.readFileSync('filter.config.json', 'utf8'));
|
|
197
|
+
* // const restored = new Filter(saved);
|
|
198
|
+
* ```
|
|
199
|
+
*/
|
|
200
|
+
getConfig(): FilterConfig;
|
|
201
|
+
/**
|
|
202
|
+
* Returns the current word dictionary size.
|
|
203
|
+
* Useful for monitoring and debugging.
|
|
204
|
+
*
|
|
205
|
+
* @returns Number of words in the dictionary
|
|
206
|
+
*/
|
|
207
|
+
getWordCount(): number;
|
|
208
|
+
/**
|
|
209
|
+
* Adds a result to the cache, evicting oldest entries if necessary.
|
|
210
|
+
*/
|
|
211
|
+
private addToCache;
|
|
212
|
+
/**
|
|
213
|
+
* Gets a cached result if available.
|
|
214
|
+
*/
|
|
215
|
+
private getFromCache;
|
|
216
|
+
private getRegex;
|
|
217
|
+
private isFuzzyToleranceMatch;
|
|
218
|
+
private evaluateSeverity;
|
|
219
|
+
/**
|
|
220
|
+
* Checks if the given text contains profanity.
|
|
221
|
+
*
|
|
222
|
+
* @param value - The text to check
|
|
223
|
+
* @returns True if the text contains profanity
|
|
224
|
+
*
|
|
225
|
+
* @example
|
|
226
|
+
* ```typescript
|
|
227
|
+
* const filter = new Filter({ detectLeetspeak: true });
|
|
228
|
+
*
|
|
229
|
+
* filter.isProfane('hello'); // false
|
|
230
|
+
* filter.isProfane('fuck'); // true
|
|
231
|
+
* filter.isProfane('f4ck'); // true (leetspeak)
|
|
232
|
+
* filter.isProfane('fυck'); // true (Unicode homoglyph)
|
|
233
|
+
* ```
|
|
234
|
+
*/
|
|
235
|
+
isProfane(value: string): boolean;
|
|
236
|
+
matches(word: string): boolean;
|
|
237
|
+
/**
|
|
238
|
+
* Performs a comprehensive profanity check on the given text.
|
|
239
|
+
*
|
|
240
|
+
* @param text - The text to check for profanity
|
|
241
|
+
* @returns Result object containing detected profanity information
|
|
242
|
+
*
|
|
243
|
+
* @example
|
|
244
|
+
* ```typescript
|
|
245
|
+
* const filter = new Filter({
|
|
246
|
+
* languages: ['english'],
|
|
247
|
+
* detectLeetspeak: true,
|
|
248
|
+
* normalizeUnicode: true,
|
|
249
|
+
* });
|
|
250
|
+
*
|
|
251
|
+
* const result = filter.checkProfanity('This is f4ck!ng bad');
|
|
252
|
+
* console.log(result.containsProfanity); // true
|
|
253
|
+
* console.log(result.profaneWords); // ['fuck']
|
|
254
|
+
*
|
|
255
|
+
* // With caching for repeated checks
|
|
256
|
+
* const filter2 = new Filter({ cacheResults: true });
|
|
257
|
+
* filter2.checkProfanity('same text'); // Computed
|
|
258
|
+
* filter2.checkProfanity('same text'); // Retrieved from cache
|
|
259
|
+
* ```
|
|
260
|
+
*/
|
|
261
|
+
checkProfanity(text: string): CheckProfanityResult;
|
|
262
|
+
/**
|
|
263
|
+
* Checks profanity with minimum severity filtering.
|
|
264
|
+
*
|
|
265
|
+
* @param text - The text to check
|
|
266
|
+
* @param minSeverity - Minimum severity level to include in results
|
|
267
|
+
* @returns Object with filtered words and full result
|
|
268
|
+
*/
|
|
269
|
+
checkProfanityWithMinSeverity(text: string, minSeverity?: SeverityLevel): {
|
|
270
|
+
filteredWords: string[];
|
|
271
|
+
result: CheckProfanityResult;
|
|
272
|
+
};
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Type definitions for ML-based profanity detection.
|
|
277
|
+
*/
|
|
278
|
+
/**
|
|
279
|
+
* Toxicity categories detected by the TensorFlow.js model.
|
|
280
|
+
* These correspond to the labels of the Civil Comments dataset.
|
|
281
|
+
*/
|
|
282
|
+
type ToxicityLabel = 'identity_attack' | 'insult' | 'obscene' | 'severe_toxicity' | 'sexual_explicit' | 'threat' | 'toxicity';
|
|
283
|
+
/**
|
|
284
|
+
* Result from a single toxicity prediction.
|
|
285
|
+
*/
|
|
286
|
+
interface ToxicityPrediction {
|
|
287
|
+
/** The toxicity category */
|
|
288
|
+
label: ToxicityLabel;
|
|
289
|
+
/** Whether the text matches this category (null if below threshold) */
|
|
290
|
+
match: boolean | null;
|
|
291
|
+
/** Probability scores [non-toxic, toxic] */
|
|
292
|
+
probabilities: [number, number];
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* Result from ML-based toxicity analysis.
|
|
296
|
+
*/
|
|
297
|
+
interface MLAnalysisResult {
|
|
298
|
+
/** Whether any toxicity was detected */
|
|
299
|
+
isToxic: boolean;
|
|
300
|
+
/** Overall toxicity score (0-1) */
|
|
301
|
+
overallScore: number;
|
|
302
|
+
/** Predictions for each category */
|
|
303
|
+
predictions: ToxicityPrediction[];
|
|
304
|
+
/** Categories that matched */
|
|
305
|
+
matchedCategories: ToxicityLabel[];
|
|
306
|
+
/** Processing time in milliseconds */
|
|
307
|
+
processingTimeMs: number;
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Configuration for the ML toxicity detector.
|
|
311
|
+
*/
|
|
312
|
+
interface MLDetectorConfig {
|
|
313
|
+
/**
|
|
314
|
+
* Minimum confidence threshold for predictions.
|
|
315
|
+
* Predictions whose confidence falls below this threshold report `match` as `null`.
|
|
316
|
+
* @default 0.85
|
|
317
|
+
*/
|
|
318
|
+
threshold?: number;
|
|
319
|
+
/**
|
|
320
|
+
* Specific toxicity categories to check.
|
|
321
|
+
* If not specified, all categories are checked.
|
|
322
|
+
*/
|
|
323
|
+
labels?: ToxicityLabel[];
|
|
324
|
+
/**
|
|
325
|
+
* Whether to load the model immediately on instantiation.
|
|
326
|
+
* If false, model will be loaded on first use.
|
|
327
|
+
* @default false
|
|
328
|
+
*/
|
|
329
|
+
preloadModel?: boolean;
|
|
330
|
+
}
|
|
331
|
+
/**
|
|
332
|
+
* Combined result from both rule-based and ML detection.
|
|
333
|
+
*/
|
|
334
|
+
interface HybridAnalysisResult {
|
|
335
|
+
/** Rule-based detection result */
|
|
336
|
+
ruleBasedResult: {
|
|
337
|
+
containsProfanity: boolean;
|
|
338
|
+
profaneWords: string[];
|
|
339
|
+
};
|
|
340
|
+
/** ML-based detection result (null if ML not enabled) */
|
|
341
|
+
mlResult: MLAnalysisResult | null;
|
|
342
|
+
/** Combined decision */
|
|
343
|
+
isToxic: boolean;
|
|
344
|
+
/** Confidence score for the decision */
|
|
345
|
+
confidence: number;
|
|
346
|
+
/** Reason for the decision */
|
|
347
|
+
reason: string;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
export { type CheckProfanityResult as C, Filter as F, type HybridAnalysisResult as H, type Language as L, type Match as M, SeverityLevel as S, type ToxicityLabel as T, type FilterConfig as a, type FilteredProfanityResult as b, type ContextAwareConfig as c, type ToxicityPrediction as d, type MLAnalysisResult as e, type MLDetectorConfig as f };
|