glin-profanity 2.3.7 → 3.0.1
This diff compares the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- package/README.md +197 -0
- package/dist/chunk-KNHWF6MX.js +5050 -0
- package/dist/index.cjs +2041 -272
- package/dist/index.d.cts +252 -87
- package/dist/index.d.ts +252 -87
- package/dist/index.js +50 -3306
- package/dist/ml/index.cjs +5513 -0
- package/dist/ml/index.d.cts +357 -0
- package/dist/ml/index.d.ts +357 -0
- package/dist/ml/index.js +557 -0
- package/dist/types-BgQe4FSE.d.cts +350 -0
- package/dist/types-BgQe4FSE.d.ts +350 -0
- package/package.json +38 -3
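Both declaration files below drop their inline type definitions in favour of re-exports from a shared `types-BgQe4FSE` chunk and add leetspeak and Unicode normalization helpers. For orientation, here is a minimal usage sketch of that new surface; the helper names and signatures come straight from the export lists in the diffs, but `checkProfanity`'s call signature is not visible in this diff and is assumed.

```typescript
// Sketch only: helper names are taken from the exports declared below;
// calling checkProfanity(text) with a single argument is an assumption.
import {
  checkProfanity,
  containsLeetspeak,
  normalizeLeetspeak,
  normalizeUnicode,
} from 'glin-profanity';

const raw = 'f u c k th!s sh1t';

// Undo common obfuscation before the dictionary check.
const deLeeted = containsLeetspeak(raw) ? normalizeLeetspeak(raw) : raw;
const cleaned = normalizeUnicode(deLeeted);

const result = checkProfanity(cleaned);
console.log(result.containsProfanity, result.profaneWords);
```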
package/dist/index.d.cts
CHANGED
@@ -1,89 +1,5 @@
-
-
- * Unified API that mirrors the Python package structure.
- */
-/** Severity levels for profanity matches - unified with Python */
-declare enum SeverityLevel {
-    EXACT = 1,
-    FUZZY = 2
-}
-/** Supported languages - unified list with Python */
-type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
-/** Represents a profanity match in text - unified with Python */
-interface Match {
-    word: string;
-    index: number;
-    severity: SeverityLevel;
-    contextScore?: number;
-    reason?: string;
-    isWhitelisted?: boolean;
-}
-/** Result of profanity check operation - unified field names */
-interface CheckProfanityResult {
-    containsProfanity: boolean;
-    profaneWords: string[];
-    processedText?: string;
-    severityMap?: Record<string, SeverityLevel>;
-    matches?: Match[];
-    contextScore?: number;
-    reason?: string;
-}
-/** Configuration for context-aware filtering - unified with Python */
-interface ContextAwareConfig {
-    enableContextAware?: boolean;
-    contextWindow?: number;
-    confidenceThreshold?: number;
-    domainWhitelists?: Record<string, string[]>;
-}
-/** Main filter configuration options - unified with Python */
-interface FilterConfig extends ContextAwareConfig {
-    languages?: Language[];
-    allLanguages?: boolean;
-    caseSensitive?: boolean;
-    wordBoundaries?: boolean;
-    customWords?: string[];
-    replaceWith?: string;
-    severityLevels?: boolean;
-    ignoreWords?: string[];
-    logProfanity?: boolean;
-    allowObfuscatedMatch?: boolean;
-    fuzzyToleranceLevel?: number;
-}
-/** Result with minimum severity filtering */
-interface FilteredProfanityResult {
-    result: CheckProfanityResult;
-    filteredWords: string[];
-}
-
-declare class Filter {
-    private words;
-    private caseSensitive;
-    private wordBoundaries;
-    private replaceWith?;
-    private severityLevels;
-    private ignoreWords;
-    private logProfanity;
-    private allowObfuscatedMatch;
-    private fuzzyToleranceLevel;
-    private enableContextAware;
-    private contextWindow;
-    private confidenceThreshold;
-    private contextAnalyzer?;
-    private primaryLanguage;
-    constructor(config?: FilterConfig);
-    private debugLog;
-    private normalizeObfuscated;
-    private getRegex;
-    private isFuzzyToleranceMatch;
-    private evaluateSeverity;
-    isProfane(value: string): boolean;
-    matches(word: string): boolean;
-    checkProfanity(text: string): CheckProfanityResult;
-    checkProfanityWithMinSeverity(text: string, minSeverity?: SeverityLevel): {
-        filteredWords: string[];
-        result: CheckProfanityResult;
-    };
-}
+import { L as Language, S as SeverityLevel, C as CheckProfanityResult } from './types-BgQe4FSE.cjs';
+export { c as ContextAwareConfig, F as Filter, a as FilterConfig, b as FilteredProfanityResult, H as HybridAnalysisResult, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, F as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-BgQe4FSE.cjs';
 
 interface ProfanityCheckerConfig {
     languages?: Language[];
@@ -93,6 +9,7 @@ interface ProfanityCheckerConfig {
     customWords?: string[];
     replaceWith?: string;
     severityLevels?: boolean;
+    ignoreWords?: string[];
     allowObfuscatedMatch?: boolean;
     fuzzyToleranceLevel?: number;
     minSeverity?: SeverityLevel;
@@ -117,4 +34,252 @@ declare const useProfanityChecker: (config?: ProfanityCheckerConfig) => {
     isWordProfane: (word: string) => boolean;
 };
 
-
+/**
+ * @fileoverview Leetspeak detection and normalization utilities.
+ * Converts leetspeak/1337 speak text back to standard characters for profanity detection.
+ * @module utils/leetspeak
+ */
+/**
+ * Leetspeak detection intensity levels.
+ * - `basic`: Common substitutions only (0→o, 1→i, 3→e, 4→a, 5→s)
+ * - `moderate`: Basic + symbols (@→a, $→s, !→i) and repeated chars
+ * - `aggressive`: All known substitutions including multi-char patterns
+ */
+type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
+/**
+ * Configuration options for leetspeak normalization.
+ */
+interface LeetspeakOptions {
+    /**
+     * Detection intensity level.
+     * @default 'moderate'
+     */
+    level?: LeetspeakLevel;
+    /**
+     * Whether to collapse repeated characters (e.g., "fuuuuck" → "fuck").
+     * @default true
+     */
+    collapseRepeated?: boolean;
+    /**
+     * Maximum allowed consecutive repeated characters before collapsing.
+     * @default 2
+     */
+    maxRepeated?: number;
+    /**
+     * Whether to remove spaces between single characters (e.g., "f u c k" → "fuck").
+     * @default true
+     */
+    removeSpacedChars?: boolean;
+}
+/**
+ * Normalizes leetspeak text to standard characters.
+ *
+ * @param text - The input text containing potential leetspeak
+ * @param options - Configuration options for normalization
+ * @returns The normalized text with leetspeak characters replaced
+ *
+ * @example
+ * ```typescript
+ * import { normalizeLeetspeak } from 'glin-profanity';
+ *
+ * normalizeLeetspeak('f4ck'); // Returns: 'fack'
+ * normalizeLeetspeak('sh!t'); // Returns: 'shit'
+ * normalizeLeetspeak('b1tch'); // Returns: 'bitch'
+ * normalizeLeetspeak('@ss'); // Returns: 'ass'
+ * normalizeLeetspeak('f u c k'); // Returns: 'fuck'
+ * normalizeLeetspeak('fuuuuck'); // Returns: 'fuck'
+ * ```
+ */
+declare function normalizeLeetspeak(text: string, options?: LeetspeakOptions): string;
+/**
+ * Collapses sequences of spaced single characters into words.
+ * Handles patterns like "f u c k" → "fuck" and "s h i t" → "shit".
+ *
+ * @param text - The input text
+ * @returns Text with spaced characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseSpacedCharacters('f u c k you'); // Returns: 'fuck you'
+ * collapseSpacedCharacters('this is s h i t'); // Returns: 'this is shit'
+ * ```
+ */
+declare function collapseSpacedCharacters(text: string): string;
+/**
+ * Collapses repeated consecutive characters beyond a threshold.
+ * Handles patterns like "fuuuuck" → "fuck" and "shiiiit" → "shit".
+ *
+ * @param text - The input text
+ * @param maxRepeated - Maximum allowed consecutive repeated characters
+ * @returns Text with repeated characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseRepeatedCharacters('fuuuuck', 2); // Returns: 'fuuck'
+ * collapseRepeatedCharacters('fuuuuck', 1); // Returns: 'fuck'
+ * ```
+ */
+declare function collapseRepeatedCharacters(text: string, maxRepeated?: number): string;
+/**
+ * Detects if text contains potential leetspeak patterns.
+ * Useful for deciding whether to apply leetspeak normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if leetspeak patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsLeetspeak('hello'); // Returns: false
+ * containsLeetspeak('h3ll0'); // Returns: true
+ * containsLeetspeak('f4ck'); // Returns: true
+ * containsLeetspeak('@ss'); // Returns: true
+ * ```
+ */
+declare function containsLeetspeak(text: string): boolean;
+/**
+ * Creates a normalized variant generator for a word.
+ * Generates all possible leetspeak variants of a dictionary word.
+ *
+ * @param word - The base word to generate variants for
+ * @param level - The leetspeak level to use for variant generation
+ * @returns Array of possible leetspeak variants
+ *
+ * @example
+ * ```typescript
+ * generateLeetspeakVariants('ass');
+ * // Returns: ['ass', '@ss', 'a$$', '@$$', '4ss', '4$$', ...]
+ * ```
+ */
+declare function generateLeetspeakVariants(word: string, level?: LeetspeakLevel): string[];
+
+/**
+ * @fileoverview Unicode normalization utilities for profanity detection.
+ * Handles homoglyphs, full-width characters, diacritics, and other Unicode tricks.
+ * @module utils/unicode
+ */
+/**
+ * Configuration options for Unicode normalization.
+ */
+interface UnicodeNormalizationOptions {
+    /**
+     * Apply NFKD normalization to decompose characters.
+     * @default true
+     */
+    nfkd?: boolean;
+    /**
+     * Convert homoglyphs (lookalike characters) to ASCII.
+     * @default true
+     */
+    homoglyphs?: boolean;
+    /**
+     * Convert full-width characters to half-width.
+     * @default true
+     */
+    fullWidth?: boolean;
+    /**
+     * Remove diacritical marks (accents, umlauts, etc.).
+     * @default true
+     */
+    removeDiacritics?: boolean;
+    /**
+     * Remove zero-width characters (ZWJ, ZWNJ, etc.).
+     * @default true
+     */
+    removeZeroWidth?: boolean;
+}
+/**
+ * Normalizes Unicode text for consistent profanity detection.
+ * Handles various Unicode tricks used to evade filters.
+ *
+ * @param text - The input text containing potential Unicode obfuscation
+ * @param options - Configuration options for normalization
+ * @returns The normalized text
+ *
+ * @example
+ * ```typescript
+ * import { normalizeUnicode } from 'glin-profanity';
+ *
+ * normalizeUnicode('fυck'); // Returns: 'fuck' (Greek upsilon → u)
+ * normalizeUnicode('fUck'); // Returns: 'fuck' (full-width U → u)
+ * normalizeUnicode('fück'); // Returns: 'fuck' (ü → u)
+ * normalizeUnicode('fùck'); // Returns: 'fuck' (ù → u)
+ * normalizeUnicode('fuck'); // Returns: 'fuck' (removes zero-width space)
+ * ```
+ */
+declare function normalizeUnicode(text: string, options?: UnicodeNormalizationOptions): string;
+/**
+ * Removes zero-width and invisible characters from text.
+ *
+ * @param text - The input text
+ * @returns Text with zero-width characters removed
+ */
+declare function removeZeroWidthCharacters(text: string): string;
+/**
+ * Converts full-width ASCII characters to half-width.
+ * Full-width characters (U+FF01 to U+FF5E) are used in CJK text
+ * but can also be used to evade filters.
+ *
+ * @param text - The input text
+ * @returns Text with full-width characters converted
+ *
+ * @example
+ * ```typescript
+ * convertFullWidth('ABC'); // Returns: 'ABC'
+ * convertFullWidth('fuck'); // Returns: 'fuck'
+ * ```
+ */
+declare function convertFullWidth(text: string): string;
+/**
+ * Converts homoglyph characters to their ASCII equivalents.
+ *
+ * @param text - The input text
+ * @returns Text with homoglyphs converted
+ */
+declare function convertHomoglyphs(text: string): string;
+/**
+ * Applies NFKD normalization and optionally removes diacritical marks.
+ * NFKD decomposes characters into base characters and combining marks.
+ *
+ * @param text - The input text
+ * @param removeDiacritics - Whether to remove diacritical marks
+ * @returns Normalized text
+ *
+ * @example
+ * ```typescript
+ * normalizeNFKD('fück', true); // Returns: 'fuck'
+ * normalizeNFKD('café', true); // Returns: 'cafe'
+ * normalizeNFKD('naïve', true); // Returns: 'naive'
+ * ```
+ */
+declare function normalizeNFKD(text: string, removeDiacritics?: boolean): string;
+/**
+ * Detects if text contains potential Unicode obfuscation.
+ * Useful for deciding whether to apply Unicode normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if Unicode obfuscation patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsUnicodeObfuscation('hello'); // Returns: false
+ * containsUnicodeObfuscation('fυck'); // Returns: true (Greek letter)
+ * containsUnicodeObfuscation('fuck'); // Returns: true (zero-width)
+ * ```
+ */
+declare function containsUnicodeObfuscation(text: string): boolean;
+/**
+ * Gets the character set being used in text.
+ * Helps identify mixed-script attacks (e.g., mixing Latin and Cyrillic).
+ *
+ * @param text - The input text
+ * @returns Object with detected character set information
+ */
+declare function detectCharacterSets(text: string): {
+    hasLatin: boolean;
+    hasCyrillic: boolean;
+    hasGreek: boolean;
+    hasFullWidth: boolean;
+    hasMixed: boolean;
+};
+
+export { CheckProfanityResult, Language, type LeetspeakLevel, type LeetspeakOptions, type ProfanityCheckResult, type ProfanityCheckerConfig, SeverityLevel, type UnicodeNormalizationOptions, checkProfanity, checkProfanityAsync, collapseRepeatedCharacters, collapseSpacedCharacters, containsLeetspeak, containsUnicodeObfuscation, convertFullWidth, convertHomoglyphs, detectCharacterSets, generateLeetspeakVariants, isWordProfane, normalizeLeetspeak, normalizeNFKD, normalizeUnicode, removeZeroWidthCharacters, useProfanityChecker };
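The leetspeak and Unicode helpers declared above are exported individually, so they can be composed outside the main filter. A usage sketch follows, with signatures taken from the declarations and return values assumed for illustration only:

```typescript
// Signatures from the declarations above; printed values are assumptions.
import {
  collapseRepeatedCharacters,
  detectCharacterSets,
  generateLeetspeakVariants,
  normalizeNFKD,
} from 'glin-profanity';

// Expand a blocklist entry into its leetspeak spellings.
const variants = generateLeetspeakVariants('ass', 'aggressive');

// Flag mixed-script input (Latin + Greek homoglyph) before matching.
const sets = detectCharacterSets('fυck');
if (sets.hasMixed || sets.hasGreek) {
  console.log(normalizeNFKD('naïve', true)); // 'naive', per the @example above
}

console.log(collapseRepeatedCharacters('shiiiit', 1)); // repeated 'i' collapsed
```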
package/dist/index.d.ts
CHANGED
@@ -1,89 +1,5 @@
-
-
- * Unified API that mirrors the Python package structure.
- */
-/** Severity levels for profanity matches - unified with Python */
-declare enum SeverityLevel {
-    EXACT = 1,
-    FUZZY = 2
-}
-/** Supported languages - unified list with Python */
-type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
-/** Represents a profanity match in text - unified with Python */
-interface Match {
-    word: string;
-    index: number;
-    severity: SeverityLevel;
-    contextScore?: number;
-    reason?: string;
-    isWhitelisted?: boolean;
-}
-/** Result of profanity check operation - unified field names */
-interface CheckProfanityResult {
-    containsProfanity: boolean;
-    profaneWords: string[];
-    processedText?: string;
-    severityMap?: Record<string, SeverityLevel>;
-    matches?: Match[];
-    contextScore?: number;
-    reason?: string;
-}
-/** Configuration for context-aware filtering - unified with Python */
-interface ContextAwareConfig {
-    enableContextAware?: boolean;
-    contextWindow?: number;
-    confidenceThreshold?: number;
-    domainWhitelists?: Record<string, string[]>;
-}
-/** Main filter configuration options - unified with Python */
-interface FilterConfig extends ContextAwareConfig {
-    languages?: Language[];
-    allLanguages?: boolean;
-    caseSensitive?: boolean;
-    wordBoundaries?: boolean;
-    customWords?: string[];
-    replaceWith?: string;
-    severityLevels?: boolean;
-    ignoreWords?: string[];
-    logProfanity?: boolean;
-    allowObfuscatedMatch?: boolean;
-    fuzzyToleranceLevel?: number;
-}
-/** Result with minimum severity filtering */
-interface FilteredProfanityResult {
-    result: CheckProfanityResult;
-    filteredWords: string[];
-}
-
-declare class Filter {
-    private words;
-    private caseSensitive;
-    private wordBoundaries;
-    private replaceWith?;
-    private severityLevels;
-    private ignoreWords;
-    private logProfanity;
-    private allowObfuscatedMatch;
-    private fuzzyToleranceLevel;
-    private enableContextAware;
-    private contextWindow;
-    private confidenceThreshold;
-    private contextAnalyzer?;
-    private primaryLanguage;
-    constructor(config?: FilterConfig);
-    private debugLog;
-    private normalizeObfuscated;
-    private getRegex;
-    private isFuzzyToleranceMatch;
-    private evaluateSeverity;
-    isProfane(value: string): boolean;
-    matches(word: string): boolean;
-    checkProfanity(text: string): CheckProfanityResult;
-    checkProfanityWithMinSeverity(text: string, minSeverity?: SeverityLevel): {
-        filteredWords: string[];
-        result: CheckProfanityResult;
-    };
-}
+import { L as Language, S as SeverityLevel, C as CheckProfanityResult } from './types-BgQe4FSE.js';
+export { c as ContextAwareConfig, F as Filter, a as FilterConfig, b as FilteredProfanityResult, H as HybridAnalysisResult, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, F as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-BgQe4FSE.js';
 
 interface ProfanityCheckerConfig {
     languages?: Language[];
@@ -93,6 +9,7 @@ interface ProfanityCheckerConfig {
     customWords?: string[];
     replaceWith?: string;
     severityLevels?: boolean;
+    ignoreWords?: string[];
     allowObfuscatedMatch?: boolean;
     fuzzyToleranceLevel?: number;
     minSeverity?: SeverityLevel;
@@ -117,4 +34,252 @@ declare const useProfanityChecker: (config?: ProfanityCheckerConfig) => {
     isWordProfane: (word: string) => boolean;
 };
 
-
+/**
+ * @fileoverview Leetspeak detection and normalization utilities.
+ * Converts leetspeak/1337 speak text back to standard characters for profanity detection.
+ * @module utils/leetspeak
+ */
+/**
+ * Leetspeak detection intensity levels.
+ * - `basic`: Common substitutions only (0→o, 1→i, 3→e, 4→a, 5→s)
+ * - `moderate`: Basic + symbols (@→a, $→s, !→i) and repeated chars
+ * - `aggressive`: All known substitutions including multi-char patterns
+ */
+type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
+/**
+ * Configuration options for leetspeak normalization.
+ */
+interface LeetspeakOptions {
+    /**
+     * Detection intensity level.
+     * @default 'moderate'
+     */
+    level?: LeetspeakLevel;
+    /**
+     * Whether to collapse repeated characters (e.g., "fuuuuck" → "fuck").
+     * @default true
+     */
+    collapseRepeated?: boolean;
+    /**
+     * Maximum allowed consecutive repeated characters before collapsing.
+     * @default 2
+     */
+    maxRepeated?: number;
+    /**
+     * Whether to remove spaces between single characters (e.g., "f u c k" → "fuck").
+     * @default true
+     */
+    removeSpacedChars?: boolean;
+}
+/**
+ * Normalizes leetspeak text to standard characters.
+ *
+ * @param text - The input text containing potential leetspeak
+ * @param options - Configuration options for normalization
+ * @returns The normalized text with leetspeak characters replaced
+ *
+ * @example
+ * ```typescript
+ * import { normalizeLeetspeak } from 'glin-profanity';
+ *
+ * normalizeLeetspeak('f4ck'); // Returns: 'fack'
+ * normalizeLeetspeak('sh!t'); // Returns: 'shit'
+ * normalizeLeetspeak('b1tch'); // Returns: 'bitch'
+ * normalizeLeetspeak('@ss'); // Returns: 'ass'
+ * normalizeLeetspeak('f u c k'); // Returns: 'fuck'
+ * normalizeLeetspeak('fuuuuck'); // Returns: 'fuck'
+ * ```
+ */
+declare function normalizeLeetspeak(text: string, options?: LeetspeakOptions): string;
+/**
+ * Collapses sequences of spaced single characters into words.
+ * Handles patterns like "f u c k" → "fuck" and "s h i t" → "shit".
+ *
+ * @param text - The input text
+ * @returns Text with spaced characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseSpacedCharacters('f u c k you'); // Returns: 'fuck you'
+ * collapseSpacedCharacters('this is s h i t'); // Returns: 'this is shit'
+ * ```
+ */
+declare function collapseSpacedCharacters(text: string): string;
+/**
+ * Collapses repeated consecutive characters beyond a threshold.
+ * Handles patterns like "fuuuuck" → "fuck" and "shiiiit" → "shit".
+ *
+ * @param text - The input text
+ * @param maxRepeated - Maximum allowed consecutive repeated characters
+ * @returns Text with repeated characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseRepeatedCharacters('fuuuuck', 2); // Returns: 'fuuck'
+ * collapseRepeatedCharacters('fuuuuck', 1); // Returns: 'fuck'
+ * ```
+ */
+declare function collapseRepeatedCharacters(text: string, maxRepeated?: number): string;
+/**
+ * Detects if text contains potential leetspeak patterns.
+ * Useful for deciding whether to apply leetspeak normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if leetspeak patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsLeetspeak('hello'); // Returns: false
+ * containsLeetspeak('h3ll0'); // Returns: true
+ * containsLeetspeak('f4ck'); // Returns: true
+ * containsLeetspeak('@ss'); // Returns: true
+ * ```
+ */
+declare function containsLeetspeak(text: string): boolean;
+/**
+ * Creates a normalized variant generator for a word.
+ * Generates all possible leetspeak variants of a dictionary word.
+ *
+ * @param word - The base word to generate variants for
+ * @param level - The leetspeak level to use for variant generation
+ * @returns Array of possible leetspeak variants
+ *
+ * @example
+ * ```typescript
+ * generateLeetspeakVariants('ass');
+ * // Returns: ['ass', '@ss', 'a$$', '@$$', '4ss', '4$$', ...]
+ * ```
+ */
+declare function generateLeetspeakVariants(word: string, level?: LeetspeakLevel): string[];
+
+/**
+ * @fileoverview Unicode normalization utilities for profanity detection.
+ * Handles homoglyphs, full-width characters, diacritics, and other Unicode tricks.
+ * @module utils/unicode
+ */
+/**
+ * Configuration options for Unicode normalization.
+ */
+interface UnicodeNormalizationOptions {
+    /**
+     * Apply NFKD normalization to decompose characters.
+     * @default true
+     */
+    nfkd?: boolean;
+    /**
+     * Convert homoglyphs (lookalike characters) to ASCII.
+     * @default true
+     */
+    homoglyphs?: boolean;
+    /**
+     * Convert full-width characters to half-width.
+     * @default true
+     */
+    fullWidth?: boolean;
+    /**
+     * Remove diacritical marks (accents, umlauts, etc.).
+     * @default true
+     */
+    removeDiacritics?: boolean;
+    /**
+     * Remove zero-width characters (ZWJ, ZWNJ, etc.).
+     * @default true
+     */
+    removeZeroWidth?: boolean;
+}
+/**
+ * Normalizes Unicode text for consistent profanity detection.
+ * Handles various Unicode tricks used to evade filters.
+ *
+ * @param text - The input text containing potential Unicode obfuscation
+ * @param options - Configuration options for normalization
+ * @returns The normalized text
+ *
+ * @example
+ * ```typescript
+ * import { normalizeUnicode } from 'glin-profanity';
+ *
+ * normalizeUnicode('fυck'); // Returns: 'fuck' (Greek upsilon → u)
+ * normalizeUnicode('fUck'); // Returns: 'fuck' (full-width U → u)
+ * normalizeUnicode('fück'); // Returns: 'fuck' (ü → u)
+ * normalizeUnicode('fùck'); // Returns: 'fuck' (ù → u)
+ * normalizeUnicode('fuck'); // Returns: 'fuck' (removes zero-width space)
+ * ```
+ */
+declare function normalizeUnicode(text: string, options?: UnicodeNormalizationOptions): string;
+/**
+ * Removes zero-width and invisible characters from text.
+ *
+ * @param text - The input text
+ * @returns Text with zero-width characters removed
+ */
+declare function removeZeroWidthCharacters(text: string): string;
+/**
+ * Converts full-width ASCII characters to half-width.
+ * Full-width characters (U+FF01 to U+FF5E) are used in CJK text
+ * but can also be used to evade filters.
+ *
+ * @param text - The input text
+ * @returns Text with full-width characters converted
+ *
+ * @example
+ * ```typescript
+ * convertFullWidth('ABC'); // Returns: 'ABC'
+ * convertFullWidth('fuck'); // Returns: 'fuck'
+ * ```
+ */
+declare function convertFullWidth(text: string): string;
+/**
+ * Converts homoglyph characters to their ASCII equivalents.
+ *
+ * @param text - The input text
+ * @returns Text with homoglyphs converted
+ */
+declare function convertHomoglyphs(text: string): string;
+/**
+ * Applies NFKD normalization and optionally removes diacritical marks.
+ * NFKD decomposes characters into base characters and combining marks.
+ *
+ * @param text - The input text
+ * @param removeDiacritics - Whether to remove diacritical marks
+ * @returns Normalized text
+ *
+ * @example
+ * ```typescript
+ * normalizeNFKD('fück', true); // Returns: 'fuck'
+ * normalizeNFKD('café', true); // Returns: 'cafe'
+ * normalizeNFKD('naïve', true); // Returns: 'naive'
+ * ```
+ */
+declare function normalizeNFKD(text: string, removeDiacritics?: boolean): string;
+/**
+ * Detects if text contains potential Unicode obfuscation.
+ * Useful for deciding whether to apply Unicode normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if Unicode obfuscation patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsUnicodeObfuscation('hello'); // Returns: false
+ * containsUnicodeObfuscation('fυck'); // Returns: true (Greek letter)
+ * containsUnicodeObfuscation('fuck'); // Returns: true (zero-width)
+ * ```
+ */
+declare function containsUnicodeObfuscation(text: string): boolean;
+/**
+ * Gets the character set being used in text.
+ * Helps identify mixed-script attacks (e.g., mixing Latin and Cyrillic).
+ *
+ * @param text - The input text
+ * @returns Object with detected character set information
+ */
+declare function detectCharacterSets(text: string): {
+    hasLatin: boolean;
+    hasCyrillic: boolean;
+    hasGreek: boolean;
+    hasFullWidth: boolean;
+    hasMixed: boolean;
+};
+
+export { CheckProfanityResult, Language, type LeetspeakLevel, type LeetspeakOptions, type ProfanityCheckResult, type ProfanityCheckerConfig, SeverityLevel, type UnicodeNormalizationOptions, checkProfanity, checkProfanityAsync, collapseRepeatedCharacters, collapseSpacedCharacters, containsLeetspeak, containsUnicodeObfuscation, convertFullWidth, convertHomoglyphs, detectCharacterSets, generateLeetspeakVariants, isWordProfane, normalizeLeetspeak, normalizeNFKD, normalizeUnicode, removeZeroWidthCharacters, useProfanityChecker };
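The only change to the pre-existing `ProfanityCheckerConfig` interface is the added `ignoreWords?: string[]` field. A sketch of how the React hook might consume it, using only the `isWordProfane` member visible in the hunks above; the word list, tolerance value, and the exact semantics of `ignoreWords` are assumptions:

```typescript
// Only `isWordProfane` and the config fields shown in the diff are grounded;
// the values passed here are illustrative.
import { useProfanityChecker } from 'glin-profanity';

export function CommentGuard({ draft }: { draft: string }) {
  const { isWordProfane } = useProfanityChecker({
    languages: ['english'],
    ignoreWords: ['scunthorpe'],   // new in 3.x: words exempted from matching (assumed semantics)
    allowObfuscatedMatch: true,
    fuzzyToleranceLevel: 1,        // illustrative value; the valid range is not shown in this diff
  });

  const blocked = draft.split(/\s+/).some(isWordProfane);
  return blocked ? 'Please rephrase your comment.' : null;
}
```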