glin-profanity 2.3.8 → 3.1.0

This diff compares the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
package/dist/index.d.cts CHANGED
@@ -1,89 +1,5 @@
- /**
- * Type definitions for glin-profanity JavaScript/TypeScript package.
- * Unified API that mirrors the Python package structure.
- */
- /** Severity levels for profanity matches - unified with Python */
- declare enum SeverityLevel {
- EXACT = 1,
- FUZZY = 2
- }
- /** Supported languages - unified list with Python */
- type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
- /** Represents a profanity match in text - unified with Python */
- interface Match {
- word: string;
- index: number;
- severity: SeverityLevel;
- contextScore?: number;
- reason?: string;
- isWhitelisted?: boolean;
- }
- /** Result of profanity check operation - unified field names */
- interface CheckProfanityResult {
- containsProfanity: boolean;
- profaneWords: string[];
- processedText?: string;
- severityMap?: Record<string, SeverityLevel>;
- matches?: Match[];
- contextScore?: number;
- reason?: string;
- }
- /** Configuration for context-aware filtering - unified with Python */
- interface ContextAwareConfig {
- enableContextAware?: boolean;
- contextWindow?: number;
- confidenceThreshold?: number;
- domainWhitelists?: Record<string, string[]>;
- }
- /** Main filter configuration options - unified with Python */
- interface FilterConfig extends ContextAwareConfig {
- languages?: Language[];
- allLanguages?: boolean;
- caseSensitive?: boolean;
- wordBoundaries?: boolean;
- customWords?: string[];
- replaceWith?: string;
- severityLevels?: boolean;
- ignoreWords?: string[];
- logProfanity?: boolean;
- allowObfuscatedMatch?: boolean;
- fuzzyToleranceLevel?: number;
- }
- /** Result with minimum severity filtering */
- interface FilteredProfanityResult {
- result: CheckProfanityResult;
- filteredWords: string[];
- }
-
- declare class Filter {
- private words;
- private caseSensitive;
- private wordBoundaries;
- private replaceWith?;
- private severityLevels;
- private ignoreWords;
- private logProfanity;
- private allowObfuscatedMatch;
- private fuzzyToleranceLevel;
- private enableContextAware;
- private contextWindow;
- private confidenceThreshold;
- private contextAnalyzer?;
- private primaryLanguage;
- constructor(config?: FilterConfig);
- private debugLog;
- private normalizeObfuscated;
- private getRegex;
- private isFuzzyToleranceMatch;
- private evaluateSeverity;
- isProfane(value: string): boolean;
- matches(word: string): boolean;
- checkProfanity(text: string): CheckProfanityResult;
- checkProfanityWithMinSeverity(text: string, minSeverity?: SeverityLevel): {
- filteredWords: string[];
- result: CheckProfanityResult;
- };
- }
+ import { L as Language, S as SeverityLevel, C as CheckProfanityResult } from './types-BgQe4FSE.cjs';
+ export { c as ContextAwareConfig, F as Filter, a as FilterConfig, b as FilteredProfanityResult, H as HybridAnalysisResult, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, F as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-BgQe4FSE.cjs';

  interface ProfanityCheckerConfig {
  languages?: Language[];
@@ -93,6 +9,7 @@ interface ProfanityCheckerConfig {
  customWords?: string[];
  replaceWith?: string;
  severityLevels?: boolean;
+ ignoreWords?: string[];
  allowObfuscatedMatch?: boolean;
  fuzzyToleranceLevel?: number;
  minSeverity?: SeverityLevel;
@@ -117,4 +34,252 @@ declare const useProfanityChecker: (config?: ProfanityCheckerConfig) => {
  isWordProfane: (word: string) => boolean;
  };

- export { type CheckProfanityResult, type ContextAwareConfig, Filter, type FilterConfig, type FilteredProfanityResult, type Language, type Match, type ProfanityCheckResult, type ProfanityCheckerConfig, Filter as ProfanityFilter, SeverityLevel, checkProfanity, checkProfanityAsync, isWordProfane, useProfanityChecker };
+ /**
+ * @fileoverview Leetspeak detection and normalization utilities.
+ * Converts leetspeak/1337 speak text back to standard characters for profanity detection.
+ * @module utils/leetspeak
+ */
+ /**
+ * Leetspeak detection intensity levels.
+ * - `basic`: Common substitutions only (0→o, 1→i, 3→e, 4→a, 5→s)
+ * - `moderate`: Basic + symbols (@→a, $→s, !→i) and repeated chars
+ * - `aggressive`: All known substitutions including multi-char patterns
+ */
+ type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
+ /**
+ * Configuration options for leetspeak normalization.
+ */
+ interface LeetspeakOptions {
+ /**
+ * Detection intensity level.
+ * @default 'moderate'
+ */
+ level?: LeetspeakLevel;
+ /**
+ * Whether to collapse repeated characters (e.g., "fuuuuck" → "fuck").
+ * @default true
+ */
+ collapseRepeated?: boolean;
+ /**
+ * Maximum allowed consecutive repeated characters before collapsing.
+ * @default 2
+ */
+ maxRepeated?: number;
+ /**
+ * Whether to remove spaces between single characters (e.g., "f u c k" → "fuck").
+ * @default true
+ */
+ removeSpacedChars?: boolean;
+ }
+ /**
+ * Normalizes leetspeak text to standard characters.
+ *
+ * @param text - The input text containing potential leetspeak
+ * @param options - Configuration options for normalization
+ * @returns The normalized text with leetspeak characters replaced
+ *
+ * @example
+ * ```typescript
+ * import { normalizeLeetspeak } from 'glin-profanity';
+ *
+ * normalizeLeetspeak('f4ck'); // Returns: 'fack'
+ * normalizeLeetspeak('sh!t'); // Returns: 'shit'
+ * normalizeLeetspeak('b1tch'); // Returns: 'bitch'
+ * normalizeLeetspeak('@ss'); // Returns: 'ass'
+ * normalizeLeetspeak('f u c k'); // Returns: 'fuck'
+ * normalizeLeetspeak('fuuuuck'); // Returns: 'fuck'
+ * ```
+ */
+ declare function normalizeLeetspeak(text: string, options?: LeetspeakOptions): string;
+ /**
+ * Collapses sequences of spaced single characters into words.
+ * Handles patterns like "f u c k" → "fuck" and "s h i t" → "shit".
+ *
+ * @param text - The input text
+ * @returns Text with spaced characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseSpacedCharacters('f u c k you'); // Returns: 'fuck you'
+ * collapseSpacedCharacters('this is s h i t'); // Returns: 'this is shit'
+ * ```
+ */
+ declare function collapseSpacedCharacters(text: string): string;
+ /**
+ * Collapses repeated consecutive characters beyond a threshold.
+ * Handles patterns like "fuuuuck" → "fuck" and "shiiiit" → "shit".
+ *
+ * @param text - The input text
+ * @param maxRepeated - Maximum allowed consecutive repeated characters
+ * @returns Text with repeated characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseRepeatedCharacters('fuuuuck', 2); // Returns: 'fuuck'
+ * collapseRepeatedCharacters('fuuuuck', 1); // Returns: 'fuck'
+ * ```
+ */
+ declare function collapseRepeatedCharacters(text: string, maxRepeated?: number): string;
+ /**
+ * Detects if text contains potential leetspeak patterns.
+ * Useful for deciding whether to apply leetspeak normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if leetspeak patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsLeetspeak('hello'); // Returns: false
+ * containsLeetspeak('h3ll0'); // Returns: true
+ * containsLeetspeak('f4ck'); // Returns: true
+ * containsLeetspeak('@ss'); // Returns: true
+ * ```
+ */
+ declare function containsLeetspeak(text: string): boolean;
+ /**
+ * Creates a normalized variant generator for a word.
+ * Generates all possible leetspeak variants of a dictionary word.
+ *
+ * @param word - The base word to generate variants for
+ * @param level - The leetspeak level to use for variant generation
+ * @returns Array of possible leetspeak variants
+ *
+ * @example
+ * ```typescript
+ * generateLeetspeakVariants('ass');
+ * // Returns: ['ass', '@ss', 'a$$', '@$$', '4ss', '4$$', ...]
+ * ```
+ */
+ declare function generateLeetspeakVariants(word: string, level?: LeetspeakLevel): string[];
+
+ /**
+ * @fileoverview Unicode normalization utilities for profanity detection.
+ * Handles homoglyphs, full-width characters, diacritics, and other Unicode tricks.
+ * @module utils/unicode
+ */
+ /**
+ * Configuration options for Unicode normalization.
+ */
+ interface UnicodeNormalizationOptions {
+ /**
+ * Apply NFKD normalization to decompose characters.
+ * @default true
+ */
+ nfkd?: boolean;
+ /**
+ * Convert homoglyphs (lookalike characters) to ASCII.
+ * @default true
+ */
+ homoglyphs?: boolean;
+ /**
+ * Convert full-width characters to half-width.
+ * @default true
+ */
+ fullWidth?: boolean;
+ /**
+ * Remove diacritical marks (accents, umlauts, etc.).
+ * @default true
+ */
+ removeDiacritics?: boolean;
+ /**
+ * Remove zero-width characters (ZWJ, ZWNJ, etc.).
+ * @default true
+ */
+ removeZeroWidth?: boolean;
+ }
+ /**
+ * Normalizes Unicode text for consistent profanity detection.
+ * Handles various Unicode tricks used to evade filters.
+ *
+ * @param text - The input text containing potential Unicode obfuscation
+ * @param options - Configuration options for normalization
+ * @returns The normalized text
+ *
+ * @example
+ * ```typescript
+ * import { normalizeUnicode } from 'glin-profanity';
+ *
+ * normalizeUnicode('fυck'); // Returns: 'fuck' (Greek upsilon → u)
+ * normalizeUnicode('fＵck'); // Returns: 'fuck' (full-width U → u)
+ * normalizeUnicode('fück'); // Returns: 'fuck' (ü → u)
+ * normalizeUnicode('fùck'); // Returns: 'fuck' (ù → u)
+ * normalizeUnicode('f​uck'); // Returns: 'fuck' (removes zero-width space)
+ * ```
+ */
+ declare function normalizeUnicode(text: string, options?: UnicodeNormalizationOptions): string;
+ /**
+ * Removes zero-width and invisible characters from text.
+ *
+ * @param text - The input text
+ * @returns Text with zero-width characters removed
+ */
+ declare function removeZeroWidthCharacters(text: string): string;
+ /**
+ * Converts full-width ASCII characters to half-width.
+ * Full-width characters (U+FF01 to U+FF5E) are used in CJK text
+ * but can also be used to evade filters.
+ *
+ * @param text - The input text
+ * @returns Text with full-width characters converted
+ *
+ * @example
+ * ```typescript
+ * convertFullWidth('ＡＢＣ'); // Returns: 'ABC'
+ * convertFullWidth('ｆｕｃｋ'); // Returns: 'fuck'
+ * ```
+ */
+ declare function convertFullWidth(text: string): string;
+ /**
+ * Converts homoglyph characters to their ASCII equivalents.
+ *
+ * @param text - The input text
+ * @returns Text with homoglyphs converted
+ */
+ declare function convertHomoglyphs(text: string): string;
+ /**
+ * Applies NFKD normalization and optionally removes diacritical marks.
+ * NFKD decomposes characters into base characters and combining marks.
+ *
+ * @param text - The input text
+ * @param removeDiacritics - Whether to remove diacritical marks
+ * @returns Normalized text
+ *
+ * @example
+ * ```typescript
+ * normalizeNFKD('fück', true); // Returns: 'fuck'
+ * normalizeNFKD('café', true); // Returns: 'cafe'
+ * normalizeNFKD('naïve', true); // Returns: 'naive'
+ * ```
+ */
+ declare function normalizeNFKD(text: string, removeDiacritics?: boolean): string;
+ /**
+ * Detects if text contains potential Unicode obfuscation.
+ * Useful for deciding whether to apply Unicode normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if Unicode obfuscation patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsUnicodeObfuscation('hello'); // Returns: false
+ * containsUnicodeObfuscation('fυck'); // Returns: true (Greek letter)
+ * containsUnicodeObfuscation('f​uck'); // Returns: true (zero-width)
+ * ```
+ */
+ declare function containsUnicodeObfuscation(text: string): boolean;
+ /**
+ * Gets the character set being used in text.
+ * Helps identify mixed-script attacks (e.g., mixing Latin and Cyrillic).
+ *
+ * @param text - The input text
+ * @returns Object with detected character set information
+ */
+ declare function detectCharacterSets(text: string): {
+ hasLatin: boolean;
+ hasCyrillic: boolean;
+ hasGreek: boolean;
+ hasFullWidth: boolean;
+ hasMixed: boolean;
+ };
+
+ export { CheckProfanityResult, Language, type LeetspeakLevel, type LeetspeakOptions, type ProfanityCheckResult, type ProfanityCheckerConfig, SeverityLevel, type UnicodeNormalizationOptions, checkProfanity, checkProfanityAsync, collapseRepeatedCharacters, collapseSpacedCharacters, containsLeetspeak, containsUnicodeObfuscation, convertFullWidth, convertHomoglyphs, detectCharacterSets, generateLeetspeakVariants, isWordProfane, normalizeLeetspeak, normalizeNFKD, normalizeUnicode, removeZeroWidthCharacters, useProfanityChecker };
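The leetspeak helpers declared above are new in 3.x and are exported from the package root. As a rough sketch of how they might be combined (not taken from the package's documentation; the gating logic and sample input are assumptions based solely on the signatures and JSDoc in this diff):

```typescript
// Sketch based only on the declarations above; not an official glin-profanity example.
import {
  containsLeetspeak,
  normalizeLeetspeak,
  type LeetspeakOptions,
} from 'glin-profanity';

// Options mirror the LeetspeakOptions interface declared in the diff.
const leetOptions: LeetspeakOptions = {
  level: 'aggressive',     // 'basic' | 'moderate' | 'aggressive'
  collapseRepeated: true,  // collapse runs like "fuuuuck"
  maxRepeated: 2,          // keep at most two consecutive repeats
  removeSpacedChars: true, // join spaced-out letters like "f u c k"
};

// Normalize only when leetspeak patterns are actually detected,
// so clean input skips the extra work.
export function preNormalize(input: string): string {
  return containsLeetspeak(input) ? normalizeLeetspeak(input, leetOptions) : input;
}

console.log(preNormalize('h3ll0 w0rld')); // leetspeak digits mapped back to letters
```

The same names are re-exported from dist/index.d.ts below, so the sketch applies to both the CJS and ESM entry points.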
package/dist/index.d.ts CHANGED
@@ -1,89 +1,5 @@
- /**
- * Type definitions for glin-profanity JavaScript/TypeScript package.
- * Unified API that mirrors the Python package structure.
- */
- /** Severity levels for profanity matches - unified with Python */
- declare enum SeverityLevel {
- EXACT = 1,
- FUZZY = 2
- }
- /** Supported languages - unified list with Python */
- type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
- /** Represents a profanity match in text - unified with Python */
- interface Match {
- word: string;
- index: number;
- severity: SeverityLevel;
- contextScore?: number;
- reason?: string;
- isWhitelisted?: boolean;
- }
- /** Result of profanity check operation - unified field names */
- interface CheckProfanityResult {
- containsProfanity: boolean;
- profaneWords: string[];
- processedText?: string;
- severityMap?: Record<string, SeverityLevel>;
- matches?: Match[];
- contextScore?: number;
- reason?: string;
- }
- /** Configuration for context-aware filtering - unified with Python */
- interface ContextAwareConfig {
- enableContextAware?: boolean;
- contextWindow?: number;
- confidenceThreshold?: number;
- domainWhitelists?: Record<string, string[]>;
- }
- /** Main filter configuration options - unified with Python */
- interface FilterConfig extends ContextAwareConfig {
- languages?: Language[];
- allLanguages?: boolean;
- caseSensitive?: boolean;
- wordBoundaries?: boolean;
- customWords?: string[];
- replaceWith?: string;
- severityLevels?: boolean;
- ignoreWords?: string[];
- logProfanity?: boolean;
- allowObfuscatedMatch?: boolean;
- fuzzyToleranceLevel?: number;
- }
- /** Result with minimum severity filtering */
- interface FilteredProfanityResult {
- result: CheckProfanityResult;
- filteredWords: string[];
- }
-
- declare class Filter {
- private words;
- private caseSensitive;
- private wordBoundaries;
- private replaceWith?;
- private severityLevels;
- private ignoreWords;
- private logProfanity;
- private allowObfuscatedMatch;
- private fuzzyToleranceLevel;
- private enableContextAware;
- private contextWindow;
- private confidenceThreshold;
- private contextAnalyzer?;
- private primaryLanguage;
- constructor(config?: FilterConfig);
- private debugLog;
- private normalizeObfuscated;
- private getRegex;
- private isFuzzyToleranceMatch;
- private evaluateSeverity;
- isProfane(value: string): boolean;
- matches(word: string): boolean;
- checkProfanity(text: string): CheckProfanityResult;
- checkProfanityWithMinSeverity(text: string, minSeverity?: SeverityLevel): {
- filteredWords: string[];
- result: CheckProfanityResult;
- };
- }
+ import { L as Language, S as SeverityLevel, C as CheckProfanityResult } from './types-BgQe4FSE.js';
+ export { c as ContextAwareConfig, F as Filter, a as FilterConfig, b as FilteredProfanityResult, H as HybridAnalysisResult, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, F as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-BgQe4FSE.js';

  interface ProfanityCheckerConfig {
  languages?: Language[];
@@ -93,6 +9,7 @@ interface ProfanityCheckerConfig {
  customWords?: string[];
  replaceWith?: string;
  severityLevels?: boolean;
+ ignoreWords?: string[];
  allowObfuscatedMatch?: boolean;
  fuzzyToleranceLevel?: number;
  minSeverity?: SeverityLevel;
@@ -117,4 +34,252 @@ declare const useProfanityChecker: (config?: ProfanityCheckerConfig) => {
  isWordProfane: (word: string) => boolean;
  };

- export { type CheckProfanityResult, type ContextAwareConfig, Filter, type FilterConfig, type FilteredProfanityResult, type Language, type Match, type ProfanityCheckResult, type ProfanityCheckerConfig, Filter as ProfanityFilter, SeverityLevel, checkProfanity, checkProfanityAsync, isWordProfane, useProfanityChecker };
+ /**
+ * @fileoverview Leetspeak detection and normalization utilities.
+ * Converts leetspeak/1337 speak text back to standard characters for profanity detection.
+ * @module utils/leetspeak
+ */
+ /**
+ * Leetspeak detection intensity levels.
+ * - `basic`: Common substitutions only (0→o, 1→i, 3→e, 4→a, 5→s)
+ * - `moderate`: Basic + symbols (@→a, $→s, !→i) and repeated chars
+ * - `aggressive`: All known substitutions including multi-char patterns
+ */
+ type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
+ /**
+ * Configuration options for leetspeak normalization.
+ */
+ interface LeetspeakOptions {
+ /**
+ * Detection intensity level.
+ * @default 'moderate'
+ */
+ level?: LeetspeakLevel;
+ /**
+ * Whether to collapse repeated characters (e.g., "fuuuuck" → "fuck").
+ * @default true
+ */
+ collapseRepeated?: boolean;
+ /**
+ * Maximum allowed consecutive repeated characters before collapsing.
+ * @default 2
+ */
+ maxRepeated?: number;
+ /**
+ * Whether to remove spaces between single characters (e.g., "f u c k" → "fuck").
+ * @default true
+ */
+ removeSpacedChars?: boolean;
+ }
+ /**
+ * Normalizes leetspeak text to standard characters.
+ *
+ * @param text - The input text containing potential leetspeak
+ * @param options - Configuration options for normalization
+ * @returns The normalized text with leetspeak characters replaced
+ *
+ * @example
+ * ```typescript
+ * import { normalizeLeetspeak } from 'glin-profanity';
+ *
+ * normalizeLeetspeak('f4ck'); // Returns: 'fack'
+ * normalizeLeetspeak('sh!t'); // Returns: 'shit'
+ * normalizeLeetspeak('b1tch'); // Returns: 'bitch'
+ * normalizeLeetspeak('@ss'); // Returns: 'ass'
+ * normalizeLeetspeak('f u c k'); // Returns: 'fuck'
+ * normalizeLeetspeak('fuuuuck'); // Returns: 'fuck'
+ * ```
+ */
+ declare function normalizeLeetspeak(text: string, options?: LeetspeakOptions): string;
+ /**
+ * Collapses sequences of spaced single characters into words.
+ * Handles patterns like "f u c k" → "fuck" and "s h i t" → "shit".
+ *
+ * @param text - The input text
+ * @returns Text with spaced characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseSpacedCharacters('f u c k you'); // Returns: 'fuck you'
+ * collapseSpacedCharacters('this is s h i t'); // Returns: 'this is shit'
+ * ```
+ */
+ declare function collapseSpacedCharacters(text: string): string;
+ /**
+ * Collapses repeated consecutive characters beyond a threshold.
+ * Handles patterns like "fuuuuck" → "fuck" and "shiiiit" → "shit".
+ *
+ * @param text - The input text
+ * @param maxRepeated - Maximum allowed consecutive repeated characters
+ * @returns Text with repeated characters collapsed
+ *
+ * @example
+ * ```typescript
+ * collapseRepeatedCharacters('fuuuuck', 2); // Returns: 'fuuck'
+ * collapseRepeatedCharacters('fuuuuck', 1); // Returns: 'fuck'
+ * ```
+ */
+ declare function collapseRepeatedCharacters(text: string, maxRepeated?: number): string;
+ /**
+ * Detects if text contains potential leetspeak patterns.
+ * Useful for deciding whether to apply leetspeak normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if leetspeak patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsLeetspeak('hello'); // Returns: false
+ * containsLeetspeak('h3ll0'); // Returns: true
+ * containsLeetspeak('f4ck'); // Returns: true
+ * containsLeetspeak('@ss'); // Returns: true
+ * ```
+ */
+ declare function containsLeetspeak(text: string): boolean;
+ /**
+ * Creates a normalized variant generator for a word.
+ * Generates all possible leetspeak variants of a dictionary word.
+ *
+ * @param word - The base word to generate variants for
+ * @param level - The leetspeak level to use for variant generation
+ * @returns Array of possible leetspeak variants
+ *
+ * @example
+ * ```typescript
+ * generateLeetspeakVariants('ass');
+ * // Returns: ['ass', '@ss', 'a$$', '@$$', '4ss', '4$$', ...]
+ * ```
+ */
+ declare function generateLeetspeakVariants(word: string, level?: LeetspeakLevel): string[];
+
+ /**
+ * @fileoverview Unicode normalization utilities for profanity detection.
+ * Handles homoglyphs, full-width characters, diacritics, and other Unicode tricks.
+ * @module utils/unicode
+ */
+ /**
+ * Configuration options for Unicode normalization.
+ */
+ interface UnicodeNormalizationOptions {
+ /**
+ * Apply NFKD normalization to decompose characters.
+ * @default true
+ */
+ nfkd?: boolean;
+ /**
+ * Convert homoglyphs (lookalike characters) to ASCII.
+ * @default true
+ */
+ homoglyphs?: boolean;
+ /**
+ * Convert full-width characters to half-width.
+ * @default true
+ */
+ fullWidth?: boolean;
+ /**
+ * Remove diacritical marks (accents, umlauts, etc.).
+ * @default true
+ */
+ removeDiacritics?: boolean;
+ /**
+ * Remove zero-width characters (ZWJ, ZWNJ, etc.).
+ * @default true
+ */
+ removeZeroWidth?: boolean;
+ }
+ /**
+ * Normalizes Unicode text for consistent profanity detection.
+ * Handles various Unicode tricks used to evade filters.
+ *
+ * @param text - The input text containing potential Unicode obfuscation
+ * @param options - Configuration options for normalization
+ * @returns The normalized text
+ *
+ * @example
+ * ```typescript
+ * import { normalizeUnicode } from 'glin-profanity';
+ *
+ * normalizeUnicode('fυck'); // Returns: 'fuck' (Greek upsilon → u)
+ * normalizeUnicode('fＵck'); // Returns: 'fuck' (full-width U → u)
+ * normalizeUnicode('fück'); // Returns: 'fuck' (ü → u)
+ * normalizeUnicode('fùck'); // Returns: 'fuck' (ù → u)
+ * normalizeUnicode('f​uck'); // Returns: 'fuck' (removes zero-width space)
+ * ```
+ */
+ declare function normalizeUnicode(text: string, options?: UnicodeNormalizationOptions): string;
+ /**
+ * Removes zero-width and invisible characters from text.
+ *
+ * @param text - The input text
+ * @returns Text with zero-width characters removed
+ */
+ declare function removeZeroWidthCharacters(text: string): string;
+ /**
+ * Converts full-width ASCII characters to half-width.
+ * Full-width characters (U+FF01 to U+FF5E) are used in CJK text
+ * but can also be used to evade filters.
+ *
+ * @param text - The input text
+ * @returns Text with full-width characters converted
+ *
+ * @example
+ * ```typescript
+ * convertFullWidth('ＡＢＣ'); // Returns: 'ABC'
+ * convertFullWidth('ｆｕｃｋ'); // Returns: 'fuck'
+ * ```
+ */
+ declare function convertFullWidth(text: string): string;
+ /**
+ * Converts homoglyph characters to their ASCII equivalents.
+ *
+ * @param text - The input text
+ * @returns Text with homoglyphs converted
+ */
+ declare function convertHomoglyphs(text: string): string;
+ /**
+ * Applies NFKD normalization and optionally removes diacritical marks.
+ * NFKD decomposes characters into base characters and combining marks.
+ *
+ * @param text - The input text
+ * @param removeDiacritics - Whether to remove diacritical marks
+ * @returns Normalized text
+ *
+ * @example
+ * ```typescript
+ * normalizeNFKD('fück', true); // Returns: 'fuck'
+ * normalizeNFKD('café', true); // Returns: 'cafe'
+ * normalizeNFKD('naïve', true); // Returns: 'naive'
+ * ```
+ */
+ declare function normalizeNFKD(text: string, removeDiacritics?: boolean): string;
+ /**
+ * Detects if text contains potential Unicode obfuscation.
+ * Useful for deciding whether to apply Unicode normalization.
+ *
+ * @param text - The input text to analyze
+ * @returns True if Unicode obfuscation patterns are detected
+ *
+ * @example
+ * ```typescript
+ * containsUnicodeObfuscation('hello'); // Returns: false
+ * containsUnicodeObfuscation('fυck'); // Returns: true (Greek letter)
+ * containsUnicodeObfuscation('f​uck'); // Returns: true (zero-width)
+ * ```
+ */
+ declare function containsUnicodeObfuscation(text: string): boolean;
+ /**
+ * Gets the character set being used in text.
+ * Helps identify mixed-script attacks (e.g., mixing Latin and Cyrillic).
+ *
+ * @param text - The input text
+ * @returns Object with detected character set information
+ */
+ declare function detectCharacterSets(text: string): {
+ hasLatin: boolean;
+ hasCyrillic: boolean;
+ hasGreek: boolean;
+ hasFullWidth: boolean;
+ hasMixed: boolean;
+ };
+
+ export { CheckProfanityResult, Language, type LeetspeakLevel, type LeetspeakOptions, type ProfanityCheckResult, type ProfanityCheckerConfig, SeverityLevel, type UnicodeNormalizationOptions, checkProfanity, checkProfanityAsync, collapseRepeatedCharacters, collapseSpacedCharacters, containsLeetspeak, containsUnicodeObfuscation, convertFullWidth, convertHomoglyphs, detectCharacterSets, generateLeetspeakVariants, isWordProfane, normalizeLeetspeak, normalizeNFKD, normalizeUnicode, removeZeroWidthCharacters, useProfanityChecker };
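Both entry points also export the new Unicode normalization helpers. A minimal sketch of how they might be wired together, again inferred only from the signatures declared in this diff (the mixed-script gating and sample string are illustrative assumptions, not part of the package's docs):

```typescript
// Sketch based only on the declarations in this diff; not an official example.
import {
  containsUnicodeObfuscation,
  detectCharacterSets,
  normalizeUnicode,
  type UnicodeNormalizationOptions,
} from 'glin-profanity';

// All flags default to true per the JSDoc; spelled out here for clarity.
const unicodeOptions: UnicodeNormalizationOptions = {
  nfkd: true,             // decompose characters (NFKD)
  homoglyphs: true,       // map lookalike characters to ASCII
  fullWidth: true,        // fold U+FF01..U+FF5E to half-width
  removeDiacritics: true, // strip combining marks after decomposition
  removeZeroWidth: true,  // drop ZWJ/ZWNJ/zero-width spaces
};

export function deobfuscate(input: string): string {
  const scripts = detectCharacterSets(input);
  // Mixed scripts (e.g. Latin plus Cyrillic/Greek) or known tricks suggest obfuscation.
  if (scripts.hasMixed || containsUnicodeObfuscation(input)) {
    return normalizeUnicode(input, unicodeOptions);
  }
  return input;
}

console.log(deobfuscate('fυck')); // Greek-upsilon example from the JSDoc above -> 'fuck'
```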