glin-profanity 3.1.5 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +84 -566
  2. package/dist/{types-CdDqSZY7.d.cts → Filter-BGcyIAvO.d.ts} +4 -162
  3. package/dist/{types-CdDqSZY7.d.ts → Filter-D34Wsmrj.d.cts} +4 -162
  4. package/dist/frameworks/index.cjs +5257 -0
  5. package/dist/frameworks/index.d.cts +2 -0
  6. package/dist/frameworks/index.d.ts +2 -0
  7. package/dist/frameworks/index.js +5252 -0
  8. package/dist/frameworks/nextjs.cjs +5257 -0
  9. package/dist/frameworks/nextjs.d.cts +173 -0
  10. package/dist/frameworks/nextjs.d.ts +173 -0
  11. package/dist/frameworks/nextjs.js +5252 -0
  12. package/dist/index.cjs +151 -85
  13. package/dist/index.d.cts +5 -29
  14. package/dist/index.d.ts +5 -29
  15. package/dist/index.js +152 -85
  16. package/dist/integrations/index.cjs +6110 -0
  17. package/dist/integrations/index.d.cts +5 -0
  18. package/dist/integrations/index.d.ts +5 -0
  19. package/dist/integrations/index.js +6082 -0
  20. package/dist/integrations/langchain.cjs +5252 -0
  21. package/dist/integrations/langchain.d.cts +231 -0
  22. package/dist/integrations/langchain.d.ts +231 -0
  23. package/dist/integrations/langchain.js +5239 -0
  24. package/dist/integrations/openai.cjs +5367 -0
  25. package/dist/integrations/openai.d.cts +167 -0
  26. package/dist/integrations/openai.d.ts +167 -0
  27. package/dist/integrations/openai.js +5362 -0
  28. package/dist/integrations/semantic.cjs +5314 -0
  29. package/dist/integrations/semantic.d.cts +268 -0
  30. package/dist/integrations/semantic.d.ts +268 -0
  31. package/dist/integrations/semantic.js +5309 -0
  32. package/dist/integrations/vercel-ai.cjs +5282 -0
  33. package/dist/integrations/vercel-ai.d.cts +224 -0
  34. package/dist/integrations/vercel-ai.d.ts +224 -0
  35. package/dist/integrations/vercel-ai.js +5273 -0
  36. package/dist/ml/index.cjs +358 -56
  37. package/dist/ml/index.d.cts +5 -2
  38. package/dist/ml/index.d.ts +5 -2
  39. package/dist/ml/index.js +354 -57
  40. package/dist/ml/transformers.cjs +5237 -0
  41. package/dist/ml/transformers.d.cts +232 -0
  42. package/dist/ml/transformers.d.ts +232 -0
  43. package/dist/ml/transformers.js +5231 -0
  44. package/dist/multimodal/audio.cjs +5269 -0
  45. package/dist/multimodal/audio.d.cts +255 -0
  46. package/dist/multimodal/audio.d.ts +255 -0
  47. package/dist/multimodal/audio.js +5264 -0
  48. package/dist/multimodal/index.cjs +5432 -0
  49. package/dist/multimodal/index.d.cts +4 -0
  50. package/dist/multimodal/index.d.ts +4 -0
  51. package/dist/multimodal/index.js +5422 -0
  52. package/dist/multimodal/ocr.cjs +5193 -0
  53. package/dist/multimodal/ocr.d.cts +157 -0
  54. package/dist/multimodal/ocr.d.ts +157 -0
  55. package/dist/multimodal/ocr.js +5187 -0
  56. package/dist/react.cjs +5133 -0
  57. package/dist/react.d.cts +13 -0
  58. package/dist/react.d.ts +13 -0
  59. package/dist/react.js +5131 -0
  60. package/dist/types-B9c_ik4k.d.cts +88 -0
  61. package/dist/types-B9c_ik4k.d.ts +88 -0
  62. package/dist/types-BuKh9tvV.d.ts +20 -0
  63. package/dist/types-Ct_ueYqw.d.cts +76 -0
  64. package/dist/types-Ct_ueYqw.d.ts +76 -0
  65. package/dist/types-DI8nzwWc.d.cts +20 -0
  66. package/package.json +170 -3
@@ -1,89 +1,4 @@
1
- /**
2
- * Type definitions for glin-profanity JavaScript/TypeScript package.
3
- * Unified API that mirrors the Python package structure.
4
- */
5
- /** Severity levels for profanity matches - unified with Python */
6
- declare enum SeverityLevel {
7
- EXACT = 1,
8
- FUZZY = 2
9
- }
10
- /** Supported languages - unified list with Python */
11
- type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'dutch' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
12
- /** Represents a profanity match in text - unified with Python */
13
- interface Match {
14
- word: string;
15
- index: number;
16
- severity: SeverityLevel;
17
- contextScore?: number;
18
- reason?: string;
19
- isWhitelisted?: boolean;
20
- }
21
- /** Result of profanity check operation - unified field names */
22
- interface CheckProfanityResult {
23
- containsProfanity: boolean;
24
- profaneWords: string[];
25
- processedText?: string;
26
- severityMap?: Record<string, SeverityLevel>;
27
- matches?: Match[];
28
- contextScore?: number;
29
- reason?: string;
30
- }
31
- /** Configuration for context-aware filtering - unified with Python */
32
- interface ContextAwareConfig {
33
- enableContextAware?: boolean;
34
- contextWindow?: number;
35
- confidenceThreshold?: number;
36
- domainWhitelists?: Record<string, string[]>;
37
- }
38
- /** Leetspeak detection intensity levels */
39
- type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
40
- /** Main filter configuration options - unified with Python */
41
- interface FilterConfig extends ContextAwareConfig {
42
- languages?: Language[];
43
- allLanguages?: boolean;
44
- caseSensitive?: boolean;
45
- wordBoundaries?: boolean;
46
- customWords?: string[];
47
- replaceWith?: string;
48
- severityLevels?: boolean;
49
- ignoreWords?: string[];
50
- logProfanity?: boolean;
51
- allowObfuscatedMatch?: boolean;
52
- fuzzyToleranceLevel?: number;
53
- /**
54
- * Enable leetspeak detection (e.g., "f4ck" → "fuck").
55
- * @default false
56
- */
57
- detectLeetspeak?: boolean;
58
- /**
59
- * Leetspeak detection intensity level.
60
- * - `basic`: Numbers only (0→o, 1→i, 3→e, 4→a, 5→s)
61
- * - `moderate`: Basic + symbols (@→a, $→s, !→i)
62
- * - `aggressive`: All known substitutions
63
- * @default 'moderate'
64
- */
65
- leetspeakLevel?: LeetspeakLevel;
66
- /**
67
- * Enable Unicode normalization to detect homoglyphs and obfuscation.
68
- * @default true
69
- */
70
- normalizeUnicode?: boolean;
71
- /**
72
- * Cache profanity check results for repeated strings.
73
- * @default false
74
- */
75
- cacheResults?: boolean;
76
- /**
77
- * Maximum cache size when caching is enabled.
78
- * @default 1000
79
- */
80
- maxCacheSize?: number;
81
- }
82
- /** Result with minimum severity filtering */
83
- interface FilteredProfanityResult {
84
- result: CheckProfanityResult;
85
- filteredWords: string[];
86
- }
1
+ import { F as FilterConfig, C as CheckProfanityResult, S as SeverityLevel } from './types-B9c_ik4k.js';
87
2
 
88
3
  /**
89
4
  * Core profanity filter class.
@@ -123,6 +38,7 @@ declare class Filter {
123
38
  private cacheResults;
124
39
  private maxCacheSize;
125
40
  private cache;
41
+ private regexCache;
126
42
  /**
127
43
  * Creates a new Filter instance with the specified configuration.
128
44
  *
@@ -157,6 +73,7 @@ declare class Filter {
157
73
  * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
158
74
  *
159
75
  * @param text - The input text to normalize
76
+ * @param aggressive - If true, collapses to single chars (for repeated char detection)
160
77
  * @returns The normalized text
161
78
  */
162
79
  private normalizeText;
@@ -272,79 +189,4 @@ declare class Filter {
272
189
  };
273
190
  }
274
191
 
275
- /**
276
- * Type definitions for ML-based profanity detection.
277
- */
278
- /**
279
- * Toxicity categories detected by the TensorFlow.js model.
280
- * These map to the civil comments dataset labels.
281
- */
282
- type ToxicityLabel = 'identity_attack' | 'insult' | 'obscene' | 'severe_toxicity' | 'sexual_explicit' | 'threat' | 'toxicity';
283
- /**
284
- * Result from a single toxicity prediction.
285
- */
286
- interface ToxicityPrediction {
287
- /** The toxicity category */
288
- label: ToxicityLabel;
289
- /** Whether the text matches this category (null if below threshold) */
290
- match: boolean | null;
291
- /** Probability scores [non-toxic, toxic] */
292
- probabilities: [number, number];
293
- }
294
- /**
295
- * Result from ML-based toxicity analysis.
296
- */
297
- interface MLAnalysisResult {
298
- /** Whether any toxicity was detected */
299
- isToxic: boolean;
300
- /** Overall toxicity score (0-1) */
301
- overallScore: number;
302
- /** Predictions for each category */
303
- predictions: ToxicityPrediction[];
304
- /** Categories that matched */
305
- matchedCategories: ToxicityLabel[];
306
- /** Processing time in milliseconds */
307
- processingTimeMs: number;
308
- }
309
- /**
310
- * Configuration for the ML toxicity detector.
311
- */
312
- interface MLDetectorConfig {
313
- /**
314
- * Minimum confidence threshold for predictions.
315
- * Values below this threshold will return null for match.
316
- * @default 0.85
317
- */
318
- threshold?: number;
319
- /**
320
- * Specific toxicity categories to check.
321
- * If not specified, all categories are checked.
322
- */
323
- labels?: ToxicityLabel[];
324
- /**
325
- * Whether to load the model immediately on instantiation.
326
- * If false, model will be loaded on first use.
327
- * @default false
328
- */
329
- preloadModel?: boolean;
330
- }
331
- /**
332
- * Combined result from both rule-based and ML detection.
333
- */
334
- interface HybridAnalysisResult {
335
- /** Rule-based detection result */
336
- ruleBasedResult: {
337
- containsProfanity: boolean;
338
- profaneWords: string[];
339
- };
340
- /** ML-based detection result (null if ML not enabled) */
341
- mlResult: MLAnalysisResult | null;
342
- /** Combined decision */
343
- isToxic: boolean;
344
- /** Confidence score for the decision */
345
- confidence: number;
346
- /** Reason for the decision */
347
- reason: string;
348
- }
349
-
350
- export { type CheckProfanityResult as C, type FilterConfig as F, type HybridAnalysisResult as H, type Language as L, type Match as M, SeverityLevel as S, type ToxicityLabel as T, Filter as a, type FilteredProfanityResult as b, type ContextAwareConfig as c, type ToxicityPrediction as d, type MLAnalysisResult as e, type MLDetectorConfig as f };
192
+ export { Filter as F };
@@ -1,89 +1,4 @@
1
- /**
2
- * Type definitions for glin-profanity JavaScript/TypeScript package.
3
- * Unified API that mirrors the Python package structure.
4
- */
5
- /** Severity levels for profanity matches - unified with Python */
6
- declare enum SeverityLevel {
7
- EXACT = 1,
8
- FUZZY = 2
9
- }
10
- /** Supported languages - unified list with Python */
11
- type Language = 'arabic' | 'chinese' | 'czech' | 'danish' | 'dutch' | 'english' | 'esperanto' | 'finnish' | 'french' | 'german' | 'hindi' | 'hungarian' | 'italian' | 'japanese' | 'korean' | 'norwegian' | 'persian' | 'polish' | 'portuguese' | 'russian' | 'spanish' | 'swedish' | 'thai' | 'turkish';
12
- /** Represents a profanity match in text - unified with Python */
13
- interface Match {
14
- word: string;
15
- index: number;
16
- severity: SeverityLevel;
17
- contextScore?: number;
18
- reason?: string;
19
- isWhitelisted?: boolean;
20
- }
21
- /** Result of profanity check operation - unified field names */
22
- interface CheckProfanityResult {
23
- containsProfanity: boolean;
24
- profaneWords: string[];
25
- processedText?: string;
26
- severityMap?: Record<string, SeverityLevel>;
27
- matches?: Match[];
28
- contextScore?: number;
29
- reason?: string;
30
- }
31
- /** Configuration for context-aware filtering - unified with Python */
32
- interface ContextAwareConfig {
33
- enableContextAware?: boolean;
34
- contextWindow?: number;
35
- confidenceThreshold?: number;
36
- domainWhitelists?: Record<string, string[]>;
37
- }
38
- /** Leetspeak detection intensity levels */
39
- type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';
40
- /** Main filter configuration options - unified with Python */
41
- interface FilterConfig extends ContextAwareConfig {
42
- languages?: Language[];
43
- allLanguages?: boolean;
44
- caseSensitive?: boolean;
45
- wordBoundaries?: boolean;
46
- customWords?: string[];
47
- replaceWith?: string;
48
- severityLevels?: boolean;
49
- ignoreWords?: string[];
50
- logProfanity?: boolean;
51
- allowObfuscatedMatch?: boolean;
52
- fuzzyToleranceLevel?: number;
53
- /**
54
- * Enable leetspeak detection (e.g., "f4ck" → "fuck").
55
- * @default false
56
- */
57
- detectLeetspeak?: boolean;
58
- /**
59
- * Leetspeak detection intensity level.
60
- * - `basic`: Numbers only (0→o, 1→i, 3→e, 4→a, 5→s)
61
- * - `moderate`: Basic + symbols (@→a, $→s, !→i)
62
- * - `aggressive`: All known substitutions
63
- * @default 'moderate'
64
- */
65
- leetspeakLevel?: LeetspeakLevel;
66
- /**
67
- * Enable Unicode normalization to detect homoglyphs and obfuscation.
68
- * @default true
69
- */
70
- normalizeUnicode?: boolean;
71
- /**
72
- * Cache profanity check results for repeated strings.
73
- * @default false
74
- */
75
- cacheResults?: boolean;
76
- /**
77
- * Maximum cache size when caching is enabled.
78
- * @default 1000
79
- */
80
- maxCacheSize?: number;
81
- }
82
- /** Result with minimum severity filtering */
83
- interface FilteredProfanityResult {
84
- result: CheckProfanityResult;
85
- filteredWords: string[];
86
- }
1
+ import { F as FilterConfig, C as CheckProfanityResult, S as SeverityLevel } from './types-B9c_ik4k.cjs';
87
2
 
88
3
  /**
89
4
  * Core profanity filter class.
@@ -123,6 +38,7 @@ declare class Filter {
123
38
  private cacheResults;
124
39
  private maxCacheSize;
125
40
  private cache;
41
+ private regexCache;
126
42
  /**
127
43
  * Creates a new Filter instance with the specified configuration.
128
44
  *
@@ -157,6 +73,7 @@ declare class Filter {
157
73
  * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
158
74
  *
159
75
  * @param text - The input text to normalize
76
+ * @param aggressive - If true, collapses to single chars (for repeated char detection)
160
77
  * @returns The normalized text
161
78
  */
162
79
  private normalizeText;
@@ -272,79 +189,4 @@ declare class Filter {
272
189
  };
273
190
  }
274
191
 
275
- /**
276
- * Type definitions for ML-based profanity detection.
277
- */
278
- /**
279
- * Toxicity categories detected by the TensorFlow.js model.
280
- * These map to the civil comments dataset labels.
281
- */
282
- type ToxicityLabel = 'identity_attack' | 'insult' | 'obscene' | 'severe_toxicity' | 'sexual_explicit' | 'threat' | 'toxicity';
283
- /**
284
- * Result from a single toxicity prediction.
285
- */
286
- interface ToxicityPrediction {
287
- /** The toxicity category */
288
- label: ToxicityLabel;
289
- /** Whether the text matches this category (null if below threshold) */
290
- match: boolean | null;
291
- /** Probability scores [non-toxic, toxic] */
292
- probabilities: [number, number];
293
- }
294
- /**
295
- * Result from ML-based toxicity analysis.
296
- */
297
- interface MLAnalysisResult {
298
- /** Whether any toxicity was detected */
299
- isToxic: boolean;
300
- /** Overall toxicity score (0-1) */
301
- overallScore: number;
302
- /** Predictions for each category */
303
- predictions: ToxicityPrediction[];
304
- /** Categories that matched */
305
- matchedCategories: ToxicityLabel[];
306
- /** Processing time in milliseconds */
307
- processingTimeMs: number;
308
- }
309
- /**
310
- * Configuration for the ML toxicity detector.
311
- */
312
- interface MLDetectorConfig {
313
- /**
314
- * Minimum confidence threshold for predictions.
315
- * Values below this threshold will return null for match.
316
- * @default 0.85
317
- */
318
- threshold?: number;
319
- /**
320
- * Specific toxicity categories to check.
321
- * If not specified, all categories are checked.
322
- */
323
- labels?: ToxicityLabel[];
324
- /**
325
- * Whether to load the model immediately on instantiation.
326
- * If false, model will be loaded on first use.
327
- * @default false
328
- */
329
- preloadModel?: boolean;
330
- }
331
- /**
332
- * Combined result from both rule-based and ML detection.
333
- */
334
- interface HybridAnalysisResult {
335
- /** Rule-based detection result */
336
- ruleBasedResult: {
337
- containsProfanity: boolean;
338
- profaneWords: string[];
339
- };
340
- /** ML-based detection result (null if ML not enabled) */
341
- mlResult: MLAnalysisResult | null;
342
- /** Combined decision */
343
- isToxic: boolean;
344
- /** Confidence score for the decision */
345
- confidence: number;
346
- /** Reason for the decision */
347
- reason: string;
348
- }
349
-
350
- export { type CheckProfanityResult as C, type FilterConfig as F, type HybridAnalysisResult as H, type Language as L, type Match as M, SeverityLevel as S, type ToxicityLabel as T, Filter as a, type FilteredProfanityResult as b, type ContextAwareConfig as c, type ToxicityPrediction as d, type MLAnalysisResult as e, type MLDetectorConfig as f };
192
+ export { Filter as F };