glin-profanity 2.3.7 → 3.0.1

This diff shows the content of publicly available package versions as published to a supported registry, and is provided for informational purposes only.
@@ -0,0 +1,557 @@
+ import {
+   Filter
+ } from "../chunk-KNHWF6MX.js";
+
+ // src/ml/ToxicityDetector.ts
+ var _ToxicityDetector = class _ToxicityDetector {
+   /**
+    * Creates a new ToxicityDetector instance.
+    *
+    * @param config - Configuration options
+    *
+    * @example
+    * ```typescript
+    * // Basic usage with default threshold (0.85)
+    * const detector = new ToxicityDetector();
+    *
+    * // Custom threshold for higher precision
+    * const strictDetector = new ToxicityDetector({ threshold: 0.95 });
+    *
+    * // Check only specific categories
+    * const customDetector = new ToxicityDetector({
+    *   threshold: 0.8,
+    *   labels: ['insult', 'threat', 'obscene'],
+    * });
+    * ```
+    */
+   constructor(config = {}) {
+     this.model = null;
+     this.loadingPromise = null;
+     this.isAvailable = null;
+     this.config = {
+       threshold: config.threshold ?? 0.85,
+       labels: config.labels ?? _ToxicityDetector.ALL_LABELS,
+       preloadModel: config.preloadModel ?? false
+     };
+     if (this.config.preloadModel) {
+       this.loadModel().catch(() => {
+       });
+     }
+   }
+   /**
+    * Dynamic import wrapper to avoid TypeScript static analysis issues.
+    * Uses Function constructor to bypass module resolution at compile time.
+    * @internal
+    */
+   dynamicImport(moduleName) {
+     return new Function("m", "return import(m)")(moduleName);
+   }
+   /**
+    * Checks if TensorFlow.js and the toxicity model are available.
+    * This performs a lazy check on first call and caches the result.
+    *
+    * @returns True if ML dependencies are available
+    */
+   async checkAvailability() {
+     if (this.isAvailable !== null) {
+       return this.isAvailable;
+     }
+     try {
+       await this.dynamicImport("@tensorflow/tfjs");
+       await this.dynamicImport("@tensorflow-models/toxicity");
+       this.isAvailable = true;
+     } catch {
+       this.isAvailable = false;
+     }
+     return this.isAvailable;
+   }
+   /**
+    * Loads the toxicity model.
+    * This is called automatically on first analyze() call if not called explicitly.
+    *
+    * @returns The loaded model
+    * @throws Error if TensorFlow.js dependencies are not installed
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    *
+    * // Explicitly preload model (optional)
+    * await detector.loadModel();
+    *
+    * // Or let it load automatically on first use
+    * const result = await detector.analyze('text');
+    * ```
+    */
+   async loadModel() {
+     if (this.model) {
+       return this.model;
+     }
+     if (this.loadingPromise) {
+       return this.loadingPromise;
+     }
+     this.loadingPromise = this.doLoadModel();
+     try {
+       this.model = await this.loadingPromise;
+       return this.model;
+     } finally {
+       this.loadingPromise = null;
+     }
+   }
+   async doLoadModel() {
+     try {
+       const toxicityModule = await this.dynamicImport(
+         "@tensorflow-models/toxicity"
+       );
+       const loadFn = toxicityModule.load;
+       const model = await loadFn(
+         this.config.threshold,
+         this.config.labels
+       );
+       return model;
+     } catch (error) {
+       const message = error instanceof Error ? error.message : "Unknown error";
+       if (message.includes("Cannot find module") || message.includes("MODULE_NOT_FOUND")) {
+         throw new Error(
+           "TensorFlow.js dependencies not installed. Install with: npm install @tensorflow/tfjs @tensorflow-models/toxicity"
+         );
+       }
+       throw new Error(`Failed to load toxicity model: ${message}`);
+     }
+   }
+   /**
+    * Analyzes text for toxicity using the ML model.
+    *
+    * @param text - Text to analyze
+    * @returns Analysis result with predictions and scores
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    * const result = await detector.analyze('you are stupid');
+    *
+    * console.log(result.isToxic); // true
+    * console.log(result.overallScore); // 0.92
+    * console.log(result.matchedCategories); // ['insult', 'toxicity']
+    * ```
+    */
+   async analyze(text) {
+     const startTime = performance.now();
+     const model = await this.loadModel();
+     const predictions = await model.classify([text]);
+     const processedPredictions = predictions.map(
+       (pred) => ({
+         label: pred.label,
+         match: pred.results[0].match,
+         probabilities: [
+           pred.results[0].probabilities[0],
+           pred.results[0].probabilities[1]
+         ]
+       })
+     );
+     const matchedCategories = processedPredictions.filter((p) => p.match === true).map((p) => p.label);
+     const overallScore = Math.max(
+       ...processedPredictions.map((p) => p.probabilities[1])
+     );
+     const processingTimeMs = performance.now() - startTime;
+     return {
+       isToxic: matchedCategories.length > 0,
+       overallScore,
+       predictions: processedPredictions,
+       matchedCategories,
+       processingTimeMs
+     };
+   }
+   /**
+    * Analyzes multiple texts in a batch for better performance.
+    *
+    * @param texts - Array of texts to analyze
+    * @returns Array of analysis results
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    * const results = await detector.analyzeBatch([
+    *   'hello friend',
+    *   'you are terrible',
+    *   'great work!',
+    * ]);
+    *
+    * results.forEach((result, i) => {
+    *   console.log(`Text ${i}: ${result.isToxic ? 'toxic' : 'clean'}`);
+    * });
+    * ```
+    */
+   async analyzeBatch(texts) {
+     if (texts.length === 0) {
+       return [];
+     }
+     const startTime = performance.now();
+     const model = await this.loadModel();
+     const predictions = await model.classify(texts);
+     const totalTimeMs = performance.now() - startTime;
+     const perTextTimeMs = totalTimeMs / texts.length;
+     return texts.map((_, textIndex) => {
+       const processedPredictions = predictions.map(
+         (pred) => ({
+           label: pred.label,
+           match: pred.results[textIndex].match,
+           probabilities: [
+             pred.results[textIndex].probabilities[0],
+             pred.results[textIndex].probabilities[1]
+           ]
+         })
+       );
+       const matchedCategories = processedPredictions.filter((p) => p.match === true).map((p) => p.label);
+       const overallScore = Math.max(
+         ...processedPredictions.map((p) => p.probabilities[1])
+       );
+       return {
+         isToxic: matchedCategories.length > 0,
+         overallScore,
+         predictions: processedPredictions,
+         matchedCategories,
+         processingTimeMs: perTextTimeMs
+       };
+     });
+   }
+   /**
+    * Simple boolean check for toxicity.
+    *
+    * @param text - Text to check
+    * @returns True if text is detected as toxic
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    *
+    * if (await detector.isToxic('some user input')) {
+    *   console.log('Content flagged as toxic');
+    * }
+    * ```
+    */
+   async isToxic(text) {
+     const result = await this.analyze(text);
+     return result.isToxic;
+   }
+   /**
+    * Gets the toxicity score for text (0-1).
+    *
+    * @param text - Text to score
+    * @returns Toxicity score from 0 (clean) to 1 (highly toxic)
+    */
+   async getScore(text) {
+     const result = await this.analyze(text);
+     return result.overallScore;
+   }
+   /**
+    * Disposes of the model to free memory.
+    * The model will be reloaded on next analyze() call.
+    */
+   dispose() {
+     this.model = null;
+     this.loadingPromise = null;
+   }
+   /**
+    * Gets the current configuration.
+    */
+   getConfig() {
+     return { ...this.config };
+   }
+   /**
+    * Checks if the model is currently loaded.
+    */
+   isModelLoaded() {
+     return this.model !== null;
+   }
+ };
+ /**
+  * All available toxicity labels.
+  */
+ _ToxicityDetector.ALL_LABELS = [
+   "identity_attack",
+   "insult",
+   "obscene",
+   "severe_toxicity",
+   "sexual_explicit",
+   "threat",
+   "toxicity"
+ ];
+ var ToxicityDetector = _ToxicityDetector;
+
+ // src/ml/HybridFilter.ts
+ var HybridFilter = class {
+   /**
+    * Creates a new HybridFilter instance.
+    *
+    * @param config - Configuration options
+    */
+   constructor(config = {}) {
+     this.mlDetector = null;
+     this.mlInitialized = false;
+     const {
+       enableML = false,
+       mlThreshold = 0.85,
+       mlLabels,
+       preloadML = false,
+       combinationMode = "or",
+       borderlineThreshold = 0.5,
+       ...filterConfig
+     } = config;
+     this.config = {
+       enableML,
+       mlThreshold,
+       mlLabels,
+       preloadML,
+       combinationMode,
+       borderlineThreshold
+     };
+     this.ruleFilter = new Filter(filterConfig);
+     if (enableML) {
+       this.mlDetector = new ToxicityDetector({
+         threshold: mlThreshold,
+         labels: mlLabels,
+         preloadModel: preloadML
+       });
+     }
+   }
+   /**
+    * Initializes the hybrid filter, loading the ML model if enabled.
+    * Call this before using async methods for best performance.
+    *
+    * @example
+    * ```typescript
+    * const filter = new HybridFilter({ enableML: true });
+    * await filter.initialize();
+    * // Now ready for fast async checks
+    * ```
+    */
+   async initialize() {
+     if (this.mlDetector && !this.mlInitialized) {
+       await this.mlDetector.loadModel();
+       this.mlInitialized = true;
+     }
+   }
+   /**
+    * Checks if ML is available and initialized.
+    */
+   isMLReady() {
+     return this.mlDetector?.isModelLoaded() ?? false;
+   }
+   /**
+    * Synchronous profanity check using only rule-based detection.
+    * Use this for fast, synchronous checks when ML isn't needed.
+    *
+    * @param text - Text to check
+    * @returns True if profanity detected
+    */
+   isProfane(text) {
+     return this.ruleFilter.isProfane(text);
+   }
+   /**
+    * Synchronous detailed check using only rule-based detection.
+    *
+    * @param text - Text to check
+    * @returns Detailed profanity check result
+    */
+   checkProfanity(text) {
+     return this.ruleFilter.checkProfanity(text);
+   }
+   /**
+    * Async profanity check using both rule-based and ML detection.
+    *
+    * @param text - Text to check
+    * @returns Combined analysis result
+    *
+    * @example
+    * ```typescript
+    * const filter = new HybridFilter({
+    *   enableML: true,
+    *   combinationMode: 'or',
+    * });
+    * await filter.initialize();
+    *
+    * const result = await filter.checkProfanityAsync('some text');
+    * if (result.isToxic) {
+    *   console.log('Reason:', result.reason);
+    *   console.log('Confidence:', result.confidence);
+    * }
+    * ```
+    */
+   async checkProfanityAsync(text) {
+     const ruleResult = this.ruleFilter.checkProfanity(text);
+     let mlResult = null;
+     if (this.mlDetector) {
+       try {
+         mlResult = await this.mlDetector.analyze(text);
+       } catch (error) {
+         console.warn("[glin-profanity] ML analysis failed:", error);
+       }
+     }
+     const { isToxic, confidence, reason } = this.combineResults(
+       ruleResult,
+       mlResult
+     );
+     return {
+       ruleBasedResult: {
+         containsProfanity: ruleResult.containsProfanity,
+         profaneWords: ruleResult.profaneWords
+       },
+       mlResult,
+       isToxic,
+       confidence,
+       reason
+     };
+   }
+   /**
+    * Simple async boolean check for toxicity.
+    *
+    * @param text - Text to check
+    * @returns True if toxic
+    */
+   async isToxicAsync(text) {
+     const result = await this.checkProfanityAsync(text);
+     return result.isToxic;
+   }
+   /**
+    * Analyzes text with ML only (if available).
+    *
+    * @param text - Text to analyze
+    * @returns ML analysis result or null if ML not available
+    */
+   async analyzeWithML(text) {
+     if (!this.mlDetector) {
+       return null;
+     }
+     return this.mlDetector.analyze(text);
+   }
+   /**
+    * Batch analysis for multiple texts.
+    *
+    * @param texts - Array of texts to analyze
+    * @returns Array of hybrid analysis results
+    */
+   async checkProfanityBatchAsync(texts) {
+     const ruleResults = texts.map((text) => this.ruleFilter.checkProfanity(text));
+     let mlResults = null;
+     if (this.mlDetector) {
+       try {
+         mlResults = await this.mlDetector.analyzeBatch(texts);
+       } catch (error) {
+         console.warn("[glin-profanity] ML batch analysis failed:", error);
+       }
+     }
+     return texts.map((_, i) => {
+       const ruleResult = ruleResults[i];
+       const mlResult = mlResults?.[i] ?? null;
+       const { isToxic, confidence, reason } = this.combineResults(
+         ruleResult,
+         mlResult
+       );
+       return {
+         ruleBasedResult: {
+           containsProfanity: ruleResult.containsProfanity,
+           profaneWords: ruleResult.profaneWords
+         },
+         mlResult,
+         isToxic,
+         confidence,
+         reason
+       };
+     });
+   }
+   combineResults(ruleResult, mlResult) {
+     const ruleDetected = ruleResult.containsProfanity;
+     const mlDetected = mlResult?.isToxic ?? false;
+     const mlScore = mlResult?.overallScore ?? 0;
+     switch (this.config.combinationMode) {
+       case "and":
+         if (mlResult === null) {
+           return {
+             isToxic: ruleDetected,
+             confidence: ruleDetected ? 0.7 : 0.9,
+             reason: ruleDetected ? `Rule-based detection (ML unavailable): ${ruleResult.profaneWords.join(", ")}` : "No profanity detected (rule-based only)"
+           };
+         }
+         return {
+           isToxic: ruleDetected && mlDetected,
+           confidence: Math.min(ruleDetected ? 0.9 : 0.5, mlScore),
+           reason: ruleDetected && mlDetected ? `Both rule-based and ML detected toxicity: ${ruleResult.profaneWords.join(", ")} (ML: ${mlResult.matchedCategories.join(", ")})` : `Detection disagreement - Rule: ${ruleDetected}, ML: ${mlDetected}`
+         };
+       case "ml-override":
+         if (mlResult === null) {
+           return {
+             isToxic: ruleDetected,
+             confidence: ruleDetected ? 0.7 : 0.8,
+             reason: ruleDetected ? `Rule-based detection: ${ruleResult.profaneWords.join(", ")}` : "No profanity detected (rule-based)"
+           };
+         }
+         return {
+           isToxic: mlDetected,
+           confidence: mlScore,
+           reason: mlDetected ? `ML detected toxicity: ${mlResult.matchedCategories.join(", ")}` : "ML analysis: no toxicity detected"
+         };
+       case "rules-first":
+         if (ruleDetected) {
+           return {
+             isToxic: true,
+             confidence: mlResult ? Math.max(0.8, mlScore) : 0.8,
+             reason: `Rule-based detection: ${ruleResult.profaneWords.join(", ")}${mlDetected ? ` (confirmed by ML: ${mlResult?.matchedCategories.join(", ")})` : ""}`
+           };
+         }
+         if (mlResult && mlScore >= this.config.borderlineThreshold) {
+           return {
+             isToxic: mlDetected,
+             confidence: mlScore,
+             reason: mlDetected ? `ML detected (rules missed): ${mlResult.matchedCategories.join(", ")}` : "Clean text (verified by ML)"
+           };
+         }
+         return {
+           isToxic: false,
+           confidence: 0.85,
+           reason: "No profanity detected (rule-based)"
+         };
+       case "or":
+       default:
+         const isToxic = ruleDetected || mlDetected;
+         let reason;
+         let confidence;
+         if (ruleDetected && mlDetected) {
+           reason = `Both detected: rules (${ruleResult.profaneWords.join(", ")}), ML (${mlResult?.matchedCategories.join(", ")})`;
+           confidence = Math.max(0.95, mlScore);
+         } else if (ruleDetected) {
+           reason = `Rule-based detection: ${ruleResult.profaneWords.join(", ")}`;
+           confidence = 0.85;
+         } else if (mlDetected) {
+           reason = `ML detected: ${mlResult?.matchedCategories.join(", ")}`;
+           confidence = mlScore;
+         } else {
+           reason = "No toxicity detected";
+           confidence = mlResult ? 1 - mlScore : 0.8;
+         }
+         return { isToxic, confidence, reason };
+     }
+   }
+   /**
+    * Gets the underlying rule-based filter.
+    */
+   getRuleFilter() {
+     return this.ruleFilter;
+   }
+   /**
+    * Gets the underlying ML detector (if enabled).
+    */
+   getMLDetector() {
+     return this.mlDetector;
+   }
+   /**
+    * Disposes of resources (ML model).
+    */
+   dispose() {
+     this.mlDetector?.dispose();
+   }
+ };
+ export {
+   HybridFilter,
+   ToxicityDetector
+ };
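
A minimal usage sketch of the new ToxicityDetector follows, based only on the code in this diff. The `glin-profanity` import path is an assumption (the chunk confirms the `ToxicityDetector` export, but not the public entry point that re-exports it), and the ML path only loads if the optional `@tensorflow/tfjs` and `@tensorflow-models/toxicity` packages are installed.

```typescript
// Sketch only: the "glin-profanity" import path is assumed, not confirmed by this chunk.
import { ToxicityDetector } from "glin-profanity";

async function moderate(texts: string[]): Promise<void> {
  const detector = new ToxicityDetector({ threshold: 0.9 });

  // checkAvailability() resolves false when the optional TensorFlow.js
  // packages are not installed, so callers can degrade gracefully instead
  // of catching the load error that analyze() would otherwise throw.
  if (!(await detector.checkAvailability())) {
    console.warn("ML toxicity detection unavailable; skipping");
    return;
  }

  // Single text: the model loads lazily on the first analyze() call.
  const single = await detector.analyze(texts[0]);
  console.log(single.isToxic, single.overallScore, single.matchedCategories);

  // Batch: one classify() call covers all texts; each result's
  // processingTimeMs is the batch total divided by the number of texts.
  const batch = await detector.analyzeBatch(texts);
  console.log(batch.map((r) => r.isToxic));

  // Free the model; it is reloaded on the next analyze() call.
  detector.dispose();
}
```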
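
A minimal usage sketch of the new HybridFilter, also based only on the code in this diff: it wraps the existing rule-based `Filter` and merges its verdict with the ML result according to `combinationMode` ('or' by default; 'and', 'ml-override', and 'rules-first' are also handled, with `borderlineThreshold` only consulted in 'rules-first' mode). The import path is again an assumption.

```typescript
// Sketch only: import path assumed; the API surface is taken from the diff above.
import { HybridFilter } from "glin-profanity";

const filter = new HybridFilter({
  enableML: true,
  combinationMode: "rules-first", // rule hits win; ML catches what the rules miss
  borderlineThreshold: 0.5,       // minimum ML score to flag rule-clean text
});

// Fast synchronous path, rule-based only:
console.log(filter.isProfane("some comment"));

// Combined path (the model loads lazily unless initialize() is called first):
await filter.initialize();
const result = await filter.checkProfanityAsync("some comment");
console.log(result.isToxic, result.confidence, result.reason);

// Batch variant over the same combination logic:
const results = await filter.checkProfanityBatchAsync(["first comment", "second comment"]);
console.log(results.length);

// If the TensorFlow.js packages are missing, the async checks still resolve
// using the rule-based result alone; the ML failure is only logged as a warning.
```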