glin-profanity 2.3.7 → 3.0.1

This diff shows the content of publicly available package versions as published to a supported registry, and is provided for informational purposes only.
@@ -0,0 +1,557 @@
+ import {
+   Filter
+ } from "../chunk-KNHWF6MX.js";
+
+ // src/ml/ToxicityDetector.ts
+ var _ToxicityDetector = class _ToxicityDetector {
+   /**
+    * Creates a new ToxicityDetector instance.
+    *
+    * @param config - Configuration options
+    *
+    * @example
+    * ```typescript
+    * // Basic usage with default threshold (0.85)
+    * const detector = new ToxicityDetector();
+    *
+    * // Custom threshold for higher precision
+    * const strictDetector = new ToxicityDetector({ threshold: 0.95 });
+    *
+    * // Check only specific categories
+    * const customDetector = new ToxicityDetector({
+    *   threshold: 0.8,
+    *   labels: ['insult', 'threat', 'obscene'],
+    * });
+    * ```
+    */
+   constructor(config = {}) {
+     this.model = null;
+     this.loadingPromise = null;
+     this.isAvailable = null;
+     this.config = {
+       threshold: config.threshold ?? 0.85,
+       labels: config.labels ?? _ToxicityDetector.ALL_LABELS,
+       preloadModel: config.preloadModel ?? false
+     };
+     if (this.config.preloadModel) {
+       this.loadModel().catch(() => {
+       });
+     }
+   }
+   /**
+    * Dynamic import wrapper to avoid TypeScript static analysis issues.
+    * Uses Function constructor to bypass module resolution at compile time.
+    * @internal
+    */
+   dynamicImport(moduleName) {
+     return new Function("m", "return import(m)")(moduleName);
+   }
+   /**
+    * Checks if TensorFlow.js and the toxicity model are available.
+    * This performs a lazy check on first call and caches the result.
+    *
+    * @returns True if ML dependencies are available
+    */
+   async checkAvailability() {
+     if (this.isAvailable !== null) {
+       return this.isAvailable;
+     }
+     try {
+       await this.dynamicImport("@tensorflow/tfjs");
+       await this.dynamicImport("@tensorflow-models/toxicity");
+       this.isAvailable = true;
+     } catch {
+       this.isAvailable = false;
+     }
+     return this.isAvailable;
+   }
+   /**
+    * Loads the toxicity model.
+    * This is called automatically on first analyze() call if not called explicitly.
+    *
+    * @returns The loaded model
+    * @throws Error if TensorFlow.js dependencies are not installed
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    *
+    * // Explicitly preload model (optional)
+    * await detector.loadModel();
+    *
+    * // Or let it load automatically on first use
+    * const result = await detector.analyze('text');
+    * ```
+    */
+   async loadModel() {
+     if (this.model) {
+       return this.model;
+     }
+     if (this.loadingPromise) {
+       return this.loadingPromise;
+     }
+     this.loadingPromise = this.doLoadModel();
+     try {
+       this.model = await this.loadingPromise;
+       return this.model;
+     } finally {
+       this.loadingPromise = null;
+     }
+   }
+   async doLoadModel() {
+     try {
+       const toxicityModule = await this.dynamicImport(
+         "@tensorflow-models/toxicity"
+       );
+       const loadFn = toxicityModule.load;
+       const model = await loadFn(
+         this.config.threshold,
+         this.config.labels
+       );
+       return model;
+     } catch (error) {
+       const message = error instanceof Error ? error.message : "Unknown error";
+       if (message.includes("Cannot find module") || message.includes("MODULE_NOT_FOUND")) {
+         throw new Error(
+           "TensorFlow.js dependencies not installed. Install with: npm install @tensorflow/tfjs @tensorflow-models/toxicity"
+         );
+       }
+       throw new Error(`Failed to load toxicity model: ${message}`);
+     }
+   }
+   /**
+    * Analyzes text for toxicity using the ML model.
+    *
+    * @param text - Text to analyze
+    * @returns Analysis result with predictions and scores
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    * const result = await detector.analyze('you are stupid');
+    *
+    * console.log(result.isToxic); // true
+    * console.log(result.overallScore); // 0.92
+    * console.log(result.matchedCategories); // ['insult', 'toxicity']
+    * ```
+    */
+   async analyze(text) {
+     const startTime = performance.now();
+     const model = await this.loadModel();
+     const predictions = await model.classify([text]);
+     const processedPredictions = predictions.map(
+       (pred) => ({
+         label: pred.label,
+         match: pred.results[0].match,
+         probabilities: [
+           pred.results[0].probabilities[0],
+           pred.results[0].probabilities[1]
+         ]
+       })
+     );
+     const matchedCategories = processedPredictions.filter((p) => p.match === true).map((p) => p.label);
+     const overallScore = Math.max(
+       ...processedPredictions.map((p) => p.probabilities[1])
+     );
+     const processingTimeMs = performance.now() - startTime;
+     return {
+       isToxic: matchedCategories.length > 0,
+       overallScore,
+       predictions: processedPredictions,
+       matchedCategories,
+       processingTimeMs
+     };
+   }
+   /**
+    * Analyzes multiple texts in a batch for better performance.
+    *
+    * @param texts - Array of texts to analyze
+    * @returns Array of analysis results
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    * const results = await detector.analyzeBatch([
+    *   'hello friend',
+    *   'you are terrible',
+    *   'great work!',
+    * ]);
+    *
+    * results.forEach((result, i) => {
+    *   console.log(`Text ${i}: ${result.isToxic ? 'toxic' : 'clean'}`);
+    * });
+    * ```
+    */
+   async analyzeBatch(texts) {
+     if (texts.length === 0) {
+       return [];
+     }
+     const startTime = performance.now();
+     const model = await this.loadModel();
+     const predictions = await model.classify(texts);
+     const totalTimeMs = performance.now() - startTime;
+     const perTextTimeMs = totalTimeMs / texts.length;
+     return texts.map((_, textIndex) => {
+       const processedPredictions = predictions.map(
+         (pred) => ({
+           label: pred.label,
+           match: pred.results[textIndex].match,
+           probabilities: [
+             pred.results[textIndex].probabilities[0],
+             pred.results[textIndex].probabilities[1]
+           ]
+         })
+       );
+       const matchedCategories = processedPredictions.filter((p) => p.match === true).map((p) => p.label);
+       const overallScore = Math.max(
+         ...processedPredictions.map((p) => p.probabilities[1])
+       );
+       return {
+         isToxic: matchedCategories.length > 0,
+         overallScore,
+         predictions: processedPredictions,
+         matchedCategories,
+         processingTimeMs: perTextTimeMs
+       };
+     });
+   }
+   /**
+    * Simple boolean check for toxicity.
+    *
+    * @param text - Text to check
+    * @returns True if text is detected as toxic
+    *
+    * @example
+    * ```typescript
+    * const detector = new ToxicityDetector();
+    *
+    * if (await detector.isToxic('some user input')) {
+    *   console.log('Content flagged as toxic');
+    * }
+    * ```
+    */
+   async isToxic(text) {
+     const result = await this.analyze(text);
+     return result.isToxic;
+   }
+   /**
+    * Gets the toxicity score for text (0-1).
+    *
+    * @param text - Text to score
+    * @returns Toxicity score from 0 (clean) to 1 (highly toxic)
+    */
+   async getScore(text) {
+     const result = await this.analyze(text);
+     return result.overallScore;
+   }
+   /**
+    * Disposes of the model to free memory.
+    * The model will be reloaded on next analyze() call.
+    */
+   dispose() {
+     this.model = null;
+     this.loadingPromise = null;
+   }
+   /**
+    * Gets the current configuration.
+    */
+   getConfig() {
+     return { ...this.config };
+   }
+   /**
+    * Checks if the model is currently loaded.
+    */
+   isModelLoaded() {
+     return this.model !== null;
+   }
+ };
+ /**
+  * All available toxicity labels.
+  */
+ _ToxicityDetector.ALL_LABELS = [
+   "identity_attack",
+   "insult",
+   "obscene",
+   "severe_toxicity",
+   "sexual_explicit",
+   "threat",
+   "toxicity"
+ ];
+ var ToxicityDetector = _ToxicityDetector;
+
+ // src/ml/HybridFilter.ts
+ var HybridFilter = class {
+   /**
+    * Creates a new HybridFilter instance.
+    *
+    * @param config - Configuration options
+    */
+   constructor(config = {}) {
+     this.mlDetector = null;
+     this.mlInitialized = false;
+     const {
+       enableML = false,
+       mlThreshold = 0.85,
+       mlLabels,
+       preloadML = false,
+       combinationMode = "or",
+       borderlineThreshold = 0.5,
+       ...filterConfig
+     } = config;
+     this.config = {
+       enableML,
+       mlThreshold,
+       mlLabels,
+       preloadML,
+       combinationMode,
+       borderlineThreshold
+     };
+     this.ruleFilter = new Filter(filterConfig);
+     if (enableML) {
+       this.mlDetector = new ToxicityDetector({
+         threshold: mlThreshold,
+         labels: mlLabels,
+         preloadModel: preloadML
+       });
+     }
+   }
+   /**
+    * Initializes the hybrid filter, loading the ML model if enabled.
+    * Call this before using async methods for best performance.
+    *
+    * @example
+    * ```typescript
+    * const filter = new HybridFilter({ enableML: true });
+    * await filter.initialize();
+    * // Now ready for fast async checks
+    * ```
+    */
+   async initialize() {
+     if (this.mlDetector && !this.mlInitialized) {
+       await this.mlDetector.loadModel();
+       this.mlInitialized = true;
+     }
+   }
+   /**
+    * Checks if ML is available and initialized.
+    */
+   isMLReady() {
+     return this.mlDetector?.isModelLoaded() ?? false;
+   }
+   /**
+    * Synchronous profanity check using only rule-based detection.
+    * Use this for fast, synchronous checks when ML isn't needed.
+    *
+    * @param text - Text to check
+    * @returns True if profanity detected
+    */
+   isProfane(text) {
+     return this.ruleFilter.isProfane(text);
+   }
+   /**
+    * Synchronous detailed check using only rule-based detection.
+    *
+    * @param text - Text to check
+    * @returns Detailed profanity check result
+    */
+   checkProfanity(text) {
+     return this.ruleFilter.checkProfanity(text);
+   }
+   /**
+    * Async profanity check using both rule-based and ML detection.
+    *
+    * @param text - Text to check
+    * @returns Combined analysis result
+    *
+    * @example
+    * ```typescript
+    * const filter = new HybridFilter({
+    *   enableML: true,
+    *   combinationMode: 'or',
+    * });
+    * await filter.initialize();
+    *
+    * const result = await filter.checkProfanityAsync('some text');
+    * if (result.isToxic) {
+    *   console.log('Reason:', result.reason);
+    *   console.log('Confidence:', result.confidence);
+    * }
+    * ```
+    */
+   async checkProfanityAsync(text) {
+     const ruleResult = this.ruleFilter.checkProfanity(text);
+     let mlResult = null;
+     if (this.mlDetector) {
+       try {
+         mlResult = await this.mlDetector.analyze(text);
+       } catch (error) {
+         console.warn("[glin-profanity] ML analysis failed:", error);
+       }
+     }
+     const { isToxic, confidence, reason } = this.combineResults(
+       ruleResult,
+       mlResult
+     );
+     return {
+       ruleBasedResult: {
+         containsProfanity: ruleResult.containsProfanity,
+         profaneWords: ruleResult.profaneWords
+       },
+       mlResult,
+       isToxic,
+       confidence,
+       reason
+     };
+   }
+   /**
+    * Simple async boolean check for toxicity.
+    *
+    * @param text - Text to check
+    * @returns True if toxic
+    */
+   async isToxicAsync(text) {
+     const result = await this.checkProfanityAsync(text);
+     return result.isToxic;
+   }
+   /**
+    * Analyzes text with ML only (if available).
+    *
+    * @param text - Text to analyze
+    * @returns ML analysis result or null if ML not available
+    */
+   async analyzeWithML(text) {
+     if (!this.mlDetector) {
+       return null;
+     }
+     return this.mlDetector.analyze(text);
+   }
+   /**
+    * Batch analysis for multiple texts.
+    *
+    * @param texts - Array of texts to analyze
+    * @returns Array of hybrid analysis results
+    */
+   async checkProfanityBatchAsync(texts) {
+     const ruleResults = texts.map((text) => this.ruleFilter.checkProfanity(text));
+     let mlResults = null;
+     if (this.mlDetector) {
+       try {
+         mlResults = await this.mlDetector.analyzeBatch(texts);
+       } catch (error) {
+         console.warn("[glin-profanity] ML batch analysis failed:", error);
+       }
+     }
+     return texts.map((_, i) => {
+       const ruleResult = ruleResults[i];
+       const mlResult = mlResults?.[i] ?? null;
+       const { isToxic, confidence, reason } = this.combineResults(
+         ruleResult,
+         mlResult
+       );
+       return {
+         ruleBasedResult: {
+           containsProfanity: ruleResult.containsProfanity,
+           profaneWords: ruleResult.profaneWords
+         },
+         mlResult,
+         isToxic,
+         confidence,
+         reason
+       };
+     });
+   }
+   combineResults(ruleResult, mlResult) {
+     const ruleDetected = ruleResult.containsProfanity;
+     const mlDetected = mlResult?.isToxic ?? false;
+     const mlScore = mlResult?.overallScore ?? 0;
+     switch (this.config.combinationMode) {
+       case "and":
+         if (mlResult === null) {
+           return {
+             isToxic: ruleDetected,
+             confidence: ruleDetected ? 0.7 : 0.9,
+             reason: ruleDetected ? `Rule-based detection (ML unavailable): ${ruleResult.profaneWords.join(", ")}` : "No profanity detected (rule-based only)"
+           };
+         }
+         return {
+           isToxic: ruleDetected && mlDetected,
+           confidence: Math.min(ruleDetected ? 0.9 : 0.5, mlScore),
+           reason: ruleDetected && mlDetected ? `Both rule-based and ML detected toxicity: ${ruleResult.profaneWords.join(", ")} (ML: ${mlResult.matchedCategories.join(", ")})` : `Detection disagreement - Rule: ${ruleDetected}, ML: ${mlDetected}`
+         };
+       case "ml-override":
+         if (mlResult === null) {
+           return {
+             isToxic: ruleDetected,
+             confidence: ruleDetected ? 0.7 : 0.8,
+             reason: ruleDetected ? `Rule-based detection: ${ruleResult.profaneWords.join(", ")}` : "No profanity detected (rule-based)"
+           };
+         }
+         return {
+           isToxic: mlDetected,
+           confidence: mlScore,
+           reason: mlDetected ? `ML detected toxicity: ${mlResult.matchedCategories.join(", ")}` : "ML analysis: no toxicity detected"
+         };
+       case "rules-first":
+         if (ruleDetected) {
+           return {
+             isToxic: true,
+             confidence: mlResult ? Math.max(0.8, mlScore) : 0.8,
+             reason: `Rule-based detection: ${ruleResult.profaneWords.join(", ")}${mlDetected ? ` (confirmed by ML: ${mlResult?.matchedCategories.join(", ")})` : ""}`
+           };
+         }
+         if (mlResult && mlScore >= this.config.borderlineThreshold) {
+           return {
+             isToxic: mlDetected,
+             confidence: mlScore,
+             reason: mlDetected ? `ML detected (rules missed): ${mlResult.matchedCategories.join(", ")}` : "Clean text (verified by ML)"
+           };
+         }
+         return {
+           isToxic: false,
+           confidence: 0.85,
+           reason: "No profanity detected (rule-based)"
+         };
+       case "or":
+       default:
+         const isToxic = ruleDetected || mlDetected;
+         let reason;
+         let confidence;
+         if (ruleDetected && mlDetected) {
+           reason = `Both detected: rules (${ruleResult.profaneWords.join(", ")}), ML (${mlResult?.matchedCategories.join(", ")})`;
+           confidence = Math.max(0.95, mlScore);
+         } else if (ruleDetected) {
+           reason = `Rule-based detection: ${ruleResult.profaneWords.join(", ")}`;
+           confidence = 0.85;
+         } else if (mlDetected) {
+           reason = `ML detected: ${mlResult?.matchedCategories.join(", ")}`;
+           confidence = mlScore;
+         } else {
+           reason = "No toxicity detected";
+           confidence = mlResult ? 1 - mlScore : 0.8;
+         }
+         return { isToxic, confidence, reason };
+     }
+   }
+   /**
+    * Gets the underlying rule-based filter.
+    */
+   getRuleFilter() {
+     return this.ruleFilter;
+   }
+   /**
+    * Gets the underlying ML detector (if enabled).
+    */
+   getMLDetector() {
+     return this.mlDetector;
+   }
+   /**
+    * Disposes of resources (ML model).
+    */
+   dispose() {
+     this.mlDetector?.dispose();
+   }
+ };
+ export {
+   HybridFilter,
+   ToxicityDetector
+ };
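
A minimal usage sketch of the new ToxicityDetector follows, based only on the code in this diff. The `glin-profanity` import path is an assumption (the chunk confirms the `ToxicityDetector` export, but not the public entry point that re-exports it), and the ML path only loads if the optional `@tensorflow/tfjs` and `@tensorflow-models/toxicity` packages are installed.

```typescript
// Sketch only: the "glin-profanity" import path is assumed, not confirmed by this chunk.
import { ToxicityDetector } from "glin-profanity";

async function moderate(texts: string[]): Promise<void> {
  const detector = new ToxicityDetector({ threshold: 0.9 });

  // checkAvailability() resolves false when the optional TensorFlow.js
  // packages are not installed, so callers can degrade gracefully instead
  // of catching the load error that analyze() would otherwise throw.
  if (!(await detector.checkAvailability())) {
    console.warn("ML toxicity detection unavailable; skipping");
    return;
  }

  // Single text: the model loads lazily on the first analyze() call.
  const single = await detector.analyze(texts[0]);
  console.log(single.isToxic, single.overallScore, single.matchedCategories);

  // Batch: one classify() call covers all texts; each result's
  // processingTimeMs is the batch total divided by the number of texts.
  const batch = await detector.analyzeBatch(texts);
  console.log(batch.map((r) => r.isToxic));

  // Free the model; it is reloaded on the next analyze() call.
  detector.dispose();
}
```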
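
A minimal usage sketch of the new HybridFilter, also based only on the code in this diff: it wraps the existing rule-based `Filter` and merges its verdict with the ML result according to `combinationMode` ('or' by default; 'and', 'ml-override', and 'rules-first' are also handled, with `borderlineThreshold` only consulted in 'rules-first' mode). The import path is again an assumption.

```typescript
// Sketch only: import path assumed; the API surface is taken from the diff above.
import { HybridFilter } from "glin-profanity";

const filter = new HybridFilter({
  enableML: true,
  combinationMode: "rules-first", // rule hits win; ML catches what the rules miss
  borderlineThreshold: 0.5,       // minimum ML score to flag rule-clean text
});

// Fast synchronous path, rule-based only:
console.log(filter.isProfane("some comment"));

// Combined path (the model loads lazily unless initialize() is called first):
await filter.initialize();
const result = await filter.checkProfanityAsync("some comment");
console.log(result.isToxic, result.confidence, result.reason);

// Batch variant over the same combination logic:
const results = await filter.checkProfanityBatchAsync(["first comment", "second comment"]);
console.log(results.length);

// If the TensorFlow.js packages are missing, the async checks still resolve
// using the rule-based result alone; the ML failure is only logged as a warning.
```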