glin-profanity 2.3.8 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -0
- package/dist/chunk-KNHWF6MX.js +5050 -0
- package/dist/index.cjs +2041 -272
- package/dist/index.d.cts +252 -87
- package/dist/index.d.ts +252 -87
- package/dist/index.js +50 -3306
- package/dist/ml/index.cjs +5513 -0
- package/dist/ml/index.d.cts +357 -0
- package/dist/ml/index.d.ts +357 -0
- package/dist/ml/index.js +557 -0
- package/dist/types-BgQe4FSE.d.cts +350 -0
- package/dist/types-BgQe4FSE.d.ts +350 -0
- package/package.json +38 -3
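The new `dist/ml` bundles back an opt-in `glin-profanity/ml` entry point whose declarations appear below; the TensorFlow.js packages stay optional peer dependencies, so the ML path can be guarded at runtime. A minimal consumer-side sketch, assuming the updated package.json `exports` map (not shown in this excerpt) wires `./ml` to these files; the `moderate()` helper is hypothetical and only uses methods declared below:

```ts
// Requires the optional peer deps: @tensorflow/tfjs and @tensorflow-models/toxicity.
import { ToxicityDetector } from 'glin-profanity/ml';

async function moderate(text: string): Promise<boolean> {
  const detector = new ToxicityDetector({ threshold: 0.9 });

  // checkAvailability() lets callers degrade gracefully when the
  // TensorFlow.js peer dependencies are not installed.
  if (!(await detector.checkAvailability())) {
    return false; // fall back to whatever rule-based path the app already uses
  }

  await detector.loadModel(); // optional explicit preload
  const result = await detector.analyze(text);
  return result.isToxic;
}
```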
package/dist/ml/index.d.cts
@@ -0,0 +1,357 @@
+import { T as ToxicityLabel, f as MLDetectorConfig, e as MLAnalysisResult, a as FilterConfig, C as CheckProfanityResult, H as HybridAnalysisResult, F as Filter } from '../types-BgQe4FSE.cjs';
+export { d as ToxicityPrediction } from '../types-BgQe4FSE.cjs';
+
+/**
+ * ML-based toxicity detection using TensorFlow.js.
+ *
+ * This module provides optional ML-based profanity/toxicity detection
+ * using the TensorFlow.js toxicity model trained on the civil comments dataset.
+ *
+ * IMPORTANT: This requires optional peer dependencies:
+ * - @tensorflow/tfjs
+ * - @tensorflow-models/toxicity
+ *
+ * Install with: npm install @tensorflow/tfjs @tensorflow-models/toxicity
+ *
+ * @example
+ * ```typescript
+ * import { ToxicityDetector } from 'glin-profanity/ml';
+ *
+ * const detector = new ToxicityDetector({ threshold: 0.9 });
+ * await detector.loadModel();
+ *
+ * const result = await detector.analyze('some text to check');
+ * console.log(result.isToxic);
+ * ```
+ */
+
+interface ToxicityModelPrediction {
+    label: string;
+    results: Array<{
+        match: boolean | null;
+        probabilities: Float32Array | number[];
+    }>;
+}
+interface ToxicityModel {
+    classify(sentences: string[]): Promise<ToxicityModelPrediction[]>;
+}
+/**
+ * ML-based toxicity detector using TensorFlow.js.
+ *
+ * This class provides neural network-based toxicity detection that can
+ * identify various types of harmful content including insults, threats,
+ * identity attacks, and obscenity.
+ *
+ * The model is loaded lazily and cached for subsequent calls.
+ */
+declare class ToxicityDetector {
+    private model;
+    private loadingPromise;
+    private config;
+    private isAvailable;
+    /**
+     * All available toxicity labels.
+     */
+    static readonly ALL_LABELS: ToxicityLabel[];
+    /**
+     * Creates a new ToxicityDetector instance.
+     *
+     * @param config - Configuration options
+     *
+     * @example
+     * ```typescript
+     * // Basic usage with default threshold (0.85)
+     * const detector = new ToxicityDetector();
+     *
+     * // Custom threshold for higher precision
+     * const strictDetector = new ToxicityDetector({ threshold: 0.95 });
+     *
+     * // Check only specific categories
+     * const customDetector = new ToxicityDetector({
+     *   threshold: 0.8,
+     *   labels: ['insult', 'threat', 'obscene'],
+     * });
+     * ```
+     */
+    constructor(config?: MLDetectorConfig);
+    /**
+     * Dynamic import wrapper to avoid TypeScript static analysis issues.
+     * Uses Function constructor to bypass module resolution at compile time.
+     * @internal
+     */
+    private dynamicImport;
+    /**
+     * Checks if TensorFlow.js and the toxicity model are available.
+     * This performs a lazy check on first call and caches the result.
+     *
+     * @returns True if ML dependencies are available
+     */
+    checkAvailability(): Promise<boolean>;
+    /**
+     * Loads the toxicity model.
+     * This is called automatically on first analyze() call if not called explicitly.
+     *
+     * @returns The loaded model
+     * @throws Error if TensorFlow.js dependencies are not installed
+     *
+     * @example
+     * ```typescript
+     * const detector = new ToxicityDetector();
+     *
+     * // Explicitly preload model (optional)
+     * await detector.loadModel();
+     *
+     * // Or let it load automatically on first use
+     * const result = await detector.analyze('text');
+     * ```
+     */
+    loadModel(): Promise<ToxicityModel>;
+    private doLoadModel;
+    /**
+     * Analyzes text for toxicity using the ML model.
+     *
+     * @param text - Text to analyze
+     * @returns Analysis result with predictions and scores
+     *
+     * @example
+     * ```typescript
+     * const detector = new ToxicityDetector();
+     * const result = await detector.analyze('you are stupid');
+     *
+     * console.log(result.isToxic); // true
+     * console.log(result.overallScore); // 0.92
+     * console.log(result.matchedCategories); // ['insult', 'toxicity']
+     * ```
+     */
+    analyze(text: string): Promise<MLAnalysisResult>;
+    /**
+     * Analyzes multiple texts in a batch for better performance.
+     *
+     * @param texts - Array of texts to analyze
+     * @returns Array of analysis results
+     *
+     * @example
+     * ```typescript
+     * const detector = new ToxicityDetector();
+     * const results = await detector.analyzeBatch([
+     *   'hello friend',
+     *   'you are terrible',
+     *   'great work!',
+     * ]);
+     *
+     * results.forEach((result, i) => {
+     *   console.log(`Text ${i}: ${result.isToxic ? 'toxic' : 'clean'}`);
+     * });
+     * ```
+     */
+    analyzeBatch(texts: string[]): Promise<MLAnalysisResult[]>;
+    /**
+     * Simple boolean check for toxicity.
+     *
+     * @param text - Text to check
+     * @returns True if text is detected as toxic
+     *
+     * @example
+     * ```typescript
+     * const detector = new ToxicityDetector();
+     *
+     * if (await detector.isToxic('some user input')) {
+     *   console.log('Content flagged as toxic');
+     * }
+     * ```
+     */
+    isToxic(text: string): Promise<boolean>;
+    /**
+     * Gets the toxicity score for text (0-1).
+     *
+     * @param text - Text to score
+     * @returns Toxicity score from 0 (clean) to 1 (highly toxic)
+     */
+    getScore(text: string): Promise<number>;
+    /**
+     * Disposes of the model to free memory.
+     * The model will be reloaded on next analyze() call.
+     */
+    dispose(): void;
+    /**
+     * Gets the current configuration.
+     */
+    getConfig(): Required<MLDetectorConfig>;
+    /**
+     * Checks if the model is currently loaded.
+     */
+    isModelLoaded(): boolean;
+}
+
+/**
+ * Hybrid filter combining rule-based and ML-based detection.
+ *
+ * This class provides the best of both worlds:
+ * - Fast rule-based detection for common profanity
+ * - ML-based detection for contextual toxicity
+ *
+ * @example
+ * ```typescript
+ * import { HybridFilter } from 'glin-profanity/ml';
+ *
+ * const filter = new HybridFilter({
+ *   // Rule-based config
+ *   languages: ['english'],
+ *   detectLeetspeak: true,
+ *   // ML config
+ *   enableML: true,
+ *   mlThreshold: 0.85,
+ * });
+ *
+ * await filter.initialize();
+ *
+ * const result = await filter.checkProfanityAsync('some text');
+ * console.log(result.isToxic);
+ * ```
+ */
+
+/**
+ * Hybrid filter configuration.
+ */
+interface HybridFilterConfig extends FilterConfig {
+    /**
+     * Enable ML-based detection.
+     * Requires @tensorflow/tfjs and @tensorflow-models/toxicity.
+     * @default false
+     */
+    enableML?: boolean;
+    /**
+     * ML confidence threshold.
+     * @default 0.85
+     */
+    mlThreshold?: number;
+    /**
+     * Specific ML toxicity categories to check.
+     */
+    mlLabels?: ToxicityLabel[];
+    /**
+     * Preload ML model on initialization.
+     * @default false
+     */
+    preloadML?: boolean;
+    /**
+     * How to combine rule-based and ML results.
+     * - 'or': Flag if either method detects toxicity (more sensitive)
+     * - 'and': Flag only if both methods detect toxicity (more precise)
+     * - 'ml-override': Use ML result if available, fallback to rules
+     * - 'rules-first': Use rules for speed, ML for borderline cases
+     * @default 'or'
+     */
+    combinationMode?: 'or' | 'and' | 'ml-override' | 'rules-first';
+    /**
+     * Score threshold for "borderline" cases in rules-first mode.
+     * If rule-based detection is uncertain (near this threshold),
+     * ML will be used for confirmation.
+     * @default 0.5
+     */
+    borderlineThreshold?: number;
+}
+/**
+ * Hybrid profanity filter combining rule-based and ML detection.
+ */
+declare class HybridFilter {
+    private ruleFilter;
+    private mlDetector;
+    private config;
+    private mlInitialized;
+    /**
+     * Creates a new HybridFilter instance.
+     *
+     * @param config - Configuration options
+     */
+    constructor(config?: HybridFilterConfig);
+    /**
+     * Initializes the hybrid filter, loading the ML model if enabled.
+     * Call this before using async methods for best performance.
+     *
+     * @example
+     * ```typescript
+     * const filter = new HybridFilter({ enableML: true });
+     * await filter.initialize();
+     * // Now ready for fast async checks
+     * ```
+     */
+    initialize(): Promise<void>;
+    /**
+     * Checks if ML is available and initialized.
+     */
+    isMLReady(): boolean;
+    /**
+     * Synchronous profanity check using only rule-based detection.
+     * Use this for fast, synchronous checks when ML isn't needed.
+     *
+     * @param text - Text to check
+     * @returns True if profanity detected
+     */
+    isProfane(text: string): boolean;
+    /**
+     * Synchronous detailed check using only rule-based detection.
+     *
+     * @param text - Text to check
+     * @returns Detailed profanity check result
+     */
+    checkProfanity(text: string): CheckProfanityResult;
+    /**
+     * Async profanity check using both rule-based and ML detection.
+     *
+     * @param text - Text to check
+     * @returns Combined analysis result
+     *
+     * @example
+     * ```typescript
+     * const filter = new HybridFilter({
+     *   enableML: true,
+     *   combinationMode: 'or',
+     * });
+     * await filter.initialize();
+     *
+     * const result = await filter.checkProfanityAsync('some text');
+     * if (result.isToxic) {
+     *   console.log('Reason:', result.reason);
+     *   console.log('Confidence:', result.confidence);
+     * }
+     * ```
+     */
+    checkProfanityAsync(text: string): Promise<HybridAnalysisResult>;
+    /**
+     * Simple async boolean check for toxicity.
+     *
+     * @param text - Text to check
+     * @returns True if toxic
+     */
+    isToxicAsync(text: string): Promise<boolean>;
+    /**
+     * Analyzes text with ML only (if available).
+     *
+     * @param text - Text to analyze
+     * @returns ML analysis result or null if ML not available
+     */
+    analyzeWithML(text: string): Promise<MLAnalysisResult | null>;
+    /**
+     * Batch analysis for multiple texts.
+     *
+     * @param texts - Array of texts to analyze
+     * @returns Array of hybrid analysis results
+     */
+    checkProfanityBatchAsync(texts: string[]): Promise<HybridAnalysisResult[]>;
+    private combineResults;
+    /**
+     * Gets the underlying rule-based filter.
+     */
+    getRuleFilter(): Filter;
+    /**
+     * Gets the underlying ML detector (if enabled).
+     */
+    getMLDetector(): ToxicityDetector | null;
+    /**
+     * Disposes of resources (ML model).
+     */
+    dispose(): void;
+}
+
+export { HybridAnalysisResult, HybridFilter, type HybridFilterConfig, MLAnalysisResult, MLDetectorConfig, ToxicityDetector, ToxicityLabel };
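The `combinationMode` and `borderlineThreshold` options declared above suggest a tiered setup: cheap rule checks first, with the ML model consulted only for borderline inputs. A hedged sketch based solely on the declared signatures and the JSDoc examples; the `screenComments()` helper and its logging are illustrative assumptions, not part of the package:

```ts
import { HybridFilter } from 'glin-profanity/ml';

// 'rules-first' is documented as: use rules for speed, ML for borderline cases,
// where "borderline" means the rule-based score lands near borderlineThreshold.
const filter = new HybridFilter({
  languages: ['english'],
  detectLeetspeak: true,
  enableML: true,
  mlThreshold: 0.85,
  combinationMode: 'rules-first',
  borderlineThreshold: 0.5,
});

async function screenComments(comments: string[]): Promise<void> {
  // Loads the ML model only if enableML is set and the peer deps are installed.
  await filter.initialize();

  const results = await filter.checkProfanityBatchAsync(comments);
  results.forEach((result, i) => {
    if (result.isToxic) {
      console.log(`#${i} flagged:`, result.reason, result.confidence);
    }
  });

  filter.dispose(); // frees the TensorFlow.js model when done
}
```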
package/dist/ml/index.d.ts
@@ -0,0 +1,357 @@
+import { T as ToxicityLabel, f as MLDetectorConfig, e as MLAnalysisResult, a as FilterConfig, C as CheckProfanityResult, H as HybridAnalysisResult, F as Filter } from '../types-BgQe4FSE.js';
+export { d as ToxicityPrediction } from '../types-BgQe4FSE.js';
[Lines 3-357 of this hunk are identical to the package/dist/ml/index.d.cts declarations above; the two files differ only in the '.js' vs '.cjs' specifiers of the two import/export lines shown here.]
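Because the toxicity model is loaded lazily and cached (per the `ToxicityDetector` doc comments), long-lived processes may want to control its lifecycle explicitly. A small sketch using only the declared methods; the warm-up/teardown pattern and the exported helpers are assumptions for illustration, not something the package prescribes:

```ts
import { ToxicityDetector } from 'glin-profanity/ml';

const detector = new ToxicityDetector({ threshold: 0.85 });

// Warm up once at startup so the first real request does not pay the model-load cost.
export async function warmUp(): Promise<void> {
  if (!detector.isModelLoaded()) {
    await detector.loadModel();
  }
}

export async function score(text: string): Promise<number> {
  return detector.getScore(text); // 0 (clean) .. 1 (highly toxic)
}

// Release the model's memory, e.g. before an idle shutdown; it reloads on the next analyze().
export function tearDown(): void {
  detector.dispose();
}
```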