npm - @framers/agentos-ext-ml-classifiers - Versions diffs - 0.1.0 → 0.3.1 - Mend

@framers/agentos-ext-ml-classifiers 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/.github/workflows/ci.yml +20 -0
package/.github/workflows/release.yml +37 -0
package/.releaserc.json +9 -0
package/LICENSE +96 -21
package/README.md +72 -0
package/dist/MLClassifierGuardrail.d.ts +88 -117
package/dist/MLClassifierGuardrail.d.ts.map +1 -1
package/dist/MLClassifierGuardrail.js +263 -264
package/dist/MLClassifierGuardrail.js.map +1 -1
package/dist/index.d.ts +16 -90
package/dist/index.d.ts.map +1 -1
package/dist/index.js +36 -309
package/dist/index.js.map +1 -1
package/dist/keyword-classifier.d.ts +26 -0
package/dist/keyword-classifier.d.ts.map +1 -0
package/dist/keyword-classifier.js +113 -0
package/dist/keyword-classifier.js.map +1 -0
package/dist/llm-classifier.d.ts +27 -0
package/dist/llm-classifier.d.ts.map +1 -0
package/dist/llm-classifier.js +129 -0
package/dist/llm-classifier.js.map +1 -0
package/dist/tools/ClassifyContentTool.d.ts +53 -80
package/dist/tools/ClassifyContentTool.d.ts.map +1 -1
package/dist/tools/ClassifyContentTool.js +52 -103
package/dist/tools/ClassifyContentTool.js.map +1 -1
package/dist/types.d.ts +77 -277
package/dist/types.d.ts.map +1 -1
package/dist/types.js +9 -55
package/dist/types.js.map +1 -1
package/package.json +10 -24
package/scripts/fix-esm-imports.mjs +181 -0
package/src/MLClassifierGuardrail.ts +306 -310
package/src/index.ts +35 -339
package/src/keyword-classifier.ts +130 -0
package/src/llm-classifier.ts +163 -0
package/src/tools/ClassifyContentTool.ts +75 -132
package/src/types.ts +78 -325
package/test/llm-tier.spec.ts +267 -0
package/test/ml-classifiers.spec.ts +57 -0
package/test/onnx-tier.spec.ts +255 -0
package/test/tier-fallthrough.spec.ts +185 -0
package/tsconfig.json +20 -0
package/vitest.config.ts +35 -0
package/dist/ClassifierOrchestrator.d.ts +0 -126
package/dist/ClassifierOrchestrator.d.ts.map +0 -1
package/dist/ClassifierOrchestrator.js +0 -239
package/dist/ClassifierOrchestrator.js.map +0 -1
package/dist/IContentClassifier.d.ts +0 -117
package/dist/IContentClassifier.d.ts.map +0 -1
package/dist/IContentClassifier.js +0 -22
package/dist/IContentClassifier.js.map +0 -1
package/dist/SlidingWindowBuffer.d.ts +0 -213
package/dist/SlidingWindowBuffer.d.ts.map +0 -1
package/dist/SlidingWindowBuffer.js +0 -246
package/dist/SlidingWindowBuffer.js.map +0 -1
package/dist/classifiers/InjectionClassifier.d.ts +0 -126
package/dist/classifiers/InjectionClassifier.d.ts.map +0 -1
package/dist/classifiers/InjectionClassifier.js +0 -210
package/dist/classifiers/InjectionClassifier.js.map +0 -1
package/dist/classifiers/JailbreakClassifier.d.ts +0 -124
package/dist/classifiers/JailbreakClassifier.d.ts.map +0 -1
package/dist/classifiers/JailbreakClassifier.js +0 -208
package/dist/classifiers/JailbreakClassifier.js.map +0 -1
package/dist/classifiers/ToxicityClassifier.d.ts +0 -125
package/dist/classifiers/ToxicityClassifier.d.ts.map +0 -1
package/dist/classifiers/ToxicityClassifier.js +0 -212
package/dist/classifiers/ToxicityClassifier.js.map +0 -1
package/dist/classifiers/WorkerClassifierProxy.d.ts +0 -158
package/dist/classifiers/WorkerClassifierProxy.d.ts.map +0 -1
package/dist/classifiers/WorkerClassifierProxy.js +0 -268
package/dist/classifiers/WorkerClassifierProxy.js.map +0 -1
package/dist/worker/classifier-worker.d.ts +0 -49
package/dist/worker/classifier-worker.d.ts.map +0 -1
package/dist/worker/classifier-worker.js +0 -180
package/dist/worker/classifier-worker.js.map +0 -1
package/src/ClassifierOrchestrator.ts +0 -290
package/src/IContentClassifier.ts +0 -124
package/src/SlidingWindowBuffer.ts +0 -384
package/src/classifiers/InjectionClassifier.ts +0 -261
package/src/classifiers/JailbreakClassifier.ts +0 -259
package/src/classifiers/ToxicityClassifier.ts +0 -263
package/src/classifiers/WorkerClassifierProxy.ts +0 -366
package/src/worker/classifier-worker.ts +0 -267

package/dist/classifiers/JailbreakClassifier.js DELETED Viewed

@@ -1,208 +0,0 @@
-/**
- * @fileoverview Jailbreak content classifier using Meta's `PromptGuard-86M`
- * model.
- *
- * Jailbreak attempts are adversarial prompts specifically crafted to bypass
- * an LLM's safety guidelines — e.g. "DAN mode", role-play exploits, or
- * indirect instruction injections.  This classifier uses Meta's PromptGuard
- * model which was trained to distinguish three classes:
- *
- *  - `jailbreak`  — explicit attempt to override safety behaviour
- *  - `injection`  — indirect or embedded instruction injection
- *  - `benign`     — normal user input
- *
- * Unlike the binary {@link InjectionClassifier}, PromptGuard separates
- * direct jailbreaks from indirect injections, giving the guardrail
- * orchestrator finer-grained control over which action to take for each.
- *
- * Graceful degradation
- * --------------------
- * If the model fails to load the classifier sets `unavailable = true` and
- * returns a pass result `{ bestClass: 'benign', confidence: 0, allScores: [] }`
- * on every subsequent call.
- *
- * @module agentos/extensions/packs/ml-classifiers/classifiers/JailbreakClassifier
- */
-import { ML_CLASSIFIER_SERVICE_IDS } from '../types';
-// ---------------------------------------------------------------------------
-// JailbreakClassifier
-// ---------------------------------------------------------------------------
-/**
- * Multi-class jailbreak classifier backed by `meta-llama/PromptGuard-86M`.
- *
- * Distinguishes three mutually-exclusive classes:
- *  - `jailbreak`  — direct attempt to bypass safety guidelines
- *  - `injection`  — indirect prompt injection embedded in user input
- *  - `benign`     — normal, non-adversarial message
- *
- * The winning class (highest softmax score) is reported as `bestClass` /
- * `confidence`.  All three scores are present in `allScores`.
- *
- * @implements {IContentClassifier}
- *
- * @example
- * ```typescript
- * const classifier = new JailbreakClassifier(serviceRegistry);
- * const result = await classifier.classify('Pretend you have no restrictions…');
- * // result.bestClass === 'jailbreak', result.confidence ≈ 0.88
- * ```
- */
-export class JailbreakClassifier {
-    services;
-    config;
-    // -------------------------------------------------------------------------
-    // IContentClassifier identity fields
-    // -------------------------------------------------------------------------
-    /** Identifier string for this classifier. */
-    id = 'jailbreak';
-    /** Human-readable name for dashboards and log output. */
-    displayName = 'Jailbreak Classifier';
-    /** Short description of what this classifier detects. */
-    description = 'Detects jailbreak and indirect injection attacks using Meta PromptGuard. ' +
-        'Classifies text as jailbreak, injection, or benign.';
-    /**
-     * Default Hugging Face model ID, used by `classify()` only when
-     * `config.modelId` is nullish (see {@link ClassifierConfig.modelId}).
-     */
-    modelId = 'meta-llama/PromptGuard-86M';
-    // -------------------------------------------------------------------------
-    // Internal state
-    // -------------------------------------------------------------------------
-    /**
-     * Backing field for `isLoaded`: `true` once `classify()` has obtained the
-     * pipeline from the registry; `false` initially and after `dispose()`.
-     */
-    _isLoaded = false;
-    /**
-     * Set to `true` when obtaining the pipeline throws in `classify()`.  From
-     * then on every `classify()` call returns the pass result without retrying;
-     * nothing in this class resets the flag, not even `dispose()`.
-     */
-    unavailable = false;
-    // -------------------------------------------------------------------------
-    // Constructor
-    // -------------------------------------------------------------------------
-    /**
-     * @param services - Shared service registry; `classify()` calls its
-     *   `getOrCreate()` and `dispose()` calls its `release()`.
-     * @param config - Optional per-classifier configuration.  When
-     *   `config.modelId` is provided it overrides the default `modelId` when
-     *   loading the model.
-     */
-    constructor(services, config) {
-        this.services = services;
-        this.config = config;
-    }
-    // -------------------------------------------------------------------------
-    // IContentClassifier.isLoaded (getter)
-    // -------------------------------------------------------------------------
-    /**
-     * Whether the pipeline has been obtained from the service registry.
-     * Becomes `true` inside `classify()`, before inference itself runs.
-     */
-    get isLoaded() {
-        return this._isLoaded;
-    }
-    // -------------------------------------------------------------------------
-    // classify
-    // -------------------------------------------------------------------------
-    /**
-     * Run jailbreak inference on `text`.
-     *
-     * Obtains the pipeline via the shared service registry (created on first
-     * use), then calls it with `{ topk: null }` to request every class score.
-     * NOTE(review): newer Transformers.js names this option `top_k` — confirm.
-     *
-     * @param text - The text to evaluate.
-     * @returns A promise for the classification result.  A pipeline *load*
-     *   failure yields the pass result; an inference error still rejects.
-     */
-    async classify(text) {
-        // Short-circuit: a previous load failure latched `unavailable`.
-        if (this.unavailable) {
-            return this.passResult();
-        }
-        // Obtain (or create) the HuggingFace pipeline through the shared registry.
-        // NOTE(review): create-once semantics depend on `getOrCreate()` — verify.
-        let pipeline;
-        try {
-            pipeline = await this.services.getOrCreate(ML_CLASSIFIER_SERVICE_IDS.JAILBREAK_PIPELINE, async () => {
-                // Dynamic import so the ONNX runtime is excluded from the initial
-                // bundle and environments without the package are unaffected.
-                const { pipeline: createPipeline } = await import('@huggingface/transformers');
-                return createPipeline('text-classification',
-                // Honour a caller-supplied model override; fall back to the default.
-                this.config?.modelId ?? this.modelId, { quantized: true });
-            }, {
-                /** Calls the pipeline's own `dispose()` when it defines one. */
-                dispose: async (p) => p?.dispose?.(),
-                /** Tags used for diagnostics and capability discovery. */
-                tags: ['ml', 'classifier', 'jailbreak', 'onnx'],
-            });
-            // Runs on every successful call, not only the first one.
-            this._isLoaded = true;
-        }
-        catch {
-            // Load failed: the error is discarded, the flag latches, and we fail open.
-            this.unavailable = true;
-            return this.passResult();
-        }
-        // Outside the try block: an inference error propagates to the caller.
-        const raw = await pipeline(text, { topk: null });
-        return this.mapResult(raw);
-    }
-    // -------------------------------------------------------------------------
-    // dispose (optional IContentClassifier lifecycle hook)
-    // -------------------------------------------------------------------------
-    /**
-     * Release the pipeline from the shared registry and clear `_isLoaded`.
-     *
-     * Presumably safe to repeat if `release()` tolerates missing ids — verify.
-     */
-    async dispose() {
-        await this.services.release(ML_CLASSIFIER_SERVICE_IDS.JAILBREAK_PIPELINE);
-        this._isLoaded = false;
-    }
-    // -------------------------------------------------------------------------
-    // Private helpers
-    // -------------------------------------------------------------------------
-    /**
-     * Returns the "pass" result used when the model is unavailable or silent.
-     *
-     * It reports `bestClass: 'benign'`, zero confidence and no scores; presumably
-     * the orchestrator maps this to {@link GuardrailAction.ALLOW} — TODO confirm.
-     */
-    passResult() {
-        return { bestClass: 'benign', confidence: 0, allScores: [] };
-    }
-    /**
-     * Map the raw pipeline output to a {@link ClassificationResult}.
-     *
-     * The entry with the highest `score` becomes `bestClass` / `confidence`
-     * (the earliest entry wins a tie).  Every entry of `raw` is copied into
-     * `allScores`; a missing or empty `raw` yields the pass result.
-     *
-     * @param raw - Array of `{ label, score }` items returned by the pipeline.
-     */
-    mapResult(raw) {
-        if (!raw || raw.length === 0) {
-            return this.passResult();
-        }
-        // Linear scan for the maximum score; strict `>` keeps the first of equals.
-        let best = raw[0];
-        for (const item of raw) {
-            if (item.score > best.score) {
-                best = item;
-            }
-        }
-        return {
-            bestClass: best.label,
-            confidence: best.score,
-            allScores: raw.map((item) => ({
-                classLabel: item.label,
-                score: item.score,
-            })),
-        };
-    }
-}
-//# sourceMappingURL=JailbreakClassifier.js.map

package/dist/classifiers/JailbreakClassifier.js.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"JailbreakClassifier.js","sourceRoot":"","sources":["../../src/classifiers/JailbreakClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAMH,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAiBrD,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,OAAO,mBAAmB;IAmDX;IACA;IAnDnB,4EAA4E;IAC5E,qCAAqC;IACrC,4EAA4E;IAE5E,qDAAqD;IAC5C,EAAE,GAAG,WAAW,CAAC;IAE1B,yDAAyD;IAChD,WAAW,GAAG,sBAAsB,CAAC;IAE9C,yDAAyD;IAChD,WAAW,GAClB,2EAA2E;QAC3E,qDAAqD,CAAC;IAExD;;;OAGG;IACM,OAAO,GAAG,4BAA4B,CAAC;IAEhD,4EAA4E;IAC5E,iBAAiB;IACjB,4EAA4E;IAE5E;;;OAGG;IACK,SAAS,GAAG,KAAK,CAAC;IAE1B;;;;OAIG;IACK,WAAW,GAAG,KAAK,CAAC;IAE5B,4EAA4E;IAC5E,cAAc;IACd,4EAA4E;IAE5E;;;;;;OAMG;IACH,YACmB,QAAgC,EAChC,MAAyB;QADzB,aAAQ,GAAR,QAAQ,CAAwB;QAChC,WAAM,GAAN,MAAM,CAAmB;IACzC,CAAC;IAEJ,4EAA4E;IAC5E,uCAAuC;IACvC,4EAA4E;IAE5E;;;OAGG;IACH,IAAI,QAAQ;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,4EAA4E;IAC5E,WAAW;IACX,4EAA4E;IAE5E;;;;;;;;;;OAUG;IACH,KAAK,CAAC,QAAQ,CAAC,IAAY;QACzB,6EAA6E;QAC7E,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,qEAAqE;QACrE,gEAAgE;QAChE,IAAI,QAAqE,CAAC;QAC1E,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,CACxC,yBAAyB,CAAC,kBAAkB,EAC5C,KAAK,IAAI,EAAE;gBACT,kEAAkE;gBAClE,8DAA8D;gBAC9D,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAC/C,2BAA2B,CAC5B,CAAC;gBACF,OAAO,cAAc,CACnB,qBAAqB;gBACrB,qEAAqE;gBACrE,IAAI,CAAC,MAAM,EAAE,OAAO,IAAI,IAAI,CAAC,OAAO,EACpC,EAAE,SAAS,EAAE,IAAI,EAAE,CACpB,CAAC;YACJ,CAAC,EACD;gBACE,sEAAsE;gBACtE,OAAO,EAAE,KAAK,EAAE,CAAM,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;gBACzC,0DAA0D;gBAC1D,IAAI,EAAE,CAAC,IAAI,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC;aAChD,CACF,CAAC;YAEF,mEAAmE;YACnE,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,yEAAyE;YACzE,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;YACxB,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,0DAA0D;QAC1D,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,4
EAA4E;IAC5E,uDAAuD;IACvD,4EAA4E;IAE5E;;;;OAIG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,yBAAyB,CAAC,kBAAkB,CAAC,CAAC;QAC1E,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;OAKG;IACK,UAAU;QAChB,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;IAC/D,CAAC;IAED;;;;;;;;OAQG;IACK,SAAS,CAAC,GAAe;QAC/B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,kEAAkE;QAClE,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAClB,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;YACvB,IAAI,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC5B,IAAI,GAAG,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,KAAK;YACrB,UAAU,EAAE,IAAI,CAAC,KAAK;YACtB,SAAS,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBAC5B,UAAU,EAAE,IAAI,CAAC,KAAK;gBACtB,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF"}

package/dist/classifiers/ToxicityClassifier.d.ts DELETED Viewed

@@ -1,125 +0,0 @@
-/**
- * @fileoverview Toxicity content classifier using the `unitary/toxic-bert` model.
- *
- * This classifier uses a multi-label BERT-based model trained on the Jigsaw
- * Toxic Comment dataset.  It assigns independent confidence scores to six
- * toxicity categories and surfaces the highest-scoring label as `bestClass`.
- *
- * The model is loaded lazily the first time `classify()` is called and
- * cached in the shared service registry so it is only initialised once even
- * if multiple parts of the system hold a reference to this classifier.
- *
- * Graceful degradation
- * --------------------
- * If the model fails to load (e.g. network unavailable, ONNX runtime missing)
- * the classifier sets `unavailable = true` and returns a **pass result**
- * `{ bestClass: 'benign', confidence: 0, allScores: [] }` on every subsequent
- * call instead of throwing.  This ensures the guardrail pipeline degrades
- * gracefully rather than crashing the agent.
- *
- * @module agentos/extensions/packs/ml-classifiers/classifiers/ToxicityClassifier
- */
-import type { ClassificationResult } from '@framers/agentos';
-import type { ISharedServiceRegistry } from '@framers/agentos';
-import type { IContentClassifier } from '../IContentClassifier';
-import type { ClassifierConfig } from '../types';
-/**
- * Multi-label toxicity classifier backed by `unitary/toxic-bert`.
- *
- * Evaluates text against six toxicity categories:
- *  - `toxic`
- *  - `severe_toxic`
- *  - `obscene`
- *  - `threat`
- *  - `insult`
- *  - `identity_hate`
- *
- * Each category receives an independent confidence score.  The label with
- * the highest score is reported as `bestClass` and its score as `confidence`.
- * All six scores are included in `allScores` so the pack orchestrator can
- * apply per-label thresholds.
- *
- * @implements {IContentClassifier}
- *
- * @example
- * ```typescript
- * const classifier = new ToxicityClassifier(serviceRegistry);
- * const result = await classifier.classify('You are terrible!');
- * // result.bestClass === 'insult', result.confidence ≈ 0.87
- * ```
- */
-export declare class ToxicityClassifier implements IContentClassifier {
-    private readonly services;
-    private readonly config?;
-    /** Identifier string for this classifier. */
-    readonly id = "toxicity";
-    /** Human-readable name for dashboards and log output. */
-    readonly displayName = "Toxicity Classifier";
-    /** Short description of what this classifier detects. */
-    readonly description: string;
-    /**
-     * Default Hugging Face model ID, used by `classify()` only when
-     * `config.modelId` is nullish (see {@link ClassifierConfig.modelId}).
-     */
-    readonly modelId = "unitary/toxic-bert";
-    /**
-     * Backing field for `isLoaded`: `true` once `classify()` has obtained the
-     * pipeline from the registry; `false` initially and after `dispose()`.
-     */
-    private _isLoaded;
-    /**
-     * Set to `true` when obtaining the pipeline throws in `classify()`.  From
-     * then on every `classify()` call returns the pass result without retrying;
-     * nothing in this class resets the flag, not even `dispose()`.
-     */
-    private unavailable;
-    /**
-     * @param services - Shared service registry; `classify()` calls its
-     *   `getOrCreate()` and `dispose()` calls its `release()`.
-     * @param config - Optional per-classifier configuration.  When
-     *   `config.modelId` is provided it overrides the default `modelId` when
-     *   loading the model.
-     */
-    constructor(services: ISharedServiceRegistry, config?: ClassifierConfig | undefined);
-    /**
-     * Whether the pipeline has been obtained from the service registry.
-     * Becomes `true` inside `classify()`, before inference itself runs.
-     */
-    get isLoaded(): boolean;
-    /**
-     * Run toxicity inference on `text`.
-     *
-     * Obtains the pipeline via the shared service registry (created on first
-     * use), then calls it with `{ topk: null }` to request every label score.
-     * NOTE(review): newer Transformers.js names this option `top_k` — confirm.
-     *
-     * @param text - The text to evaluate.
-     * @returns A promise for the classification result.  A pipeline *load*
-     *   failure yields the pass result; an inference error still rejects.
-     */
-    classify(text: string): Promise<ClassificationResult>;
-    /**
-     * Release the pipeline from the shared registry and clear `_isLoaded`.
-     *
-     * Presumably safe to repeat if `release()` tolerates missing ids — verify.
-     */
-    dispose(): Promise<void>;
-    /**
-     * Returns the "pass" result used when the model is unavailable or silent.
-     *
-     * It reports `bestClass: 'benign'`, zero confidence and no scores; presumably
-     * the orchestrator maps this to {@link GuardrailAction.ALLOW} — TODO confirm.
-     */
-    private passResult;
-    /**
-     * Map the raw pipeline output (array of `{ label, score }` objects) to a
-     * {@link ClassificationResult}.
-     *
-     * The highest-scoring entry becomes `bestClass` / `confidence` (the first
-     * entry wins a tie); every entry of `raw` is copied into `allScores`.
-     *
-     * @param raw - Array of `{ label, score }` items returned by the pipeline.
-     */
-    private mapResult;
-}
-//# sourceMappingURL=ToxicityClassifier.d.ts.map

package/dist/classifiers/ToxicityClassifier.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"ToxicityClassifier.d.ts","sourceRoot":"","sources":["../../src/classifiers/ToxicityClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC/D,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAsBjD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,qBAAa,kBAAmB,YAAW,kBAAkB;IAmDzD,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;IA/C1B,qDAAqD;IACrD,QAAQ,CAAC,EAAE,cAAc;IAEzB,yDAAyD;IACzD,QAAQ,CAAC,WAAW,yBAAyB;IAE7C,yDAAyD;IACzD,QAAQ,CAAC,WAAW,SAEiD;IAErE;;;OAGG;IACH,QAAQ,CAAC,OAAO,wBAAwB;IAMxC;;;OAGG;IACH,OAAO,CAAC,SAAS,CAAS;IAE1B;;;;OAIG;IACH,OAAO,CAAC,WAAW,CAAS;IAM5B;;;;;;OAMG;gBAEgB,QAAQ,EAAE,sBAAsB,EAChC,MAAM,CAAC,EAAE,gBAAgB,YAAA;IAO5C;;;OAGG;IACH,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAMD;;;;;;;;;;OAUG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAoD3D;;;;OAIG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAS9B;;;;;OAKG;IACH,OAAO,CAAC,UAAU;IAIlB;;;;;;;;OAQG;IACH,OAAO,CAAC,SAAS;CAuBlB"}

package/dist/classifiers/ToxicityClassifier.js DELETED Viewed

@@ -1,212 +0,0 @@
-/**
- * @fileoverview Toxicity content classifier using the `unitary/toxic-bert` model.
- *
- * This classifier uses a multi-label BERT-based model trained on the Jigsaw
- * Toxic Comment dataset.  It assigns independent confidence scores to six
- * toxicity categories and surfaces the highest-scoring label as `bestClass`.
- *
- * The model is loaded lazily the first time `classify()` is called and
- * cached in the shared service registry so it is only initialised once even
- * if multiple parts of the system hold a reference to this classifier.
- *
- * Graceful degradation
- * --------------------
- * If the model fails to load (e.g. network unavailable, ONNX runtime missing)
- * the classifier sets `unavailable = true` and returns a **pass result**
- * `{ bestClass: 'benign', confidence: 0, allScores: [] }` on every subsequent
- * call instead of throwing.  This ensures the guardrail pipeline degrades
- * gracefully rather than crashing the agent.
- *
- * @module agentos/extensions/packs/ml-classifiers/classifiers/ToxicityClassifier
- */
-import { ML_CLASSIFIER_SERVICE_IDS } from '../types';
-// ---------------------------------------------------------------------------
-// ToxicityClassifier
-// ---------------------------------------------------------------------------
-/**
- * Multi-label toxicity classifier backed by `unitary/toxic-bert`.
- *
- * Evaluates text against six toxicity categories:
- *  - `toxic`
- *  - `severe_toxic`
- *  - `obscene`
- *  - `threat`
- *  - `insult`
- *  - `identity_hate`
- *
- * Each category receives an independent confidence score.  The label with
- * the highest score is reported as `bestClass` and its score as `confidence`.
- * All six scores are included in `allScores` so the pack orchestrator can
- * apply per-label thresholds.
- *
- * @implements {IContentClassifier}
- *
- * @example
- * ```typescript
- * const classifier = new ToxicityClassifier(serviceRegistry);
- * const result = await classifier.classify('You are terrible!');
- * // result.bestClass === 'insult', result.confidence ≈ 0.87
- * ```
- */
-export class ToxicityClassifier {
-    services;
-    config;
-    // -------------------------------------------------------------------------
-    // IContentClassifier identity fields
-    // -------------------------------------------------------------------------
-    /** Identifier string for this classifier. */
-    id = 'toxicity';
-    /** Human-readable name for dashboards and log output. */
-    displayName = 'Toxicity Classifier';
-    /** Short description of what this classifier detects. */
-    description = 'Detects toxic, hateful, or abusive language across six categories: ' +
-        'toxic, severe_toxic, obscene, threat, insult, and identity_hate.';
-    /**
-     * Default Hugging Face model ID, used by `classify()` only when
-     * `config.modelId` is nullish (see {@link ClassifierConfig.modelId}).
-     */
-    modelId = 'unitary/toxic-bert';
-    // -------------------------------------------------------------------------
-    // Internal state
-    // -------------------------------------------------------------------------
-    /**
-     * Backing field for `isLoaded`: `true` once `classify()` has obtained the
-     * pipeline from the registry; `false` initially and after `dispose()`.
-     */
-    _isLoaded = false;
-    /**
-     * Set to `true` when obtaining the pipeline throws in `classify()`.  From
-     * then on every `classify()` call returns the pass result without retrying;
-     * nothing in this class resets the flag, not even `dispose()`.
-     */
-    unavailable = false;
-    // -------------------------------------------------------------------------
-    // Constructor
-    // -------------------------------------------------------------------------
-    /**
-     * @param services - Shared service registry; `classify()` calls its
-     *   `getOrCreate()` and `dispose()` calls its `release()`.
-     * @param config - Optional per-classifier configuration.  When
-     *   `config.modelId` is provided it overrides the default `modelId` when
-     *   loading the model.
-     */
-    constructor(services, config) {
-        this.services = services;
-        this.config = config;
-    }
-    // -------------------------------------------------------------------------
-    // IContentClassifier.isLoaded (getter)
-    // -------------------------------------------------------------------------
-    /**
-     * Whether the pipeline has been obtained from the service registry.
-     * Becomes `true` inside `classify()`, before inference itself runs.
-     */
-    get isLoaded() {
-        return this._isLoaded;
-    }
-    // -------------------------------------------------------------------------
-    // classify
-    // -------------------------------------------------------------------------
-    /**
-     * Run toxicity inference on `text`.
-     *
-     * Obtains the pipeline via the shared service registry (created on first
-     * use), then calls it with `{ topk: null }` to request every label score.
-     * NOTE(review): newer Transformers.js names this option `top_k` — confirm.
-     *
-     * @param text - The text to evaluate.
-     * @returns A promise for the classification result.  A pipeline *load*
-     *   failure yields the pass result; an inference error still rejects.
-     */
-    async classify(text) {
-        // Short-circuit: a previous load failure latched `unavailable`.
-        if (this.unavailable) {
-            return this.passResult();
-        }
-        // Lazily obtain (or create) the HuggingFace pipeline instance from the
-        // shared service registry.  NOTE(review): de-duplication of concurrent
-        // loads depends on the registry's `getOrCreate()` — not visible here.
-        let pipeline;
-        try {
-            pipeline = await this.services.getOrCreate(ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE, async () => {
-                // Dynamic import keeps the heavy ONNX runtime out of the initial
-                // bundle and allows environments without the package to skip loading.
-                const { pipeline: createPipeline } = await import('@huggingface/transformers');
-                return createPipeline('text-classification',
-                // Honour a caller-supplied model override; fall back to the default.
-                this.config?.modelId ?? this.modelId, { quantized: true });
-            }, {
-                /** Calls the pipeline's own `dispose()` when it defines one. */
-                dispose: async (p) => p?.dispose?.(),
-                /** Tags used for diagnostics and capability discovery. */
-                tags: ['ml', 'classifier', 'toxicity', 'onnx'],
-            });
-            // Runs on every successful call, not only the first one.
-            this._isLoaded = true;
-        }
-        catch {
-            // Load failed: the error is discarded and the flag latches, so this
-            // and every later call fail open with the pass result.
-            this.unavailable = true;
-            return this.passResult();
-        }
-        // Outside the try block: an inference error propagates to the caller.
-        const raw = await pipeline(text, { topk: null });
-        return this.mapResult(raw);
-    }
-    // -------------------------------------------------------------------------
-    // dispose (optional IContentClassifier lifecycle hook)
-    // -------------------------------------------------------------------------
-    /**
-     * Release the pipeline from the shared registry and clear `_isLoaded`.
-     *
-     * Presumably safe to repeat if `release()` tolerates missing ids — verify.
-     */
-    async dispose() {
-        await this.services.release(ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE);
-        this._isLoaded = false;
-    }
-    // -------------------------------------------------------------------------
-    // Private helpers
-    // -------------------------------------------------------------------------
-    /**
-     * Returns the "pass" result used when the model is unavailable or silent.
-     *
-     * It reports `bestClass: 'benign'`, zero confidence and no scores; presumably
-     * the orchestrator maps this to {@link GuardrailAction.ALLOW} — TODO confirm.
-     */
-    passResult() {
-        return { bestClass: 'benign', confidence: 0, allScores: [] };
-    }
-    /**
-     * Map the raw pipeline output (array of `{ label, score }` objects) to a
-     * {@link ClassificationResult}.
-     *
-     * The highest-scoring entry becomes `bestClass` / `confidence` (the first
-     * entry wins a tie); every entry of `raw` is copied into `allScores`.
-     *
-     * @param raw - Array of `{ label, score }` items returned by the pipeline.
-     */
-    mapResult(raw) {
-        if (!raw || raw.length === 0) {
-            // Missing or empty output: fall back to the pass result.
-            return this.passResult();
-        }
-        // Linear scan for the maximum score; strict `>` keeps the first of equals.
-        let best = raw[0];
-        for (const item of raw) {
-            if (item.score > best.score) {
-                best = item;
-            }
-        }
-        return {
-            bestClass: best.label,
-            confidence: best.score,
-            allScores: raw.map((item) => ({
-                classLabel: item.label,
-                score: item.score,
-            })),
-        };
-    }
-}
-//# sourceMappingURL=ToxicityClassifier.js.map

package/dist/classifiers/ToxicityClassifier.js.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"ToxicityClassifier.js","sourceRoot":"","sources":["../../src/classifiers/ToxicityClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAMH,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAiBrD,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,OAAO,kBAAkB;IAmDV;IACA;IAnDnB,4EAA4E;IAC5E,qCAAqC;IACrC,4EAA4E;IAE5E,qDAAqD;IAC5C,EAAE,GAAG,UAAU,CAAC;IAEzB,yDAAyD;IAChD,WAAW,GAAG,qBAAqB,CAAC;IAE7C,yDAAyD;IAChD,WAAW,GAClB,qEAAqE;QACrE,kEAAkE,CAAC;IAErE;;;OAGG;IACM,OAAO,GAAG,oBAAoB,CAAC;IAExC,4EAA4E;IAC5E,iBAAiB;IACjB,4EAA4E;IAE5E;;;OAGG;IACK,SAAS,GAAG,KAAK,CAAC;IAE1B;;;;OAIG;IACK,WAAW,GAAG,KAAK,CAAC;IAE5B,4EAA4E;IAC5E,cAAc;IACd,4EAA4E;IAE5E;;;;;;OAMG;IACH,YACmB,QAAgC,EAChC,MAAyB;QADzB,aAAQ,GAAR,QAAQ,CAAwB;QAChC,WAAM,GAAN,MAAM,CAAmB;IACzC,CAAC;IAEJ,4EAA4E;IAC5E,uCAAuC;IACvC,4EAA4E;IAE5E;;;OAGG;IACH,IAAI,QAAQ;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,4EAA4E;IAC5E,WAAW;IACX,4EAA4E;IAE5E;;;;;;;;;;OAUG;IACH,KAAK,CAAC,QAAQ,CAAC,IAAY;QACzB,6EAA6E;QAC7E,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,uEAAuE;QACvE,0EAA0E;QAC1E,oCAAoC;QACpC,IAAI,QAAqE,CAAC;QAC1E,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,CACxC,yBAAyB,CAAC,iBAAiB,EAC3C,KAAK,IAAI,EAAE;gBACT,iEAAiE;gBACjE,sEAAsE;gBACtE,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAC/C,2BAA2B,CAC5B,CAAC;gBACF,OAAO,cAAc,CACnB,qBAAqB;gBACrB,qEAAqE;gBACrE,IAAI,CAAC,MAAM,EAAE,OAAO,IAAI,IAAI,CAAC,OAAO,EACpC,EAAE,SAAS,EAAE,IAAI,EAAE,CACpB,CAAC;YACJ,CAAC,EACD;gBACE,sEAAsE;gBACtE,OAAO,EAAE,KAAK,EAAE,CAAM,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;gBACzC,0DAA0D;gBAC1D,IAAI,EAAE,CAAC,IAAI,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,CAAC;aAC/C,CACF,CAAC;YAEF,mEAAmE;YACnE,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,wEAAwE;YACxE,oDAAoD;YACpD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;YACxB,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,8DAA8D;QAC9D,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CA
AC,CAAC;IAC7B,CAAC;IAED,4EAA4E;IAC5E,uDAAuD;IACvD,4EAA4E;IAE5E;;;;OAIG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,yBAAyB,CAAC,iBAAiB,CAAC,CAAC;QACzE,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;OAKG;IACK,UAAU;QAChB,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;IAC/D,CAAC;IAED;;;;;;;;OAQG;IACK,SAAS,CAAC,GAAe;QAC/B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,8CAA8C;YAC9C,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,oDAAoD;QACpD,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QAClB,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;YACvB,IAAI,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC5B,IAAI,GAAG,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,KAAK;YACrB,UAAU,EAAE,IAAI,CAAC,KAAK;YACtB,SAAS,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBAC5B,UAAU,EAAE,IAAI,CAAC,KAAK;gBACtB,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF"}