npm - @framers/agentos-ext-ml-classifiers - Versions diffs - 0.2.1 → 0.3.1 - Mend

@framers/agentos-ext-ml-classifiers 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

package/.github/workflows/ci.yml +20 -0
package/.github/workflows/release.yml +37 -0
package/.releaserc.json +9 -0
package/LICENSE +96 -21
package/README.md +72 -0
package/dist/MLClassifierGuardrail.d.ts.map +1 -1
package/dist/MLClassifierGuardrail.js +14 -6
package/dist/MLClassifierGuardrail.js.map +1 -1
package/dist/index.js +3 -3
package/dist/keyword-classifier.js +1 -1
package/dist/llm-classifier.js +1 -1
package/package.json +5 -13
package/scripts/fix-esm-imports.mjs +181 -0
package/src/MLClassifierGuardrail.ts +38 -5
package/test/llm-tier.spec.ts +267 -0
package/test/ml-classifiers.spec.ts +57 -0
package/test/onnx-tier.spec.ts +255 -0
package/test/tier-fallthrough.spec.ts +185 -0
package/vitest.config.ts +18 -7
package/CHANGELOG.md +0 -18
package/dist/ClassifierOrchestrator.d.ts +0 -126
package/dist/ClassifierOrchestrator.d.ts.map +0 -1
package/dist/ClassifierOrchestrator.js +0 -239
package/dist/ClassifierOrchestrator.js.map +0 -1
package/dist/IContentClassifier.d.ts +0 -117
package/dist/IContentClassifier.d.ts.map +0 -1
package/dist/IContentClassifier.js +0 -22
package/dist/IContentClassifier.js.map +0 -1
package/dist/SlidingWindowBuffer.d.ts +0 -213
package/dist/SlidingWindowBuffer.d.ts.map +0 -1
package/dist/SlidingWindowBuffer.js +0 -246
package/dist/SlidingWindowBuffer.js.map +0 -1
package/dist/classifiers/InjectionClassifier.d.ts +0 -126
package/dist/classifiers/InjectionClassifier.d.ts.map +0 -1
package/dist/classifiers/InjectionClassifier.js +0 -210
package/dist/classifiers/InjectionClassifier.js.map +0 -1
package/dist/classifiers/JailbreakClassifier.d.ts +0 -124
package/dist/classifiers/JailbreakClassifier.d.ts.map +0 -1
package/dist/classifiers/JailbreakClassifier.js +0 -208
package/dist/classifiers/JailbreakClassifier.js.map +0 -1
package/dist/classifiers/ToxicityClassifier.d.ts +0 -125
package/dist/classifiers/ToxicityClassifier.d.ts.map +0 -1
package/dist/classifiers/ToxicityClassifier.js +0 -212
package/dist/classifiers/ToxicityClassifier.js.map +0 -1
package/dist/classifiers/WorkerClassifierProxy.d.ts +0 -158
package/dist/classifiers/WorkerClassifierProxy.d.ts.map +0 -1
package/dist/classifiers/WorkerClassifierProxy.js +0 -268
package/dist/classifiers/WorkerClassifierProxy.js.map +0 -1
package/dist/worker/classifier-worker.d.ts +0 -49
package/dist/worker/classifier-worker.d.ts.map +0 -1
package/dist/worker/classifier-worker.js +0 -180
package/dist/worker/classifier-worker.js.map +0 -1
package/src/ClassifierOrchestrator.ts +0 -290
package/src/IContentClassifier.ts +0 -124
package/src/SlidingWindowBuffer.ts +0 -384
package/src/classifiers/InjectionClassifier.ts +0 -261
package/src/classifiers/JailbreakClassifier.ts +0 -259
package/src/classifiers/ToxicityClassifier.ts +0 -263
package/src/classifiers/WorkerClassifierProxy.ts +0 -366
package/src/worker/classifier-worker.ts +0 -267
package/test/ClassifierOrchestrator.spec.ts +0 -365
package/test/ClassifyContentTool.spec.ts +0 -226
package/test/InjectionClassifier.spec.ts +0 -263
package/test/JailbreakClassifier.spec.ts +0 -295
package/test/MLClassifierGuardrail.spec.ts +0 -486
package/test/SlidingWindowBuffer.spec.ts +0 -391
package/test/ToxicityClassifier.spec.ts +0 -268
package/test/WorkerClassifierProxy.spec.ts +0 -303
package/test/index.spec.ts +0 -431

package/dist/IContentClassifier.d.ts DELETED Viewed

@@ -1,117 +0,0 @@
-/**
- * @fileoverview Interface contract for ML-backed content classifiers.
- *
- * An `IContentClassifier` represents a single model pipeline that accepts
- * arbitrary text and returns a {@link ClassificationResult} containing the
- * winning label and confidence scores for all candidate classes.
- *
- * Built-in implementations (toxicity, injection, jailbreak) each implement
- * this interface.  Third-party classifiers may be registered via the
- * `customClassifiers` option of {@link MLClassifierPackOptions}.
- *
- * Lifecycle
- * ---------
- * 1. The pack initialises each classifier (model loading, warm-up).
- * 2. The guardrail pipeline calls `classify()` for every text chunk.
- * 3. On pack teardown, `dispose()` is called (if present) to release GPU/
- *    WASM memory.
- *
- * @module agentos/extensions/packs/ml-classifiers/IContentClassifier
- */
-import type { ClassificationResult } from '@framers/agentos';
-/**
- * Contract for a single ML content classifier.
- *
- * Implementations back one model pipeline and expose a narrow classify/dispose
- * API so the guardrail orchestrator can drive them uniformly regardless of the
- * underlying runtime (Node.js ONNX, browser WASM, remote inference endpoint).
- *
- * @example Minimal custom classifier
- * ```typescript
- * class SarcasmClassifier implements IContentClassifier {
- *   readonly id = 'custom:sarcasm-detector';
- *   readonly displayName = 'Sarcasm Detector';
- *   readonly description = 'Detects sarcastic or ironic statements.';
- *   readonly modelId = 'my-org/sarcasm-bert';
- *   isLoaded = false;
- *
- *   async classify(text: string): Promise<ClassificationResult> {
- *     // … run inference …
- *     return { bestClass: 'NOT_SARCASTIC', confidence: 0.8, allScores: [] };
- *   }
- *
- *   async dispose(): Promise<void> {
- *     // Free resources.
- *   }
- * }
- * ```
- */
-export interface IContentClassifier {
-    /**
-     * Unique service identifier for this classifier.
-     *
-     * Must follow the `agentos:<domain>:<name>` convention so it can be
-     * registered with the AgentOS shared service registry.
-     *
-     * @example `'agentos:ml-classifiers:toxicity-pipeline'`
-     */
-    readonly id: string;
-    /**
-     * Human-readable name displayed in logs and dashboards.
-     *
-     * @example `'Toxicity Pipeline'`
-     */
-    readonly displayName: string;
-    /**
-     * Short prose description of what this classifier detects.
-     *
-     * @example `'Detects toxic, hateful, or abusive language in text.'`
-     */
-    readonly description: string;
-    /**
-     * Identifier of the underlying model being used, typically a Hugging Face
-     * model ID or a local filesystem path.
-     *
-     * @example `'Xenova/toxic-bert'`
-     */
-    readonly modelId: string;
-    /**
-     * Whether the model weights have been fully loaded into memory and the
-     * classifier is ready to accept `classify()` calls.
-     *
-     * The pack initialiser sets this to `true` after the warm-up inference
-     * succeeds.  Callers can check this flag before calling `classify()` to
-     * avoid queueing calls during a slow model download.
-     */
-    isLoaded: boolean;
-    /**
-     * Classify the provided text and return confidence scores for all candidate
-     * labels.
-     *
-     * The classifier is responsible for mapping raw model output to the
-     * {@link ClassificationResult} shape.  It should NOT apply thresholds or
-     * guardrail actions — that is the responsibility of the pack orchestrator.
-     *
-     * @param text - The text to classify.  May be a short chunk from a streaming
-     *   response or a complete message.  Must not be empty.
-     * @returns A promise that resolves with the classification result, including
-     *   the winning label (`bestClass`), its `confidence`, and `allScores` for
-     *   every label the model evaluated.
-     * @throws {Error} If the model is not loaded (`isLoaded === false`) or if
-     *   inference fails for an unrecoverable reason.
-     */
-    classify(text: string): Promise<ClassificationResult>;
-    /**
-     * Release all resources held by this classifier (model weights, WASM
-     * module, GPU buffers, worker threads, etc.).
-     *
-     * Called by the pack orchestrator during AgentOS shutdown or when the pack
-     * is unloaded.  Implementations should be idempotent — calling `dispose()`
-     * multiple times must not throw.
-     *
-     * @optional Classifiers that hold no persistent resources may omit this
-     *   method.
-     */
-    dispose?(): Promise<void>;
-}
-//# sourceMappingURL=IContentClassifier.d.ts.map

package/dist/IContentClassifier.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"IContentClassifier.d.ts","sourceRoot":"","sources":["../src/IContentClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAE7D;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;;;;;OAOG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IAEpB;;;;OAIG;IACH,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAE7B;;;;OAIG;IACH,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAE7B;;;;;OAKG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IAEzB;;;;;;;OAOG;IACH,QAAQ,EAAE,OAAO,CAAC;IAElB;;;;;;;;;;;;;;;OAeG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAEtD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B"}

package/dist/IContentClassifier.js DELETED Viewed

@@ -1,22 +0,0 @@
-/**
- * @fileoverview Interface contract for ML-backed content classifiers.
- *
- * An `IContentClassifier` represents a single model pipeline that accepts
- * arbitrary text and returns a {@link ClassificationResult} containing the
- * winning label and confidence scores for all candidate classes.
- *
- * Built-in implementations (toxicity, injection, jailbreak) each implement
- * this interface.  Third-party classifiers may be registered via the
- * `customClassifiers` option of {@link MLClassifierPackOptions}.
- *
- * Lifecycle
- * ---------
- * 1. The pack initialises each classifier (model loading, warm-up).
- * 2. The guardrail pipeline calls `classify()` for every text chunk.
- * 3. On pack teardown, `dispose()` is called (if present) to release GPU/
- *    WASM memory.
- *
- * @module agentos/extensions/packs/ml-classifiers/IContentClassifier
- */
-export {};
-//# sourceMappingURL=IContentClassifier.js.map

package/dist/IContentClassifier.js.map DELETED Viewed

@@ -1 +0,0 @@
- {"version":3,"file":"IContentClassifier.js","sourceRoot":"","sources":["../src/IContentClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG"}

package/dist/SlidingWindowBuffer.d.ts DELETED Viewed

@@ -1,213 +0,0 @@
-/**
- * @fileoverview Sliding-window text buffer for streaming ML classifier evaluation.
- *
- * When an LLM streams its response token-by-token, we cannot wait for the
- * complete response before running safety classifiers — that would be too late
- * to block or sanitise harmful content.  At the same time, classifiers are
- * expensive: running one on every individual token is wasteful and introduces
- * unacceptable latency.
- *
- * `SlidingWindowBuffer` solves this by accumulating tokens from one or more
- * concurrent streams and emitting a {@link ChunkReady} event only when enough
- * tokens have accumulated to fill a `chunkSize`-token window.  Each window
- * also includes a `contextSize`-token "ring" from the previous chunk, so the
- * classifier can reason about content that spans window boundaries.
- *
- * Architecture
- * ------------
- * - **Per-stream state**: Stored in a `Map<streamId, WindowState>`.  Each
- *   stream is fully independent and can be used across multiple concurrent
- *   responses.
- * - **Token estimation**: Uses the 4-chars-per-token heuristic for speed;
- *   callers that need exact counts should pre-tokenise text before pushing.
- * - **Evaluation budget**: Once a stream reaches `maxEvaluations` chunks,
- *   `push()` returns `null` for all subsequent pushes, preventing unbounded
- *   classifier invocations on very long responses.
- * - **Stale-stream pruning**: Streams that have not received data within
- *   `streamTimeoutMs` milliseconds are lazily evicted from the map to prevent
- *   memory leaks in long-running servers.
- *
- * @module agentos/extensions/packs/ml-classifiers/SlidingWindowBuffer
- */
-/**
- * Configuration for a {@link SlidingWindowBuffer} instance.
- *
- * All fields are optional; unset fields fall back to the defaults shown below.
- */
-export interface SlidingWindowConfig {
-    /**
-     * Target window size in *estimated* tokens.  When the accumulated buffer
-     * reaches or exceeds this many tokens, a {@link ChunkReady} is emitted and
-     * the buffer is slid forward.
-     *
-     * @default 200
-     */
-    chunkSize: number;
-    /**
-     * Number of tokens from the tail of the previous window to carry into the
-     * `text` field of the next {@link ChunkReady}.  This overlap prevents
-     * boundary effects where a phrase split across two windows is misclassified.
-     *
-     * @default 50
-     */
-    contextSize: number;
-    /**
-     * Maximum number of {@link ChunkReady} events to emit per stream.  After
-     * this budget is exhausted, `push()` returns `null` for the remainder of the
-     * stream.  Use `flush()` to retrieve any buffered text that has not been
-     * emitted yet.
-     *
-     * @default 100
-     */
-    maxEvaluations: number;
-    /**
-     * Milliseconds of inactivity after which a stream is considered stale and
-     * eligible for eviction by {@link SlidingWindowBuffer.pruneStale}.
-     *
-     * @default 30000
-     */
-    streamTimeoutMs: number;
-}
-/**
- * Emitted by {@link SlidingWindowBuffer.push} when sufficient tokens have
- * accumulated to fill one evaluation window.
- */
-export interface ChunkReady {
-    /**
-     * The full text to classify.  Equals `contextRing + newBuffer`, where
-     * `contextRing` is the carried-forward tail from the previous window.
-     * Always non-empty.
-     */
-    text: string;
-    /**
-     * Only the *new* text pushed since the last chunk was emitted (i.e. without
-     * the context prefix).  Useful for determining which part of the response
-     * was newly evaluated.
-     */
-    newText: string;
-    /**
-     * 1-indexed sequence number for this chunk within the stream.
-     * The first chunk emitted for a stream has `evaluationNumber === 1`.
-     */
-    evaluationNumber: number;
-}
-/**
- * A stateful, multi-stream text accumulator that emits fixed-size windows
- * for ML classifier evaluation with configurable context carry-forward.
- *
- * @example
- * ```typescript
- * const buf = new SlidingWindowBuffer({ chunkSize: 200, contextSize: 50 });
- *
- * // Simulate streaming tokens
- * for (const token of streamedTokens) {
- *   const chunk = buf.push('stream-1', token);
- *   if (chunk) {
- *     const result = await toxicityClassifier.classify(chunk.text);
- *     if (result.confidence > 0.9) terminateStream();
- *   }
- * }
- *
- * // Evaluate remaining tokens
- * const finalChunk = buf.flush('stream-1');
- * if (finalChunk) {
- *   await toxicityClassifier.classify(finalChunk.text);
- * }
- * ```
- */
-export declare class SlidingWindowBuffer {
-    /** Resolved configuration (defaults applied). */
-    private readonly config;
-    /**
-     * Per-stream state map.  Keyed by the `streamId` passed to `push()`.
-     * Entries are created lazily on first push and removed on flush or prune.
-     */
-    private readonly streams;
-    /**
-     * Construct a new buffer with the supplied configuration.
-     *
-     * @param config - Partial configuration; unset fields fall back to defaults:
-     *   `chunkSize=200`, `contextSize=50`, `maxEvaluations=100`,
-     *   `streamTimeoutMs=30000`.
-     */
-    constructor(config?: Partial<SlidingWindowConfig>);
-    /**
-     * Push new text into the buffer for the specified stream.
-     *
-     * Internally the text is appended to the stream's accumulation buffer.
-     * If the buffer's estimated token count reaches `chunkSize`, a
-     * {@link ChunkReady} is assembled and returned; the buffer is then reset
-     * (with the tail preserved as the context ring for the next window).
-     *
-     * Returns `null` when:
-     * - The buffer has not yet accumulated `chunkSize` tokens.
-     * - The stream has already emitted `maxEvaluations` chunks.
-     *
-     * When the map contains more than 10 streams, stale streams are pruned
-     * lazily after the push is processed.
-     *
-     * @param streamId - Opaque identifier for the stream (e.g. a request UUID).
-     * @param text     - The new text fragment to accumulate.
-     * @returns A {@link ChunkReady} when an evaluation window is complete, or
-     *   `null` if more data is needed (or the budget is exhausted).
-     */
-    push(streamId: string, text: string): ChunkReady | null;
-    /**
-     * Flush any remaining buffered text for the stream as a final chunk.
-     *
-     * Call this after the stream ends (e.g. when the LLM emits its final
-     * token) to ensure the classifier evaluates the tail of the response.
-     *
-     * The stream's state entry is removed from the map after flushing.
-     *
-     * @param streamId - Identifier of the stream to flush.
-     * @returns A {@link ChunkReady} for the remaining buffer, or `null` if the
-     *   buffer is empty or the stream does not exist.
-     */
-    flush(streamId: string): ChunkReady | null;
-    /**
-     * Remove streams that have not received data within `streamTimeoutMs`.
-     *
-     * Called lazily by `push()` when the stream map grows beyond 10 entries.
-     * May also be called proactively by a maintenance timer.
-     */
-    pruneStale(): void;
-    /**
-     * Remove all stream state from the buffer.
-     *
-     * Useful for graceful shutdown or unit-test teardown to ensure no cross-test
-     * state leaks.
-     */
-    clear(): void;
-    /**
-     * The number of streams currently tracked (including stale ones not yet
-     * pruned).
-     *
-     * Exposed primarily for testing and diagnostics.
-     */
-    get size(): number;
-    /**
-     * Assemble a {@link ChunkReady} from the current stream state.
-     *
-     * The `text` field is the concatenation of `contextRing` and the current
-     * `buffer`, giving the classifier cross-boundary context.  The `newText`
-     * field is just the raw `buffer` so callers can distinguish old from new.
-     *
-     * @param state - The mutable state for the stream being assembled.
-     * @returns A fully-populated {@link ChunkReady}.
-     */
-    private assembleChunk;
-    /**
-     * Estimate the number of LLM tokens in a string using the 4-chars-per-token
-     * heuristic.
-     *
-     * This deliberately mirrors {@link estimateTokens} from `core/utils/text-utils`
-     * without importing it, keeping this module self-contained and safe to load
-     * in Web Worker contexts where module resolution may differ.
-     *
-     * @param text - The string to estimate.
-     * @returns Non-negative integer token count estimate.
-     */
-    private estimateTokens;
-}
-//# sourceMappingURL=SlidingWindowBuffer.d.ts.map

package/dist/SlidingWindowBuffer.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"SlidingWindowBuffer.d.ts","sourceRoot":"","sources":["../src/SlidingWindowBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAMH;;;;GAIG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;OAMG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB;;;;;;OAMG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;;;;;;OAOG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB;;;;OAIG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB;;;OAGG;IACH,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAgDD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,iDAAiD;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAE7C;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAE/D;;;;;;OAMG;gBACS,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC;IAajD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAwDvD;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAe1C;;;;;OAKG;IACH,UAAU,IAAI,IAAI;IASlB;;;;;OAKG;IACH,KAAK,IAAI,IAAI;IAIb;;;;;OAKG;IACH,IAAI,IAAI,IAAI,MAAM,CAEjB;IAMD;;;;;;;;;OASG;IACH,OAAO,CAAC,aAAa;IAWrB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,cAAc;CAIvB"}

package/dist/SlidingWindowBuffer.js DELETED Viewed

@@ -1,246 +0,0 @@
-/**
- * @fileoverview Sliding-window text buffer for streaming ML classifier evaluation.
- *
- * When an LLM streams its response token-by-token, we cannot wait for the
- * complete response before running safety classifiers — that would be too late
- * to block or sanitise harmful content.  At the same time, classifiers are
- * expensive: running one on every individual token is wasteful and introduces
- * unacceptable latency.
- *
- * `SlidingWindowBuffer` solves this by accumulating tokens from one or more
- * concurrent streams and emitting a {@link ChunkReady} event only when enough
- * tokens have accumulated to fill a `chunkSize`-token window.  Each window
- * also includes a `contextSize`-token "ring" from the previous chunk, so the
- * classifier can reason about content that spans window boundaries.
- *
- * Architecture
- * ------------
- * - **Per-stream state**: Stored in a `Map<streamId, WindowState>`.  Each
- *   stream is fully independent and can be used across multiple concurrent
- *   responses.
- * - **Token estimation**: Uses the 4-chars-per-token heuristic for speed;
- *   callers that need exact counts should pre-tokenise text before pushing.
- * - **Evaluation budget**: Once a stream reaches `maxEvaluations` chunks,
- *   `push()` returns `null` for all subsequent pushes, preventing unbounded
- *   classifier invocations on very long responses.
- * - **Stale-stream pruning**: Streams that have not received data within
- *   `streamTimeoutMs` milliseconds are lazily evicted from the map to prevent
- *   memory leaks in long-running servers.
- *
- * @module agentos/extensions/packs/ml-classifiers/SlidingWindowBuffer
- */
-// ---------------------------------------------------------------------------
-// SlidingWindowBuffer implementation
-// ---------------------------------------------------------------------------
-/**
- * A stateful, multi-stream text accumulator that emits fixed-size windows
- * for ML classifier evaluation with configurable context carry-forward.
- *
- * @example
- * ```typescript
- * const buf = new SlidingWindowBuffer({ chunkSize: 200, contextSize: 50 });
- *
- * // Simulate streaming tokens
- * for (const token of streamedTokens) {
- *   const chunk = buf.push('stream-1', token);
- *   if (chunk) {
- *     const result = await toxicityClassifier.classify(chunk.text);
- *     if (result.confidence > 0.9) terminateStream();
- *   }
- * }
- *
- * // Evaluate remaining tokens
- * const finalChunk = buf.flush('stream-1');
- * if (finalChunk) {
- *   await toxicityClassifier.classify(finalChunk.text);
- * }
- * ```
- */
-export class SlidingWindowBuffer {
-    /** Resolved configuration (defaults applied). */
-    config;
-    /**
-     * Per-stream state map.  Keyed by the `streamId` passed to `push()`.
-     * Entries are created lazily on first push and removed on flush or prune.
-     */
-    streams = new Map();
-    /**
-     * Construct a new buffer with the supplied configuration.
-     *
-     * @param config - Partial configuration; unset fields fall back to defaults:
-     *   `chunkSize=200`, `contextSize=50`, `maxEvaluations=100`,
-     *   `streamTimeoutMs=30000`.
-     */
-    constructor(config) {
-        this.config = {
-            chunkSize: config?.chunkSize ?? 200,
-            contextSize: config?.contextSize ?? 50,
-            maxEvaluations: config?.maxEvaluations ?? 100,
-            streamTimeoutMs: config?.streamTimeoutMs ?? 30_000,
-        };
-    }
-    // -------------------------------------------------------------------------
-    // Public API
-    // -------------------------------------------------------------------------
-    /**
-     * Push new text into the buffer for the specified stream.
-     *
-     * Internally the text is appended to the stream's accumulation buffer.
-     * If the buffer's estimated token count reaches `chunkSize`, a
-     * {@link ChunkReady} is assembled and returned; the buffer is then reset
-     * (with the tail preserved as the context ring for the next window).
-     *
-     * Returns `null` when:
-     * - The buffer has not yet accumulated `chunkSize` tokens.
-     * - The stream has already emitted `maxEvaluations` chunks.
-     *
-     * When the map contains more than 10 streams, stale streams are pruned
-     * lazily after the push is processed.
-     *
-     * @param streamId - Opaque identifier for the stream (e.g. a request UUID).
-     * @param text     - The new text fragment to accumulate.
-     * @returns A {@link ChunkReady} when an evaluation window is complete, or
-     *   `null` if more data is needed (or the budget is exhausted).
-     */
-    push(streamId, text) {
-        if (!text) {
-            return null;
-        }
-        // Initialise state for a new stream.
-        if (!this.streams.has(streamId)) {
-            this.streams.set(streamId, {
-                buffer: '',
-                tokenCount: 0,
-                contextRing: '',
-                evaluationCount: 0,
-                lastSeenAt: Date.now(),
-            });
-        }
-        const state = this.streams.get(streamId);
-        state.lastSeenAt = Date.now();
-        // Respect the evaluation budget — stop emitting chunks once exhausted.
-        if (state.evaluationCount >= this.config.maxEvaluations) {
-            return null;
-        }
-        // Accumulate incoming text.
-        state.buffer += text;
-        state.tokenCount = this.estimateTokens(state.buffer);
-        // Lazy pruning: clean up stale streams whenever the map grows large.
-        // Done unconditionally (not just on chunk emit) so stale entries are
-        // reclaimed even when streams are slow to accumulate a full window.
-        if (this.streams.size > 10) {
-            this.pruneStale();
-        }
-        // Not enough tokens yet — wait for more.
-        if (state.tokenCount < this.config.chunkSize) {
-            return null;
-        }
-        // We have a full window.  Assemble the chunk.
-        const chunk = this.assembleChunk(state);
-        // Slide the context ring forward: keep the last `contextSize` tokens'
-        // worth of characters from the buffer that was just emitted.
-        const contextCharBudget = this.config.contextSize * 4;
-        state.contextRing = state.buffer.slice(-contextCharBudget);
-        // Reset the buffer and token count for the next window.
-        state.buffer = '';
-        state.tokenCount = 0;
-        state.evaluationCount += 1;
-        return chunk;
-    }
-    /**
-     * Flush any remaining buffered text for the stream as a final chunk.
-     *
-     * Call this after the stream ends (e.g. when the LLM emits its final
-     * token) to ensure the classifier evaluates the tail of the response.
-     *
-     * The stream's state entry is removed from the map after flushing.
-     *
-     * @param streamId - Identifier of the stream to flush.
-     * @returns A {@link ChunkReady} for the remaining buffer, or `null` if the
-     *   buffer is empty or the stream does not exist.
-     */
-    flush(streamId) {
-        const state = this.streams.get(streamId);
-        // Nothing to flush if the stream is unknown or the buffer is empty.
-        if (!state || state.buffer.length === 0) {
-            // Always clean up the map entry, even for empty buffers.
-            this.streams.delete(streamId);
-            return null;
-        }
-        const chunk = this.assembleChunk(state);
-        this.streams.delete(streamId);
-        return chunk;
-    }
-    /**
-     * Remove streams that have not received data within `streamTimeoutMs`.
-     *
-     * Called lazily by `push()` when the stream map grows beyond 10 entries.
-     * May also be called proactively by a maintenance timer.
-     */
-    pruneStale() {
-        const now = Date.now();
-        for (const [id, state] of this.streams) {
-            if (now - state.lastSeenAt > this.config.streamTimeoutMs) {
-                this.streams.delete(id);
-            }
-        }
-    }
-    /**
-     * Remove all stream state from the buffer.
-     *
-     * Useful for graceful shutdown or unit-test teardown to ensure no cross-test
-     * state leaks.
-     */
-    clear() {
-        this.streams.clear();
-    }
-    /**
-     * The number of streams currently tracked (including stale ones not yet
-     * pruned).
-     *
-     * Exposed primarily for testing and diagnostics.
-     */
-    get size() {
-        return this.streams.size;
-    }
-    // -------------------------------------------------------------------------
-    // Private helpers
-    // -------------------------------------------------------------------------
-    /**
-     * Assemble a {@link ChunkReady} from the current stream state.
-     *
-     * The `text` field is the concatenation of `contextRing` and the current
-     * `buffer`, giving the classifier cross-boundary context.  The `newText`
-     * field is just the raw `buffer` so callers can distinguish old from new.
-     *
-     * @param state - The mutable state for the stream being assembled.
-     * @returns A fully-populated {@link ChunkReady}.
-     */
-    assembleChunk(state) {
-        const newText = state.buffer;
-        const text = state.contextRing + newText;
-        return {
-            text,
-            newText,
-            // evaluationCount is 0-indexed before increment, so +1 gives 1-indexed number.
-            evaluationNumber: state.evaluationCount + 1,
-        };
-    }
-    /**
-     * Estimate the number of LLM tokens in a string using the 4-chars-per-token
-     * heuristic.
-     *
-     * This deliberately mirrors {@link estimateTokens} from `core/utils/text-utils`
-     * without importing it, keeping this module self-contained and safe to load
-     * in Web Worker contexts where module resolution may differ.
-     *
-     * @param text - The string to estimate.
-     * @returns Non-negative integer token count estimate.
-     */
-    estimateTokens(text) {
-        if (!text)
-            return 0;
-        return Math.ceil(text.length / 4);
-    }
-}
-//# sourceMappingURL=SlidingWindowBuffer.js.map

package/dist/SlidingWindowBuffer.js.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"SlidingWindowBuffer.js","sourceRoot":"","sources":["../src/SlidingWindowBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAqHH,8EAA8E;AAC9E,qCAAqC;AACrC,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,mBAAmB;IAC9B,iDAAiD;IAChC,MAAM,CAAsB;IAE7C;;;OAGG;IACc,OAAO,GAA6B,IAAI,GAAG,EAAE,CAAC;IAE/D;;;;;;OAMG;IACH,YAAY,MAAqC;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,EAAE,SAAS,IAAI,GAAG;YACnC,WAAW,EAAE,MAAM,EAAE,WAAW,IAAI,EAAE;YACtC,cAAc,EAAE,MAAM,EAAE,cAAc,IAAI,GAAG;YAC7C,eAAe,EAAE,MAAM,EAAE,eAAe,IAAI,MAAM;SACnD,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,aAAa;IACb,4EAA4E;IAE5E;;;;;;;;;;;;;;;;;;;OAmBG;IACH,IAAI,CAAC,QAAgB,EAAE,IAAY;QACjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE;gBACzB,MAAM,EAAE,EAAE;gBACV,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,EAAE;gBACf,eAAe,EAAE,CAAC;gBAClB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;aACvB,CAAC,CAAC;QACL,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;QAC1C,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE9B,uEAAuE;QACvE,IAAI,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YACxD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,4BAA4B;QAC5B,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC;QACrB,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAErD,qEAAqE;QACrE,qEAAqE;QACrE,oEAAoE;QACpE,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC;YAC3B,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC;QAED,yCAAyC;QACzC,IAAI,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC7C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAExC,sEAAsE;QACtE,6DAA6D;QAC7D,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,GAAG,CAAC,CAAC;QACtD,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,iBAAiB,CAAC,CAAC;QAE3D,wDAAwD;QACxD,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC;QAClB,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC;QACrB,KAAK,CAAC,eAAe,IAAI,CAAC,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;
;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAgB;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAEzC,oEAAoE;QACpE,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxC,yDAAyD;YACzD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;OAKG;IACH,UAAU;QACR,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACvC,IAAI,GAAG,GAAG,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;gBACzD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED;;;;;OAKG;IACH,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;;;;;OASG;IACK,aAAa,CAAC,KAAkB;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC;QACzC,OAAO;YACL,IAAI;YACJ,OAAO;YACP,+EAA+E;YAC/E,gBAAgB,EAAE,KAAK,CAAC,eAAe,GAAG,CAAC;SAC5C,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACK,cAAc,CAAC,IAAY;QACjC,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QACpB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;CACF"}