@framers/agentos-ext-ml-classifiers 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/.github/workflows/ci.yml +20 -0
  2. package/.github/workflows/release.yml +37 -0
  3. package/.releaserc.json +9 -0
  4. package/LICENSE +96 -21
  5. package/README.md +72 -0
  6. package/dist/MLClassifierGuardrail.d.ts.map +1 -1
  7. package/dist/MLClassifierGuardrail.js +14 -6
  8. package/dist/MLClassifierGuardrail.js.map +1 -1
  9. package/dist/index.js +3 -3
  10. package/dist/keyword-classifier.js +1 -1
  11. package/dist/llm-classifier.js +1 -1
  12. package/package.json +5 -13
  13. package/scripts/fix-esm-imports.mjs +181 -0
  14. package/src/MLClassifierGuardrail.ts +38 -5
  15. package/test/llm-tier.spec.ts +267 -0
  16. package/test/ml-classifiers.spec.ts +57 -0
  17. package/test/onnx-tier.spec.ts +255 -0
  18. package/test/tier-fallthrough.spec.ts +185 -0
  19. package/vitest.config.ts +18 -7
  20. package/CHANGELOG.md +0 -18
  21. package/dist/ClassifierOrchestrator.d.ts +0 -126
  22. package/dist/ClassifierOrchestrator.d.ts.map +0 -1
  23. package/dist/ClassifierOrchestrator.js +0 -239
  24. package/dist/ClassifierOrchestrator.js.map +0 -1
  25. package/dist/IContentClassifier.d.ts +0 -117
  26. package/dist/IContentClassifier.d.ts.map +0 -1
  27. package/dist/IContentClassifier.js +0 -22
  28. package/dist/IContentClassifier.js.map +0 -1
  29. package/dist/SlidingWindowBuffer.d.ts +0 -213
  30. package/dist/SlidingWindowBuffer.d.ts.map +0 -1
  31. package/dist/SlidingWindowBuffer.js +0 -246
  32. package/dist/SlidingWindowBuffer.js.map +0 -1
  33. package/dist/classifiers/InjectionClassifier.d.ts +0 -126
  34. package/dist/classifiers/InjectionClassifier.d.ts.map +0 -1
  35. package/dist/classifiers/InjectionClassifier.js +0 -210
  36. package/dist/classifiers/InjectionClassifier.js.map +0 -1
  37. package/dist/classifiers/JailbreakClassifier.d.ts +0 -124
  38. package/dist/classifiers/JailbreakClassifier.d.ts.map +0 -1
  39. package/dist/classifiers/JailbreakClassifier.js +0 -208
  40. package/dist/classifiers/JailbreakClassifier.js.map +0 -1
  41. package/dist/classifiers/ToxicityClassifier.d.ts +0 -125
  42. package/dist/classifiers/ToxicityClassifier.d.ts.map +0 -1
  43. package/dist/classifiers/ToxicityClassifier.js +0 -212
  44. package/dist/classifiers/ToxicityClassifier.js.map +0 -1
  45. package/dist/classifiers/WorkerClassifierProxy.d.ts +0 -158
  46. package/dist/classifiers/WorkerClassifierProxy.d.ts.map +0 -1
  47. package/dist/classifiers/WorkerClassifierProxy.js +0 -268
  48. package/dist/classifiers/WorkerClassifierProxy.js.map +0 -1
  49. package/dist/worker/classifier-worker.d.ts +0 -49
  50. package/dist/worker/classifier-worker.d.ts.map +0 -1
  51. package/dist/worker/classifier-worker.js +0 -180
  52. package/dist/worker/classifier-worker.js.map +0 -1
  53. package/src/ClassifierOrchestrator.ts +0 -290
  54. package/src/IContentClassifier.ts +0 -124
  55. package/src/SlidingWindowBuffer.ts +0 -384
  56. package/src/classifiers/InjectionClassifier.ts +0 -261
  57. package/src/classifiers/JailbreakClassifier.ts +0 -259
  58. package/src/classifiers/ToxicityClassifier.ts +0 -263
  59. package/src/classifiers/WorkerClassifierProxy.ts +0 -366
  60. package/src/worker/classifier-worker.ts +0 -267
  61. package/test/ClassifierOrchestrator.spec.ts +0 -365
  62. package/test/ClassifyContentTool.spec.ts +0 -226
  63. package/test/InjectionClassifier.spec.ts +0 -263
  64. package/test/JailbreakClassifier.spec.ts +0 -295
  65. package/test/MLClassifierGuardrail.spec.ts +0 -486
  66. package/test/SlidingWindowBuffer.spec.ts +0 -391
  67. package/test/ToxicityClassifier.spec.ts +0 -268
  68. package/test/WorkerClassifierProxy.spec.ts +0 -303
  69. package/test/index.spec.ts +0 -431
@@ -1,117 +0,0 @@
1
- /**
2
- * @fileoverview Interface contract for ML-backed content classifiers.
3
- *
4
- * An `IContentClassifier` represents a single model pipeline that accepts
5
- * arbitrary text and returns a {@link ClassificationResult} containing the
6
- * winning label and confidence scores for all candidate classes.
7
- *
8
- * Built-in implementations (toxicity, injection, jailbreak) each implement
9
- * this interface. Third-party classifiers may be registered via the
10
- * `customClassifiers` option of {@link MLClassifierPackOptions}.
11
- *
12
- * Lifecycle
13
- * ---------
14
- * 1. The pack initialises each classifier (model loading, warm-up).
15
- * 2. The guardrail pipeline calls `classify()` for every text chunk.
16
- * 3. On pack teardown, `dispose()` is called (if present) to release GPU/
17
- * WASM memory.
18
- *
19
- * @module agentos/extensions/packs/ml-classifiers/IContentClassifier
20
- */
21
- import type { ClassificationResult } from '@framers/agentos';
22
- /**
23
- * Contract for a single ML content classifier.
24
- *
25
- * Implementations back one model pipeline and expose a narrow classify/dispose
26
- * API so the guardrail orchestrator can drive them uniformly regardless of the
27
- * underlying runtime (Node.js ONNX, browser WASM, remote inference endpoint).
28
- *
29
- * @example Minimal custom classifier
30
- * ```typescript
31
- * class SarcasmClassifier implements IContentClassifier {
32
- * readonly id = 'custom:sarcasm-detector';
33
- * readonly displayName = 'Sarcasm Detector';
34
- * readonly description = 'Detects sarcastic or ironic statements.';
35
- * readonly modelId = 'my-org/sarcasm-bert';
36
- * isLoaded = false;
37
- *
38
- * async classify(text: string): Promise<ClassificationResult> {
39
- * // … run inference …
40
- * return { bestClass: 'NOT_SARCASTIC', confidence: 0.8, allScores: [] };
41
- * }
42
- *
43
- * async dispose(): Promise<void> {
44
- * // Free resources.
45
- * }
46
- * }
47
- * ```
48
- */
49
- export interface IContentClassifier {
50
- /**
51
- * Unique service identifier for this classifier.
52
- *
53
- * Must follow the `agentos:<domain>:<name>` convention so it can be
54
- * registered with the AgentOS shared service registry.
55
- *
56
- * @example `'agentos:ml-classifiers:toxicity-pipeline'`
57
- */
58
- readonly id: string;
59
- /**
60
- * Human-readable name displayed in logs and dashboards.
61
- *
62
- * @example `'Toxicity Pipeline'`
63
- */
64
- readonly displayName: string;
65
- /**
66
- * Short prose description of what this classifier detects.
67
- *
68
- * @example `'Detects toxic, hateful, or abusive language in text.'`
69
- */
70
- readonly description: string;
71
- /**
72
- * Identifier of the underlying model being used, typically a Hugging Face
73
- * model ID or a local filesystem path.
74
- *
75
- * @example `'Xenova/toxic-bert'`
76
- */
77
- readonly modelId: string;
78
- /**
79
- * Whether the model weights have been fully loaded into memory and the
80
- * classifier is ready to accept `classify()` calls.
81
- *
82
- * The pack initialiser sets this to `true` after the warm-up inference
83
- * succeeds. Callers can check this flag before calling `classify()` to
84
- * avoid queueing calls during a slow model download.
85
- */
86
- isLoaded: boolean;
87
- /**
88
- * Classify the provided text and return confidence scores for all candidate
89
- * labels.
90
- *
91
- * The classifier is responsible for mapping raw model output to the
92
- * {@link ClassificationResult} shape. It should NOT apply thresholds or
93
- * guardrail actions — that is the responsibility of the pack orchestrator.
94
- *
95
- * @param text - The text to classify. May be a short chunk from a streaming
96
- * response or a complete message. Must not be empty.
97
- * @returns A promise that resolves with the classification result, including
98
- * the winning label (`bestClass`), its `confidence`, and `allScores` for
99
- * every label the model evaluated.
100
- * @throws {Error} If the model is not loaded (`isLoaded === false`) or if
101
- * inference fails for an unrecoverable reason.
102
- */
103
- classify(text: string): Promise<ClassificationResult>;
104
- /**
105
- * Release all resources held by this classifier (model weights, WASM
106
- * module, GPU buffers, worker threads, etc.).
107
- *
108
- * Called by the pack orchestrator during AgentOS shutdown or when the pack
109
- * is unloaded. Implementations should be idempotent — calling `dispose()`
110
- * multiple times must not throw.
111
- *
112
- * @optional Classifiers that hold no persistent resources may omit this
113
- * method.
114
- */
115
- dispose?(): Promise<void>;
116
- }
117
- //# sourceMappingURL=IContentClassifier.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"IContentClassifier.d.ts","sourceRoot":"","sources":["../src/IContentClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAE7D;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;;;;;OAOG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IAEpB;;;;OAIG;IACH,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAE7B;;;;OAIG;IACH,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAE7B;;;;;OAKG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IAEzB;;;;;;;OAOG;IACH,QAAQ,EAAE,OAAO,CAAC;IAElB;;;;;;;;;;;;;;;OAeG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAEtD;;;;;;;;;;OAUG;IACH,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B"}
@@ -1,22 +0,0 @@
1
- /**
2
- * @fileoverview Interface contract for ML-backed content classifiers.
3
- *
4
- * An `IContentClassifier` represents a single model pipeline that accepts
5
- * arbitrary text and returns a {@link ClassificationResult} containing the
6
- * winning label and confidence scores for all candidate classes.
7
- *
8
- * Built-in implementations (toxicity, injection, jailbreak) each implement
9
- * this interface. Third-party classifiers may be registered via the
10
- * `customClassifiers` option of {@link MLClassifierPackOptions}.
11
- *
12
- * Lifecycle
13
- * ---------
14
- * 1. The pack initialises each classifier (model loading, warm-up).
15
- * 2. The guardrail pipeline calls `classify()` for every text chunk.
16
- * 3. On pack teardown, `dispose()` is called (if present) to release GPU/
17
- * WASM memory.
18
- *
19
- * @module agentos/extensions/packs/ml-classifiers/IContentClassifier
20
- */
21
- export {};
22
- //# sourceMappingURL=IContentClassifier.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"IContentClassifier.js","sourceRoot":"","sources":["../src/IContentClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG"}
@@ -1,213 +0,0 @@
1
- /**
2
- * @fileoverview Sliding-window text buffer for streaming ML classifier evaluation.
3
- *
4
- * When an LLM streams its response token-by-token, we cannot wait for the
5
- * complete response before running safety classifiers — that would be too late
6
- * to block or sanitise harmful content. At the same time, classifiers are
7
- * expensive: running one on every individual token is wasteful and introduces
8
- * unacceptable latency.
9
- *
10
- * `SlidingWindowBuffer` solves this by accumulating tokens from one or more
11
- * concurrent streams and emitting a {@link ChunkReady} event only when enough
12
- * tokens have accumulated to fill a `chunkSize`-token window. Each window
13
- * also includes a `contextSize`-token "ring" from the previous chunk, so the
14
- * classifier can reason about content that spans window boundaries.
15
- *
16
- * Architecture
17
- * ------------
18
- * - **Per-stream state**: Stored in a `Map<streamId, WindowState>`. Each
19
- * stream is fully independent and can be used across multiple concurrent
20
- * responses.
21
- * - **Token estimation**: Uses the 4-chars-per-token heuristic for speed;
22
- * callers that need exact counts should pre-tokenise text before pushing.
23
- * - **Evaluation budget**: Once a stream reaches `maxEvaluations` chunks,
24
- * `push()` returns `null` for all subsequent pushes, preventing unbounded
25
- * classifier invocations on very long responses.
26
- * - **Stale-stream pruning**: Streams that have not received data within
27
- * `streamTimeoutMs` milliseconds are lazily evicted from the map to prevent
28
- * memory leaks in long-running servers.
29
- *
30
- * @module agentos/extensions/packs/ml-classifiers/SlidingWindowBuffer
31
- */
32
- /**
33
- * Configuration for a {@link SlidingWindowBuffer} instance.
34
- *
35
- * All fields are optional; unset fields fall back to the defaults shown below.
36
- */
37
- export interface SlidingWindowConfig {
38
- /**
39
- * Target window size in *estimated* tokens. When the accumulated buffer
40
- * reaches or exceeds this many tokens, a {@link ChunkReady} is emitted and
41
- * the buffer is slid forward.
42
- *
43
- * @default 200
44
- */
45
- chunkSize: number;
46
- /**
47
- * Number of tokens from the tail of the previous window to carry into the
48
- * `text` field of the next {@link ChunkReady}. This overlap prevents
49
- * boundary effects where a phrase split across two windows is misclassified.
50
- *
51
- * @default 50
52
- */
53
- contextSize: number;
54
- /**
55
- * Maximum number of {@link ChunkReady} events to emit per stream. After
56
- * this budget is exhausted, `push()` returns `null` for the remainder of the
57
- * stream. Use `flush()` to retrieve any buffered text that has not been
58
- * emitted yet.
59
- *
60
- * @default 100
61
- */
62
- maxEvaluations: number;
63
- /**
64
- * Milliseconds of inactivity after which a stream is considered stale and
65
- * eligible for eviction by {@link SlidingWindowBuffer.pruneStale}.
66
- *
67
- * @default 30000
68
- */
69
- streamTimeoutMs: number;
70
- }
71
- /**
72
- * Emitted by {@link SlidingWindowBuffer.push} when sufficient tokens have
73
- * accumulated to fill one evaluation window.
74
- */
75
- export interface ChunkReady {
76
- /**
77
- * The full text to classify. Equals `contextRing + newBuffer`, where
78
- * `contextRing` is the carried-forward tail from the previous window.
79
- * Always non-empty.
80
- */
81
- text: string;
82
- /**
83
- * Only the *new* text pushed since the last chunk was emitted (i.e. without
84
- * the context prefix). Useful for determining which part of the response
85
- * was newly evaluated.
86
- */
87
- newText: string;
88
- /**
89
- * 1-indexed sequence number for this chunk within the stream.
90
- * The first chunk emitted for a stream has `evaluationNumber === 1`.
91
- */
92
- evaluationNumber: number;
93
- }
94
- /**
95
- * A stateful, multi-stream text accumulator that emits fixed-size windows
96
- * for ML classifier evaluation with configurable context carry-forward.
97
- *
98
- * @example
99
- * ```typescript
100
- * const buf = new SlidingWindowBuffer({ chunkSize: 200, contextSize: 50 });
101
- *
102
- * // Simulate streaming tokens
103
- * for (const token of streamedTokens) {
104
- * const chunk = buf.push('stream-1', token);
105
- * if (chunk) {
106
- * const result = await toxicityClassifier.classify(chunk.text);
107
- * if (result.confidence > 0.9) terminateStream();
108
- * }
109
- * }
110
- *
111
- * // Evaluate remaining tokens
112
- * const finalChunk = buf.flush('stream-1');
113
- * if (finalChunk) {
114
- * await toxicityClassifier.classify(finalChunk.text);
115
- * }
116
- * ```
117
- */
118
- export declare class SlidingWindowBuffer {
119
- /** Resolved configuration (defaults applied). */
120
- private readonly config;
121
- /**
122
- * Per-stream state map. Keyed by the `streamId` passed to `push()`.
123
- * Entries are created lazily on first push and removed on flush or prune.
124
- */
125
- private readonly streams;
126
- /**
127
- * Construct a new buffer with the supplied configuration.
128
- *
129
- * @param config - Partial configuration; unset fields fall back to defaults:
130
- * `chunkSize=200`, `contextSize=50`, `maxEvaluations=100`,
131
- * `streamTimeoutMs=30000`.
132
- */
133
- constructor(config?: Partial<SlidingWindowConfig>);
134
- /**
135
- * Push new text into the buffer for the specified stream.
136
- *
137
- * Internally the text is appended to the stream's accumulation buffer.
138
- * If the buffer's estimated token count reaches `chunkSize`, a
139
- * {@link ChunkReady} is assembled and returned; the buffer is then reset
140
- * (with the tail preserved as the context ring for the next window).
141
- *
142
- * Returns `null` when:
143
- * - The buffer has not yet accumulated `chunkSize` tokens.
144
- * - The stream has already emitted `maxEvaluations` chunks.
145
- *
146
- * When the map contains more than 10 streams, stale streams are pruned
147
- * lazily after the push is processed.
148
- *
149
- * @param streamId - Opaque identifier for the stream (e.g. a request UUID).
150
- * @param text - The new text fragment to accumulate.
151
- * @returns A {@link ChunkReady} when an evaluation window is complete, or
152
- * `null` if more data is needed (or the budget is exhausted).
153
- */
154
- push(streamId: string, text: string): ChunkReady | null;
155
- /**
156
- * Flush any remaining buffered text for the stream as a final chunk.
157
- *
158
- * Call this after the stream ends (e.g. when the LLM emits its final
159
- * token) to ensure the classifier evaluates the tail of the response.
160
- *
161
- * The stream's state entry is removed from the map after flushing.
162
- *
163
- * @param streamId - Identifier of the stream to flush.
164
- * @returns A {@link ChunkReady} for the remaining buffer, or `null` if the
165
- * buffer is empty or the stream does not exist.
166
- */
167
- flush(streamId: string): ChunkReady | null;
168
- /**
169
- * Remove streams that have not received data within `streamTimeoutMs`.
170
- *
171
- * Called lazily by `push()` when the stream map grows beyond 10 entries.
172
- * May also be called proactively by a maintenance timer.
173
- */
174
- pruneStale(): void;
175
- /**
176
- * Remove all stream state from the buffer.
177
- *
178
- * Useful for graceful shutdown or unit-test teardown to ensure no cross-test
179
- * state leaks.
180
- */
181
- clear(): void;
182
- /**
183
- * The number of streams currently tracked (including stale ones not yet
184
- * pruned).
185
- *
186
- * Exposed primarily for testing and diagnostics.
187
- */
188
- get size(): number;
189
- /**
190
- * Assemble a {@link ChunkReady} from the current stream state.
191
- *
192
- * The `text` field is the concatenation of `contextRing` and the current
193
- * `buffer`, giving the classifier cross-boundary context. The `newText`
194
- * field is just the raw `buffer` so callers can distinguish old from new.
195
- *
196
- * @param state - The mutable state for the stream being assembled.
197
- * @returns A fully-populated {@link ChunkReady}.
198
- */
199
- private assembleChunk;
200
- /**
201
- * Estimate the number of LLM tokens in a string using the 4-chars-per-token
202
- * heuristic.
203
- *
204
- * This deliberately mirrors {@link estimateTokens} from `core/utils/text-utils`
205
- * without importing it, keeping this module self-contained and safe to load
206
- * in Web Worker contexts where module resolution may differ.
207
- *
208
- * @param text - The string to estimate.
209
- * @returns Non-negative integer token count estimate.
210
- */
211
- private estimateTokens;
212
- }
213
- //# sourceMappingURL=SlidingWindowBuffer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"SlidingWindowBuffer.d.ts","sourceRoot":"","sources":["../src/SlidingWindowBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAMH;;;;GAIG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;OAMG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB;;;;;;OAMG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;;;;;;OAOG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB;;;;OAIG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB;;;OAGG;IACH,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAgDD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,iDAAiD;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAE7C;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAE/D;;;;;;OAMG;gBACS,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC;IAajD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAwDvD;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAe1C;;;;;OAKG;IACH,UAAU,IAAI,IAAI;IASlB;;;;;OAKG;IACH,KAAK,IAAI,IAAI;IAIb;;;;;OAKG;IACH,IAAI,IAAI,IAAI,MAAM,CAEjB;IAMD;;;;;;;;;OASG;IACH,OAAO,CAAC,aAAa;IAWrB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,cAAc;CAIvB"}
@@ -1,246 +0,0 @@
1
- /**
2
- * @fileoverview Sliding-window text buffer for streaming ML classifier evaluation.
3
- *
4
- * When an LLM streams its response token-by-token, we cannot wait for the
5
- * complete response before running safety classifiers — that would be too late
6
- * to block or sanitise harmful content. At the same time, classifiers are
7
- * expensive: running one on every individual token is wasteful and introduces
8
- * unacceptable latency.
9
- *
10
- * `SlidingWindowBuffer` solves this by accumulating tokens from one or more
11
- * concurrent streams and emitting a {@link ChunkReady} event only when enough
12
- * tokens have accumulated to fill a `chunkSize`-token window. Each window
13
- * also includes a `contextSize`-token "ring" from the previous chunk, so the
14
- * classifier can reason about content that spans window boundaries.
15
- *
16
- * Architecture
17
- * ------------
18
- * - **Per-stream state**: Stored in a `Map<streamId, WindowState>`. Each
19
- * stream is fully independent and can be used across multiple concurrent
20
- * responses.
21
- * - **Token estimation**: Uses the 4-chars-per-token heuristic for speed;
22
- * callers that need exact counts should pre-tokenise text before pushing.
23
- * - **Evaluation budget**: Once a stream reaches `maxEvaluations` chunks,
24
- * `push()` returns `null` for all subsequent pushes, preventing unbounded
25
- * classifier invocations on very long responses.
26
- * - **Stale-stream pruning**: Streams that have not received data within
27
- * `streamTimeoutMs` milliseconds are lazily evicted from the map to prevent
28
- * memory leaks in long-running servers.
29
- *
30
- * @module agentos/extensions/packs/ml-classifiers/SlidingWindowBuffer
31
- */
32
- // ---------------------------------------------------------------------------
33
- // SlidingWindowBuffer implementation
34
- // ---------------------------------------------------------------------------
35
- /**
36
- * A stateful, multi-stream text accumulator that emits fixed-size windows
37
- * for ML classifier evaluation with configurable context carry-forward.
38
- *
39
- * @example
40
- * ```typescript
41
- * const buf = new SlidingWindowBuffer({ chunkSize: 200, contextSize: 50 });
42
- *
43
- * // Simulate streaming tokens
44
- * for (const token of streamedTokens) {
45
- * const chunk = buf.push('stream-1', token);
46
- * if (chunk) {
47
- * const result = await toxicityClassifier.classify(chunk.text);
48
- * if (result.confidence > 0.9) terminateStream();
49
- * }
50
- * }
51
- *
52
- * // Evaluate remaining tokens
53
- * const finalChunk = buf.flush('stream-1');
54
- * if (finalChunk) {
55
- * await toxicityClassifier.classify(finalChunk.text);
56
- * }
57
- * ```
58
- */
59
- export class SlidingWindowBuffer {
60
- /** Resolved configuration (defaults applied). */
61
- config;
62
- /**
63
- * Per-stream state map. Keyed by the `streamId` passed to `push()`.
64
- * Entries are created lazily on first push and removed on flush or prune.
65
- */
66
- streams = new Map();
67
- /**
68
- * Construct a new buffer with the supplied configuration.
69
- *
70
- * @param config - Partial configuration; unset fields fall back to defaults:
71
- * `chunkSize=200`, `contextSize=50`, `maxEvaluations=100`,
72
- * `streamTimeoutMs=30000`.
73
- */
74
- constructor(config) {
75
- this.config = {
76
- chunkSize: config?.chunkSize ?? 200,
77
- contextSize: config?.contextSize ?? 50,
78
- maxEvaluations: config?.maxEvaluations ?? 100,
79
- streamTimeoutMs: config?.streamTimeoutMs ?? 30_000,
80
- };
81
- }
82
- // -------------------------------------------------------------------------
83
- // Public API
84
- // -------------------------------------------------------------------------
85
- /**
86
- * Push new text into the buffer for the specified stream.
87
- *
88
- * Internally the text is appended to the stream's accumulation buffer.
89
- * If the buffer's estimated token count reaches `chunkSize`, a
90
- * {@link ChunkReady} is assembled and returned; the buffer is then reset
91
- * (with the tail preserved as the context ring for the next window).
92
- *
93
- * Returns `null` when:
94
- * - The buffer has not yet accumulated `chunkSize` tokens.
95
- * - The stream has already emitted `maxEvaluations` chunks.
96
- *
97
- * When the map contains more than 10 streams, stale streams are pruned
98
- * lazily after the push is processed.
99
- *
100
- * @param streamId - Opaque identifier for the stream (e.g. a request UUID).
101
- * @param text - The new text fragment to accumulate.
102
- * @returns A {@link ChunkReady} when an evaluation window is complete, or
103
- * `null` if more data is needed (or the budget is exhausted).
104
- */
105
- push(streamId, text) {
106
- if (!text) {
107
- return null;
108
- }
109
- // Initialise state for a new stream.
110
- if (!this.streams.has(streamId)) {
111
- this.streams.set(streamId, {
112
- buffer: '',
113
- tokenCount: 0,
114
- contextRing: '',
115
- evaluationCount: 0,
116
- lastSeenAt: Date.now(),
117
- });
118
- }
119
- const state = this.streams.get(streamId);
120
- state.lastSeenAt = Date.now();
121
- // Respect the evaluation budget — stop emitting chunks once exhausted.
122
- if (state.evaluationCount >= this.config.maxEvaluations) {
123
- return null;
124
- }
125
- // Accumulate incoming text.
126
- state.buffer += text;
127
- state.tokenCount = this.estimateTokens(state.buffer);
128
- // Lazy pruning: clean up stale streams whenever the map grows large.
129
- // Done unconditionally (not just on chunk emit) so stale entries are
130
- // reclaimed even when streams are slow to accumulate a full window.
131
- if (this.streams.size > 10) {
132
- this.pruneStale();
133
- }
134
- // Not enough tokens yet — wait for more.
135
- if (state.tokenCount < this.config.chunkSize) {
136
- return null;
137
- }
138
- // We have a full window. Assemble the chunk.
139
- const chunk = this.assembleChunk(state);
140
- // Slide the context ring forward: keep the last `contextSize` tokens'
141
- // worth of characters from the buffer that was just emitted.
142
- const contextCharBudget = this.config.contextSize * 4;
143
- state.contextRing = state.buffer.slice(-contextCharBudget);
144
- // Reset the buffer and token count for the next window.
145
- state.buffer = '';
146
- state.tokenCount = 0;
147
- state.evaluationCount += 1;
148
- return chunk;
149
- }
150
- /**
151
- * Flush any remaining buffered text for the stream as a final chunk.
152
- *
153
- * Call this after the stream ends (e.g. when the LLM emits its final
154
- * token) to ensure the classifier evaluates the tail of the response.
155
- *
156
- * The stream's state entry is removed from the map after flushing.
157
- *
158
- * @param streamId - Identifier of the stream to flush.
159
- * @returns A {@link ChunkReady} for the remaining buffer, or `null` if the
160
- * buffer is empty or the stream does not exist.
161
- */
162
- flush(streamId) {
163
- const state = this.streams.get(streamId);
164
- // Nothing to flush if the stream is unknown or the buffer is empty.
165
- if (!state || state.buffer.length === 0) {
166
- // Always clean up the map entry, even for empty buffers.
167
- this.streams.delete(streamId);
168
- return null;
169
- }
170
- const chunk = this.assembleChunk(state);
171
- this.streams.delete(streamId);
172
- return chunk;
173
- }
174
- /**
175
- * Remove streams that have not received data within `streamTimeoutMs`.
176
- *
177
- * Called lazily by `push()` when the stream map grows beyond 10 entries.
178
- * May also be called proactively by a maintenance timer.
179
- */
180
- pruneStale() {
181
- const now = Date.now();
182
- for (const [id, state] of this.streams) {
183
- if (now - state.lastSeenAt > this.config.streamTimeoutMs) {
184
- this.streams.delete(id);
185
- }
186
- }
187
- }
188
- /**
189
- * Remove all stream state from the buffer.
190
- *
191
- * Useful for graceful shutdown or unit-test teardown to ensure no cross-test
192
- * state leaks.
193
- */
194
- clear() {
195
- this.streams.clear();
196
- }
197
- /**
198
- * The number of streams currently tracked (including stale ones not yet
199
- * pruned).
200
- *
201
- * Exposed primarily for testing and diagnostics.
202
- */
203
- get size() {
204
- return this.streams.size;
205
- }
206
- // -------------------------------------------------------------------------
207
- // Private helpers
208
- // -------------------------------------------------------------------------
209
- /**
210
- * Assemble a {@link ChunkReady} from the current stream state.
211
- *
212
- * The `text` field is the concatenation of `contextRing` and the current
213
- * `buffer`, giving the classifier cross-boundary context. The `newText`
214
- * field is just the raw `buffer` so callers can distinguish old from new.
215
- *
216
- * @param state - The mutable state for the stream being assembled.
217
- * @returns A fully-populated {@link ChunkReady}.
218
- */
219
- assembleChunk(state) {
220
- const newText = state.buffer;
221
- const text = state.contextRing + newText;
222
- return {
223
- text,
224
- newText,
225
- // evaluationCount is 0-indexed before increment, so +1 gives 1-indexed number.
226
- evaluationNumber: state.evaluationCount + 1,
227
- };
228
- }
229
- /**
230
- * Estimate the number of LLM tokens in a string using the 4-chars-per-token
231
- * heuristic.
232
- *
233
- * This deliberately mirrors {@link estimateTokens} from `core/utils/text-utils`
234
- * without importing it, keeping this module self-contained and safe to load
235
- * in Web Worker contexts where module resolution may differ.
236
- *
237
- * @param text - The string to estimate.
238
- * @returns Non-negative integer token count estimate.
239
- */
240
- estimateTokens(text) {
241
- if (!text)
242
- return 0;
243
- return Math.ceil(text.length / 4);
244
- }
245
- }
246
- //# sourceMappingURL=SlidingWindowBuffer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"SlidingWindowBuffer.js","sourceRoot":"","sources":["../src/SlidingWindowBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAqHH,8EAA8E;AAC9E,qCAAqC;AACrC,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,mBAAmB;IAC9B,iDAAiD;IAChC,MAAM,CAAsB;IAE7C;;;OAGG;IACc,OAAO,GAA6B,IAAI,GAAG,EAAE,CAAC;IAE/D;;;;;;OAMG;IACH,YAAY,MAAqC;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,EAAE,SAAS,IAAI,GAAG;YACnC,WAAW,EAAE,MAAM,EAAE,WAAW,IAAI,EAAE;YACtC,cAAc,EAAE,MAAM,EAAE,cAAc,IAAI,GAAG;YAC7C,eAAe,EAAE,MAAM,EAAE,eAAe,IAAI,MAAM;SACnD,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,aAAa;IACb,4EAA4E;IAE5E;;;;;;;;;;;;;;;;;;;OAmBG;IACH,IAAI,CAAC,QAAgB,EAAE,IAAY;QACjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE;gBACzB,MAAM,EAAE,EAAE;gBACV,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,EAAE;gBACf,eAAe,EAAE,CAAC;gBAClB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;aACvB,CAAC,CAAC;QACL,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;QAC1C,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE9B,uEAAuE;QACvE,IAAI,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YACxD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,4BAA4B;QAC5B,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC;QACrB,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAErD,qEAAqE;QACrE,qEAAqE;QACrE,oEAAoE;QACpE,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC;YAC3B,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC;QAED,yCAAyC;QACzC,IAAI,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC7C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAExC,sEAAsE;QACtE,6DAA6D;QAC7D,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,GAAG,CAAC,CAAC;QACtD,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,iBAAiB,CAAC,CAAC;QAE3D,wDAAwD;QACxD,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC;QAClB,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC;QACrB,KAAK,CAAC,eAAe,IAAI,CAAC,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAgB;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAEzC,oEAAoE;QACpE,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxC,yDAAyD;YACzD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;OAKG;IACH,UAAU;QACR,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACvC,IAAI,GAAG,GAAG,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;gBACzD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED;;;;;OAKG;IACH,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;;;;;OASG;IACK,aAAa,CAAC,KAAkB;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC;QACzC,OAAO;YACL,IAAI;YACJ,OAAO;YACP,+EAA+E;YAC/E,gBAAgB,EAAE,KAAK,CAAC,eAAe,GAAG,CAAC;SAC5C,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACK,cAAc,CAAC,IAAY;QACjC,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QACpB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;CACF"}