@framers/agentos-ext-ml-classifiers 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/LICENSE +23 -0
  2. package/dist/ClassifierOrchestrator.d.ts +126 -0
  3. package/dist/ClassifierOrchestrator.d.ts.map +1 -0
  4. package/dist/ClassifierOrchestrator.js +239 -0
  5. package/dist/ClassifierOrchestrator.js.map +1 -0
  6. package/dist/IContentClassifier.d.ts +117 -0
  7. package/dist/IContentClassifier.d.ts.map +1 -0
  8. package/dist/IContentClassifier.js +22 -0
  9. package/dist/IContentClassifier.js.map +1 -0
  10. package/dist/MLClassifierGuardrail.d.ts +163 -0
  11. package/dist/MLClassifierGuardrail.d.ts.map +1 -0
  12. package/dist/MLClassifierGuardrail.js +335 -0
  13. package/dist/MLClassifierGuardrail.js.map +1 -0
  14. package/dist/SlidingWindowBuffer.d.ts +213 -0
  15. package/dist/SlidingWindowBuffer.d.ts.map +1 -0
  16. package/dist/SlidingWindowBuffer.js +246 -0
  17. package/dist/SlidingWindowBuffer.js.map +1 -0
  18. package/dist/classifiers/InjectionClassifier.d.ts +126 -0
  19. package/dist/classifiers/InjectionClassifier.d.ts.map +1 -0
  20. package/dist/classifiers/InjectionClassifier.js +210 -0
  21. package/dist/classifiers/InjectionClassifier.js.map +1 -0
  22. package/dist/classifiers/JailbreakClassifier.d.ts +124 -0
  23. package/dist/classifiers/JailbreakClassifier.d.ts.map +1 -0
  24. package/dist/classifiers/JailbreakClassifier.js +208 -0
  25. package/dist/classifiers/JailbreakClassifier.js.map +1 -0
  26. package/dist/classifiers/ToxicityClassifier.d.ts +125 -0
  27. package/dist/classifiers/ToxicityClassifier.d.ts.map +1 -0
  28. package/dist/classifiers/ToxicityClassifier.js +212 -0
  29. package/dist/classifiers/ToxicityClassifier.js.map +1 -0
  30. package/dist/classifiers/WorkerClassifierProxy.d.ts +158 -0
  31. package/dist/classifiers/WorkerClassifierProxy.d.ts.map +1 -0
  32. package/dist/classifiers/WorkerClassifierProxy.js +268 -0
  33. package/dist/classifiers/WorkerClassifierProxy.js.map +1 -0
  34. package/dist/index.d.ts +110 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +342 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/tools/ClassifyContentTool.d.ts +105 -0
  39. package/dist/tools/ClassifyContentTool.d.ts.map +1 -0
  40. package/dist/tools/ClassifyContentTool.js +149 -0
  41. package/dist/tools/ClassifyContentTool.js.map +1 -0
  42. package/dist/types.d.ts +319 -0
  43. package/dist/types.d.ts.map +1 -0
  44. package/dist/types.js +62 -0
  45. package/dist/types.js.map +1 -0
  46. package/dist/worker/classifier-worker.d.ts +49 -0
  47. package/dist/worker/classifier-worker.d.ts.map +1 -0
  48. package/dist/worker/classifier-worker.js +180 -0
  49. package/dist/worker/classifier-worker.js.map +1 -0
  50. package/package.json +45 -0
  51. package/src/ClassifierOrchestrator.ts +290 -0
  52. package/src/IContentClassifier.ts +124 -0
  53. package/src/MLClassifierGuardrail.ts +419 -0
  54. package/src/SlidingWindowBuffer.ts +384 -0
  55. package/src/classifiers/InjectionClassifier.ts +261 -0
  56. package/src/classifiers/JailbreakClassifier.ts +259 -0
  57. package/src/classifiers/ToxicityClassifier.ts +263 -0
  58. package/src/classifiers/WorkerClassifierProxy.ts +366 -0
  59. package/src/index.ts +383 -0
  60. package/src/tools/ClassifyContentTool.ts +201 -0
  61. package/src/types.ts +391 -0
  62. package/src/worker/classifier-worker.ts +267 -0
@@ -0,0 +1,213 @@
1
+ /**
2
+ * @fileoverview Sliding-window text buffer for streaming ML classifier evaluation.
3
+ *
4
+ * When an LLM streams its response token-by-token, we cannot wait for the
5
+ * complete response before running safety classifiers — that would be too late
6
+ * to block or sanitise harmful content. At the same time, classifiers are
7
+ * expensive: running one on every individual token is wasteful and introduces
8
+ * unacceptable latency.
9
+ *
10
+ * `SlidingWindowBuffer` solves this by accumulating tokens from one or more
11
+ * concurrent streams and emitting a {@link ChunkReady} event only when enough
12
+ * tokens have accumulated to fill a `chunkSize`-token window. Each window
13
+ * also includes a `contextSize`-token "ring" from the previous chunk, so the
14
+ * classifier can reason about content that spans window boundaries.
15
+ *
16
+ * Architecture
17
+ * ------------
18
+ * - **Per-stream state**: Stored in a `Map<streamId, WindowState>`. Each
19
+ * stream is fully independent and can be used across multiple concurrent
20
+ * responses.
21
+ * - **Token estimation**: Uses the 4-chars-per-token heuristic for speed;
22
+ * the estimate is always `ceil(length / 4)` of the buffered text, so window sizes are approximate for any real tokenizer.
23
+ * - **Evaluation budget**: Once a stream reaches `maxEvaluations` chunks,
24
+ * `push()` returns `null` for all subsequent pushes, preventing unbounded
25
+ * classifier invocations on very long responses.
26
+ * - **Stale-stream pruning**: Streams that have not received data within
27
+ * `streamTimeoutMs` milliseconds are lazily evicted from the map to prevent
28
+ * memory leaks in long-running servers.
29
+ *
30
+ * @module agentos/extensions/packs/ml-classifiers/SlidingWindowBuffer
31
+ */
32
+ /**
33
+ * Configuration for a {@link SlidingWindowBuffer} instance.
34
+ *
35
+ * Every field is required on this interface; the {@link SlidingWindowBuffer} constructor accepts a `Partial` of it and fills unset fields with the defaults shown below.
36
+ */
37
+ export interface SlidingWindowConfig {
38
+ /**
39
+ * Target window size in *estimated* tokens. When the accumulated buffer
40
+ * reaches or exceeds this many tokens, a {@link ChunkReady} is emitted and
41
+ * the buffer is slid forward.
42
+ *
43
+ * @default 200
44
+ */
45
+ chunkSize: number;
46
+ /**
47
+ * Number of tokens from the tail of the previous window to carry into the
48
+ * `text` field of the next {@link ChunkReady}. This overlap prevents
49
+ * boundary effects where a phrase split across two windows is misclassified.
50
+ * The carry-over is measured in characters, as `contextSize * 4`.
51
+ * @default 50
52
+ */
53
+ contextSize: number;
54
+ /**
55
+ * Maximum number of {@link ChunkReady} events to emit per stream. After
56
+ * this budget is exhausted, `push()` returns `null` for the remainder of the
57
+ * stream. Text pushed after that point is discarded rather than buffered,
58
+ * so `flush()` returns `null` for a stream whose budget is exhausted.
59
+ *
60
+ * @default 100
61
+ */
62
+ maxEvaluations: number;
63
+ /**
64
+ * Milliseconds of inactivity after which a stream is considered stale and
65
+ * eligible for eviction by {@link SlidingWindowBuffer.pruneStale}.
66
+ *
67
+ * @default 30000
68
+ */
69
+ streamTimeoutMs: number;
70
+ }
71
+ /**
72
+ * Returned by {@link SlidingWindowBuffer.push} when sufficient tokens have
73
+ * accumulated to fill one evaluation window, and by {@link SlidingWindowBuffer.flush} for the remaining tail of a stream.
74
+ */
75
+ export interface ChunkReady {
76
+ /**
77
+ * The full text to classify. Equals `contextRing + newBuffer`, where
78
+ * `contextRing` is the carried-forward tail from the previous window.
79
+ * Always non-empty.
80
+ */
81
+ text: string;
82
+ /**
83
+ * Only the *new* text pushed since the last chunk was emitted (i.e. without
84
+ * the context prefix). Useful for determining which part of the response
85
+ * was newly evaluated.
86
+ */
87
+ newText: string;
88
+ /**
89
+ * 1-indexed sequence number for this chunk within the stream.
90
+ * The first chunk emitted for a stream has `evaluationNumber === 1`.
91
+ */
92
+ evaluationNumber: number;
93
+ }
94
+ /**
95
+ * A stateful, multi-stream text accumulator that emits fixed-size windows
96
+ * for ML classifier evaluation with configurable context carry-forward.
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * const buf = new SlidingWindowBuffer({ chunkSize: 200, contextSize: 50 });
101
+ *
102
+ * // Simulate streaming tokens
103
+ * for (const token of streamedTokens) {
104
+ * const chunk = buf.push('stream-1', token);
105
+ * if (chunk) {
106
+ * const result = await toxicityClassifier.classify(chunk.text);
107
+ * if (result.confidence > 0.9) terminateStream();
108
+ * }
109
+ * }
110
+ *
111
+ * // Evaluate remaining tokens
112
+ * const finalChunk = buf.flush('stream-1');
113
+ * if (finalChunk) {
114
+ * await toxicityClassifier.classify(finalChunk.text);
115
+ * }
116
+ * ```
117
+ */
118
+ export declare class SlidingWindowBuffer {
119
+ /** Resolved configuration (defaults applied). */
120
+ private readonly config;
121
+ /**
122
+ * Per-stream state map. Keyed by the `streamId` passed to `push()`.
123
+ * Entries are created lazily on first push and removed on flush or prune.
124
+ */
125
+ private readonly streams;
126
+ /**
127
+ * Construct a new buffer with the supplied configuration.
128
+ *
129
+ * @param config - Partial configuration; unset fields fall back to defaults:
130
+ * `chunkSize=200`, `contextSize=50`, `maxEvaluations=100`,
131
+ * `streamTimeoutMs=30000`.
132
+ */
133
+ constructor(config?: Partial<SlidingWindowConfig>);
134
+ /**
135
+ * Push new text into the buffer for the specified stream.
136
+ *
137
+ * Internally the text is appended to the stream's accumulation buffer.
138
+ * If the buffer's estimated token count reaches `chunkSize`, a
139
+ * {@link ChunkReady} is assembled and returned; the buffer is then reset
140
+ * (with the tail preserved as the context ring for the next window).
141
+ *
142
+ * Returns `null` when:
143
+ * - `text` is empty (checked first; no stream state is created or updated).
+ * - The buffer has not yet accumulated `chunkSize` tokens.
144
+ * - The stream has already emitted `maxEvaluations` chunks (the pushed text is then discarded, not buffered).
145
+ *
146
+ * When the map contains more than 10 streams, stale streams are pruned
147
+ * lazily after the push is processed.
148
+ *
149
+ * @param streamId - Opaque identifier for the stream (e.g. a request UUID).
150
+ * @param text - The new text fragment to accumulate.
151
+ * @returns A {@link ChunkReady} when an evaluation window is complete, or
152
+ * `null` if more data is needed (or the budget is exhausted).
153
+ */
154
+ push(streamId: string, text: string): ChunkReady | null;
155
+ /**
156
+ * Flush any remaining buffered text for the stream as a final chunk.
157
+ *
158
+ * Call this after the stream ends (e.g. when the LLM emits its final
159
+ * token) to ensure the classifier evaluates the tail of the response.
160
+ *
161
+ * The stream's state entry is removed from the map after flushing.
162
+ *
163
+ * @param streamId - Identifier of the stream to flush.
164
+ * @returns A {@link ChunkReady} for the remaining buffer, or `null` if the
165
+ * buffer is empty or the stream does not exist.
166
+ */
167
+ flush(streamId: string): ChunkReady | null;
168
+ /**
169
+ * Remove streams that have not received data within `streamTimeoutMs`.
170
+ *
171
+ * Called lazily by `push()` when the stream map grows beyond 10 entries.
172
+ * May also be called proactively by a maintenance timer.
173
+ */
174
+ pruneStale(): void;
175
+ /**
176
+ * Remove all stream state from the buffer.
177
+ *
178
+ * Useful for graceful shutdown or unit-test teardown to ensure no cross-test
179
+ * state leaks.
180
+ */
181
+ clear(): void;
182
+ /**
183
+ * The number of streams currently tracked (including stale ones not yet
184
+ * pruned).
185
+ *
186
+ * Exposed primarily for testing and diagnostics.
187
+ */
188
+ get size(): number;
189
+ /**
190
+ * Assemble a {@link ChunkReady} from the current stream state.
191
+ *
192
+ * The `text` field is the concatenation of `contextRing` and the current
193
+ * `buffer`, giving the classifier cross-boundary context. The `newText`
194
+ * field is just the raw `buffer` so callers can distinguish old from new.
195
+ *
196
+ * @param state - The mutable state for the stream being assembled.
197
+ * @returns A fully-populated {@link ChunkReady}.
198
+ */
199
+ private assembleChunk;
200
+ /**
201
+ * Estimate the number of LLM tokens in a string using the 4-chars-per-token
202
+ * heuristic.
203
+ *
204
+ * This deliberately mirrors {@link estimateTokens} from `core/utils/text-utils`
205
+ * without importing it, keeping this module self-contained and safe to load
206
+ * in Web Worker contexts where module resolution may differ.
207
+ *
208
+ * @param text - The string to estimate.
209
+ * @returns Non-negative integer token count estimate.
210
+ */
211
+ private estimateTokens;
212
+ }
213
+ //# sourceMappingURL=SlidingWindowBuffer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SlidingWindowBuffer.d.ts","sourceRoot":"","sources":["../src/SlidingWindowBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAMH;;;;GAIG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;OAMG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB;;;;;;OAMG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;;;;;;OAOG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB;;;;OAIG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB;;;OAGG;IACH,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAgDD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B,iDAAiD;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAE7C;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAE/D;;;;;;OAMG;gBACS,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC;IAajD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAwDvD;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAe1C;;;;;OAKG;IACH,UAAU,IAAI,IAAI;IASlB;;;;;OAKG;IACH,KAAK,IAAI,IAAI;IAIb;;;;;OAKG;IACH,IAAI,IAAI,IAAI,MAAM,CAEjB;IAMD;;;;;;;;;OASG;IACH,OAAO,CAAC,aAAa;IAWrB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,cAAc;CAIvB"}
@@ -0,0 +1,246 @@
1
+ /**
2
+ * @fileoverview Sliding-window text buffer for streaming ML classifier evaluation.
3
+ *
4
+ * When an LLM streams its response token-by-token, we cannot wait for the
5
+ * complete response before running safety classifiers — that would be too late
6
+ * to block or sanitise harmful content. At the same time, classifiers are
7
+ * expensive: running one on every individual token is wasteful and introduces
8
+ * unacceptable latency.
9
+ *
10
+ * `SlidingWindowBuffer` solves this by accumulating tokens from one or more
11
+ * concurrent streams and emitting a {@link ChunkReady} event only when enough
12
+ * tokens have accumulated to fill a `chunkSize`-token window. Each window
13
+ * also includes a `contextSize`-token "ring" from the previous chunk, so the
14
+ * classifier can reason about content that spans window boundaries.
15
+ *
16
+ * Architecture
17
+ * ------------
18
+ * - **Per-stream state**: Stored in a `Map<streamId, WindowState>`. Each
19
+ * stream is fully independent and can be used across multiple concurrent
20
+ * responses.
21
+ * - **Token estimation**: Uses the 4-chars-per-token heuristic for speed;
22
+ * the estimate is always `ceil(length / 4)` of the buffered text, so window sizes are approximate for any real tokenizer.
23
+ * - **Evaluation budget**: Once a stream reaches `maxEvaluations` chunks,
24
+ * `push()` returns `null` for all subsequent pushes, preventing unbounded
25
+ * classifier invocations on very long responses.
26
+ * - **Stale-stream pruning**: Streams that have not received data within
27
+ * `streamTimeoutMs` milliseconds are lazily evicted from the map to prevent
28
+ * memory leaks in long-running servers.
29
+ *
30
+ * @module agentos/extensions/packs/ml-classifiers/SlidingWindowBuffer
31
+ */
32
+ // ---------------------------------------------------------------------------
33
+ // SlidingWindowBuffer implementation
34
+ // ---------------------------------------------------------------------------
35
+ /**
36
+ * A stateful, multi-stream text accumulator that emits fixed-size windows
37
+ * for ML classifier evaluation with configurable context carry-forward.
38
+ *
39
+ * @example
40
+ * ```typescript
41
+ * const buf = new SlidingWindowBuffer({ chunkSize: 200, contextSize: 50 });
42
+ *
43
+ * // Simulate streaming tokens
44
+ * for (const token of streamedTokens) {
45
+ * const chunk = buf.push('stream-1', token);
46
+ * if (chunk) {
47
+ * const result = await toxicityClassifier.classify(chunk.text);
48
+ * if (result.confidence > 0.9) terminateStream();
49
+ * }
50
+ * }
51
+ *
52
+ * // Evaluate remaining tokens
53
+ * const finalChunk = buf.flush('stream-1');
54
+ * if (finalChunk) {
55
+ * await toxicityClassifier.classify(finalChunk.text);
56
+ * }
57
+ * ```
58
+ */
59
export class SlidingWindowBuffer {
  /** Resolved configuration (defaults applied). */
  config;

  /**
   * Per-stream state map, keyed by the `streamId` passed to `push()`.
   * Entries are created lazily on first push and removed on flush or prune.
   */
  streams = new Map();

  /**
   * Construct a new buffer with the supplied configuration.
   *
   * @param config - Partial configuration; unset (null/undefined) fields fall
   *   back to defaults: `chunkSize=200`, `contextSize=50`,
   *   `maxEvaluations=100`, `streamTimeoutMs=30000`.
   */
  constructor(config) {
    this.config = {
      chunkSize: config?.chunkSize ?? 200,
      contextSize: config?.contextSize ?? 50,
      maxEvaluations: config?.maxEvaluations ?? 100,
      streamTimeoutMs: config?.streamTimeoutMs ?? 30_000,
    };
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /**
   * Push new text into the buffer for the specified stream.
   *
   * The text is appended to the stream's accumulation buffer. Once the
   * buffer's estimated token count reaches `chunkSize`, a chunk is assembled
   * and returned, the tail of the emitted buffer becomes the context ring for
   * the next window, and the buffer is reset.
   *
   * Returns `null` when:
   * - `text` is empty (no stream state is created or updated);
   * - the stream has already emitted `maxEvaluations` chunks (the pushed text
   *   is discarded, not buffered);
   * - the buffer has not yet accumulated `chunkSize` estimated tokens.
   *
   * When more than 10 streams are tracked, stale streams are pruned after the
   * text has been appended.
   *
   * @param streamId - Opaque identifier for the stream (e.g. a request UUID).
   * @param text - The new text fragment to accumulate.
   * @returns A chunk (`{ text, newText, evaluationNumber }`) when an
   *   evaluation window is complete, otherwise `null`.
   */
  push(streamId, text) {
    if (!text) {
      return null;
    }

    // Initialise state for a new stream.
    if (!this.streams.has(streamId)) {
      this.streams.set(streamId, {
        buffer: '',
        tokenCount: 0,
        contextRing: '',
        evaluationCount: 0,
        lastSeenAt: Date.now(),
      });
    }

    const state = this.streams.get(streamId);
    state.lastSeenAt = Date.now();

    // Respect the evaluation budget: stop emitting chunks once exhausted.
    if (state.evaluationCount >= this.config.maxEvaluations) {
      return null;
    }

    // Accumulate incoming text.
    state.buffer += text;
    state.tokenCount = this.estimateTokens(state.buffer);

    // Lazy pruning: runs on every accumulating push (not only on chunk emit)
    // so stale entries are reclaimed even when streams fill windows slowly.
    if (this.streams.size > 10) {
      this.pruneStale();
    }

    // Not enough tokens yet; wait for more.
    if (state.tokenCount < this.config.chunkSize) {
      return null;
    }

    // We have a full window. Assemble the chunk before mutating the state.
    const chunk = this.assembleChunk(state);

    // Slide the context ring forward: keep the last `contextSize` tokens'
    // worth of characters from the buffer that was just emitted.
    // A non-positive (or NaN) budget must yield an empty ring: `slice(-0)` is
    // `slice(0)` and would otherwise carry the ENTIRE buffer forward.
    const contextCharBudget = this.config.contextSize * 4;
    state.contextRing =
      contextCharBudget > 0 ? state.buffer.slice(-contextCharBudget) : '';

    // Reset the buffer and token count for the next window.
    state.buffer = '';
    state.tokenCount = 0;
    state.evaluationCount += 1;

    return chunk;
  }

  /**
   * Flush any remaining buffered text for the stream as a final chunk.
   *
   * Call this after the stream ends so the tail of the response is still
   * evaluated. The stream's state entry is always removed from the map.
   *
   * @param streamId - Identifier of the stream to flush.
   * @returns A chunk for the remaining buffer, or `null` if the buffer is
   *   empty or the stream does not exist.
   */
  flush(streamId) {
    const state = this.streams.get(streamId);

    // Nothing to flush if the stream is unknown or the buffer is empty.
    if (!state || state.buffer.length === 0) {
      // Always clean up the map entry, even for empty buffers.
      this.streams.delete(streamId);
      return null;
    }

    const chunk = this.assembleChunk(state);
    this.streams.delete(streamId);
    return chunk;
  }

  /**
   * Remove streams whose last push is older than `streamTimeoutMs`.
   *
   * Called by `push()` when the stream map holds more than 10 entries; may
   * also be called directly (e.g. from a maintenance timer).
   */
  pruneStale() {
    const now = Date.now();
    for (const [id, state] of this.streams) {
      if (now - state.lastSeenAt > this.config.streamTimeoutMs) {
        this.streams.delete(id);
      }
    }
  }

  /**
   * Remove all stream state from the buffer.
   */
  clear() {
    this.streams.clear();
  }

  /**
   * The number of streams currently tracked (including stale ones not yet
   * pruned).
   */
  get size() {
    return this.streams.size;
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Assemble a chunk from the current stream state.
   *
   * `text` is `contextRing + buffer`; `newText` is the raw `buffer` alone so
   * callers can distinguish carried-over context from new content.
   *
   * @param state - The mutable state for the stream being assembled.
   * @returns `{ text, newText, evaluationNumber }`.
   */
  assembleChunk(state) {
    const newText = state.buffer;
    const text = state.contextRing + newText;
    return {
      text,
      newText,
      // evaluationCount is incremented after assembly, so +1 is 1-indexed.
      evaluationNumber: state.evaluationCount + 1,
    };
  }

  /**
   * Estimate the number of LLM tokens in a string using the
   * 4-chars-per-token heuristic.
   *
   * @param text - The string to estimate.
   * @returns `0` for an empty/falsy string, otherwise `ceil(length / 4)`.
   */
  estimateTokens(text) {
    if (!text) return 0;
    return Math.ceil(text.length / 4);
  }
}
246
+ //# sourceMappingURL=SlidingWindowBuffer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SlidingWindowBuffer.js","sourceRoot":"","sources":["../src/SlidingWindowBuffer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAqHH,8EAA8E;AAC9E,qCAAqC;AACrC,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,mBAAmB;IAC9B,iDAAiD;IAChC,MAAM,CAAsB;IAE7C;;;OAGG;IACc,OAAO,GAA6B,IAAI,GAAG,EAAE,CAAC;IAE/D;;;;;;OAMG;IACH,YAAY,MAAqC;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,EAAE,SAAS,IAAI,GAAG;YACnC,WAAW,EAAE,MAAM,EAAE,WAAW,IAAI,EAAE;YACtC,cAAc,EAAE,MAAM,EAAE,cAAc,IAAI,GAAG;YAC7C,eAAe,EAAE,MAAM,EAAE,eAAe,IAAI,MAAM;SACnD,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,aAAa;IACb,4EAA4E;IAE5E;;;;;;;;;;;;;;;;;;;OAmBG;IACH,IAAI,CAAC,QAAgB,EAAE,IAAY;QACjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE;gBACzB,MAAM,EAAE,EAAE;gBACV,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,EAAE;gBACf,eAAe,EAAE,CAAC;gBAClB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;aACvB,CAAC,CAAC;QACL,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;QAC1C,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE9B,uEAAuE;QACvE,IAAI,KAAK,CAAC,eAAe,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YACxD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,4BAA4B;QAC5B,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC;QACrB,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAErD,qEAAqE;QACrE,qEAAqE;QACrE,oEAAoE;QACpE,IAAI,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC;YAC3B,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC;QAED,yCAAyC;QACzC,IAAI,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC7C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAExC,sEAAsE;QACtE,6DAA6D;QAC7D,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,GAAG,CAAC,CAAC;QACtD,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,iBAAiB,CAAC,CAAC;QAE3D,wDAAwD;QACxD,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC;QAClB,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC;QACrB,KAAK,CAAC,eAAe,IAAI,CAAC,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;
;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAgB;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAEzC,oEAAoE;QACpE,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxC,yDAAyD;YACzD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;OAKG;IACH,UAAU;QACR,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACvC,IAAI,GAAG,GAAG,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;gBACzD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED;;;;;OAKG;IACH,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;IAED,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E;;;;;;;;;OASG;IACK,aAAa,CAAC,KAAkB;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC;QACzC,OAAO;YACL,IAAI;YACJ,OAAO;YACP,+EAA+E;YAC/E,gBAAgB,EAAE,KAAK,CAAC,eAAe,GAAG,CAAC;SAC5C,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACK,cAAc,CAAC,IAAY;QACjC,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QACpB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;CACF"}
@@ -0,0 +1,126 @@
1
+ /**
2
+ * @fileoverview Prompt-injection content classifier using the
3
+ * `protectai/deberta-v3-small-prompt-injection-v2` model.
4
+ *
5
+ * Prompt injection is the attack pattern where adversarial instructions are
6
+ * embedded inside user-supplied text to override or hijack the agent's system
7
+ * prompt. This classifier provides a dedicated binary signal (INJECTION /
8
+ * SAFE) that the guardrail orchestrator can act on independently of the
9
+ * toxicity or jailbreak classifiers.
10
+ *
11
+ * Model details
12
+ * -------------
13
+ * `protectai/deberta-v3-small-prompt-injection-v2` is a fine-tuned DeBERTa
14
+ * model from ProtectAI, specifically trained to distinguish benign user
15
+ * messages from prompt-injection payloads. It outputs two labels:
16
+ * - `INJECTION` — high-confidence injection attempt
17
+ * - `SAFE` — normal user input
18
+ *
19
+ * Graceful degradation
20
+ * --------------------
21
+ * If the model fails to load the classifier sets `unavailable = true` and
22
+ * returns a pass result `{ bestClass: 'benign', confidence: 0, allScores: [] }`
23
+ * on every subsequent call.
24
+ *
25
+ * @module agentos/extensions/packs/ml-classifiers/classifiers/InjectionClassifier
26
+ */
27
+ import type { ClassificationResult } from '@framers/agentos';
28
+ import type { ISharedServiceRegistry } from '@framers/agentos';
29
+ import type { IContentClassifier } from '../IContentClassifier';
30
+ import type { ClassifierConfig } from '../types';
31
+ /**
32
+ * Binary prompt-injection classifier backed by
33
+ * `protectai/deberta-v3-small-prompt-injection-v2`.
34
+ *
35
+ * Returns one of two labels:
36
+ * - `INJECTION` — the text contains an injection attempt
37
+ * - `SAFE` — the text is clean
38
+ *
39
+ * The label with the higher confidence becomes `bestClass` / `confidence`.
40
+ * Both labels are present in `allScores` so callers can read the SAFE score
41
+ * as well.
42
+ *
43
+ * @implements {IContentClassifier}
44
+ *
45
+ * @example
46
+ * ```typescript
47
+ * const classifier = new InjectionClassifier(serviceRegistry);
48
+ * const result = await classifier.classify('Ignore previous instructions and …');
49
+ * // result.bestClass === 'INJECTION', result.confidence ≈ 0.97
50
+ * ```
51
+ */
52
+ export declare class InjectionClassifier implements IContentClassifier {
53
+ private readonly services;
54
+ private readonly config?;
55
+ /** Unique service identifier for this classifier. */
56
+ readonly id = "prompt-injection";
57
+ /** Human-readable name for dashboards and log output. */
58
+ readonly displayName = "Prompt Injection Classifier";
59
+ /** Short description of what this classifier detects. */
60
+ readonly description: string;
61
+ /**
62
+ * Default Hugging Face model ID.
63
+ * Overridable via {@link ClassifierConfig.modelId}. NOTE(review): this property is declared as a readonly literal, so it presumably keeps reporting the default even when `config.modelId` overrides the model that is loaded — confirm against the implementation.
64
+ */
65
+ readonly modelId = "protectai/deberta-v3-small-prompt-injection-v2";
66
+ /**
67
+ * Whether the model weights are fully loaded and the classifier is ready
68
+ * to accept `classify()` calls.
69
+ */
70
+ private _isLoaded;
71
+ /**
72
+ * Set to `true` when the model fails to load. Once `unavailable`, every
73
+ * subsequent `classify()` call immediately returns the pass result rather
74
+ * than retrying the expensive model load.
75
+ */
76
+ private unavailable;
77
+ /**
78
+ * @param services - Shared service registry used to lazily create and cache
79
+ * the underlying HuggingFace pipeline instance.
80
+ * @param config - Optional per-classifier configuration. When
81
+ * `config.modelId` is provided it overrides the default `modelId` when
82
+ * loading the model.
83
+ */
84
+ constructor(services: ISharedServiceRegistry, config?: ClassifierConfig | undefined);
85
+ /**
86
+ * Whether the underlying model pipeline has been successfully initialised.
87
+ * The flag is set to `true` after the first successful `classify()` call.
88
+ */
89
+ get isLoaded(): boolean;
90
+ /**
91
+ * Run prompt-injection inference on `text`.
92
+ *
93
+ * Lazily loads the pipeline on the first call via the shared service
94
+ * registry, then calls it with `{ topk: null }` to retrieve scores for both
95
+ * labels.
96
+ *
97
+ * @param text - The text to evaluate.
98
+ * @returns A promise that resolves with the classification result. If the
99
+ * model is unavailable the pass result is returned instead of throwing.
100
+ */
101
+ classify(text: string): Promise<ClassificationResult>;
102
+ /**
103
+ * Release the pipeline instance from the shared service registry.
104
+ *
105
+ * Idempotent — safe to call multiple times.
106
+ */
107
+ dispose(): Promise<void>;
108
+ /**
109
+ * Returns a "pass" result used when the model is unavailable.
110
+ *
111
+ * A pass result reports `bestClass: 'benign'` with zero confidence so the
112
+ * guardrail orchestrator will always choose {@link GuardrailAction.ALLOW}.
113
+ */
114
+ private passResult;
115
+ /**
116
+ * Map the raw pipeline output to a {@link ClassificationResult}.
117
+ *
118
+ * For binary classification the label with the higher confidence score
119
+ * becomes `bestClass` / `confidence`. Both labels are included in
120
+ * `allScores`.
121
+ *
122
+ * @param raw - Array returned by the pipeline when called with `topk: null`.
123
+ */
124
+ private mapResult;
125
+ }
126
+ //# sourceMappingURL=InjectionClassifier.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"InjectionClassifier.d.ts","sourceRoot":"","sources":["../../src/classifiers/InjectionClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC/D,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAsBjD;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,mBAAoB,YAAW,kBAAkB;IAmD1D,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;IA/C1B,qDAAqD;IACrD,QAAQ,CAAC,EAAE,sBAAsB;IAEjC,yDAAyD;IACzD,QAAQ,CAAC,WAAW,iCAAiC;IAErD,yDAAyD;IACzD,QAAQ,CAAC,WAAW,SAEsD;IAE1E;;;OAGG;IACH,QAAQ,CAAC,OAAO,oDAAoD;IAMpE;;;OAGG;IACH,OAAO,CAAC,SAAS,CAAS;IAE1B;;;;OAIG;IACH,OAAO,CAAC,WAAW,CAAS;IAM5B;;;;;;OAMG;gBAEgB,QAAQ,EAAE,sBAAsB,EAChC,MAAM,CAAC,EAAE,gBAAgB,YAAA;IAO5C;;;OAGG;IACH,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAMD;;;;;;;;;;OAUG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAkD3D;;;;OAIG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAS9B;;;;;OAKG;IACH,OAAO,CAAC,UAAU;IAIlB;;;;;;;;OAQG;IACH,OAAO,CAAC,SAAS;CAsBlB"}