@framers/agentos-ext-ml-classifiers 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/LICENSE +23 -0
  2. package/dist/ClassifierOrchestrator.d.ts +126 -0
  3. package/dist/ClassifierOrchestrator.d.ts.map +1 -0
  4. package/dist/ClassifierOrchestrator.js +239 -0
  5. package/dist/ClassifierOrchestrator.js.map +1 -0
  6. package/dist/IContentClassifier.d.ts +117 -0
  7. package/dist/IContentClassifier.d.ts.map +1 -0
  8. package/dist/IContentClassifier.js +22 -0
  9. package/dist/IContentClassifier.js.map +1 -0
  10. package/dist/MLClassifierGuardrail.d.ts +163 -0
  11. package/dist/MLClassifierGuardrail.d.ts.map +1 -0
  12. package/dist/MLClassifierGuardrail.js +335 -0
  13. package/dist/MLClassifierGuardrail.js.map +1 -0
  14. package/dist/SlidingWindowBuffer.d.ts +213 -0
  15. package/dist/SlidingWindowBuffer.d.ts.map +1 -0
  16. package/dist/SlidingWindowBuffer.js +246 -0
  17. package/dist/SlidingWindowBuffer.js.map +1 -0
  18. package/dist/classifiers/InjectionClassifier.d.ts +126 -0
  19. package/dist/classifiers/InjectionClassifier.d.ts.map +1 -0
  20. package/dist/classifiers/InjectionClassifier.js +210 -0
  21. package/dist/classifiers/InjectionClassifier.js.map +1 -0
  22. package/dist/classifiers/JailbreakClassifier.d.ts +124 -0
  23. package/dist/classifiers/JailbreakClassifier.d.ts.map +1 -0
  24. package/dist/classifiers/JailbreakClassifier.js +208 -0
  25. package/dist/classifiers/JailbreakClassifier.js.map +1 -0
  26. package/dist/classifiers/ToxicityClassifier.d.ts +125 -0
  27. package/dist/classifiers/ToxicityClassifier.d.ts.map +1 -0
  28. package/dist/classifiers/ToxicityClassifier.js +212 -0
  29. package/dist/classifiers/ToxicityClassifier.js.map +1 -0
  30. package/dist/classifiers/WorkerClassifierProxy.d.ts +158 -0
  31. package/dist/classifiers/WorkerClassifierProxy.d.ts.map +1 -0
  32. package/dist/classifiers/WorkerClassifierProxy.js +268 -0
  33. package/dist/classifiers/WorkerClassifierProxy.js.map +1 -0
  34. package/dist/index.d.ts +110 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +342 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/tools/ClassifyContentTool.d.ts +105 -0
  39. package/dist/tools/ClassifyContentTool.d.ts.map +1 -0
  40. package/dist/tools/ClassifyContentTool.js +149 -0
  41. package/dist/tools/ClassifyContentTool.js.map +1 -0
  42. package/dist/types.d.ts +319 -0
  43. package/dist/types.d.ts.map +1 -0
  44. package/dist/types.js +62 -0
  45. package/dist/types.js.map +1 -0
  46. package/dist/worker/classifier-worker.d.ts +49 -0
  47. package/dist/worker/classifier-worker.d.ts.map +1 -0
  48. package/dist/worker/classifier-worker.js +180 -0
  49. package/dist/worker/classifier-worker.js.map +1 -0
  50. package/package.json +45 -0
  51. package/src/ClassifierOrchestrator.ts +290 -0
  52. package/src/IContentClassifier.ts +124 -0
  53. package/src/MLClassifierGuardrail.ts +419 -0
  54. package/src/SlidingWindowBuffer.ts +384 -0
  55. package/src/classifiers/InjectionClassifier.ts +261 -0
  56. package/src/classifiers/JailbreakClassifier.ts +259 -0
  57. package/src/classifiers/ToxicityClassifier.ts +263 -0
  58. package/src/classifiers/WorkerClassifierProxy.ts +366 -0
  59. package/src/index.ts +383 -0
  60. package/src/tools/ClassifyContentTool.ts +201 -0
  61. package/src/types.ts +391 -0
  62. package/src/worker/classifier-worker.ts +267 -0
@@ -0,0 +1,319 @@
1
+ /**
2
+ * @fileoverview Core type definitions for the ML Classifier Guardrail Extension Pack.
3
+ *
4
+ * This file defines all configuration shapes, runtime result types, and
5
+ * service-identifier constants used by the ML classifier pipeline. All
6
+ * classifiers in this pack evaluate text content against learned models
7
+ * (toxicity, prompt-injection, jailbreak) and emit structured results that
8
+ * feed into the AgentOS guardrail decision tree.
9
+ *
10
+ * Import hierarchy
11
+ * ----------------
12
+ * ```
13
+ * IUtilityAI ──── ClassificationResult, ClassificationScore
14
+ * IGuardrailService ── GuardrailAction
15
+ * │
16
+ * ▼
17
+ * types.ts (this file)
18
+ * │
19
+ * ▼
20
+ * IContentClassifier.ts / SlidingWindowBuffer.ts / …
21
+ * ```
22
+ *
23
+ * @module agentos/extensions/packs/ml-classifiers/types
24
+ */
25
+ import type { ClassificationResult, ClassificationScore } from '@framers/agentos';
26
+ import type { GuardrailAction } from '@framers/agentos';
27
+ export type { ClassificationResult, ClassificationScore };
28
+ /**
29
+ * Numeric thresholds that map raw classifier confidence scores (0–1) to
30
+ * guardrail actions.
31
+ *
32
+ * The thresholds are applied in descending priority:
33
+ * 1. `score >= blockThreshold` → {@link GuardrailAction.BLOCK}
34
+ * 2. `score >= flagThreshold` → {@link GuardrailAction.FLAG}
35
+ * 3. `score >= warnThreshold` → {@link GuardrailAction.SANITIZE}
36
+ * 4. otherwise → {@link GuardrailAction.ALLOW}
37
+ */
38
+ export interface ClassifierThresholds {
39
+ /**
40
+ * Minimum score at which content is **blocked** (interaction terminated).
41
+ * Must be in the range [0, 1]. Typical default: `0.9`.
42
+ */
43
+ blockThreshold: number;
44
+ /**
45
+ * Minimum score at which content is **flagged** for review while still
46
+ * being allowed through. Must be in the range [0, 1]. Typical default: `0.7`.
47
+ */
48
+ flagThreshold: number;
49
+ /**
50
+ * Minimum score at which a **warn** action is taken (e.g. the chunk is
51
+ * sanitised or a warning is appended to the response). Must be in the range
52
+ * [0, 1]. Typical default: `0.4`.
53
+ */
54
+ warnThreshold: number;
55
+ }
56
+ /**
57
+ * Sensible defaults for {@link ClassifierThresholds}.
58
+ *
59
+ * These values reflect a conservative-but-pragmatic policy:
60
+ * - block at 90 % confidence → very high bar, minimises false positives
61
+ * - flag at 70 % → surfaced for human review, not blocked
62
+ * - warn at 40 % → low-confidence signal, handled with a light touch
63
+ */
64
+ export declare const DEFAULT_THRESHOLDS: ClassifierThresholds;
65
+ /**
66
+ * Configuration for a single ML classifier pipeline.
67
+ *
68
+ * Allows individual classifiers to override the pack-level defaults for the
69
+ * model variant and decision thresholds, and to customise which guardrail
70
+ * action is taken for each classification label.
71
+ */
72
+ export interface ClassifierConfig {
73
+ /**
74
+ * Hugging Face model identifier (e.g. `"Xenova/toxic-bert"`) or a local
75
+ * model path to load instead of the pack default.
76
+ * @optional Falls back to the pack's default model for this classifier; `MLClassifierPackOptions.modelCacheDir` only controls where the weights are cached.
77
+ */
78
+ modelId?: string;
79
+ /**
80
+ * Per-classifier threshold overrides.
81
+ * @optional Falls back to {@link DEFAULT_THRESHOLDS}.
82
+ */
83
+ thresholds?: Partial<ClassifierThresholds>;
84
+ /**
85
+ * Maps classification labels to the guardrail action that should be taken
86
+ * when that label is the winning class.
87
+ *
88
+ * @example
89
+ * ```typescript
90
+ * // Always block on TOXIC label regardless of threshold.
91
+ * labelActions: { TOXIC: GuardrailAction.BLOCK }
92
+ * ```
93
+ */
94
+ labelActions?: Record<string, GuardrailAction>;
95
+ }
96
+ /**
97
+ * Configuration for browser-side model execution.
98
+ *
99
+ * When the ML classifier pack is loaded in a browser context (e.g. a chat
100
+ * widget), models run inside a Web Worker to avoid blocking the main thread.
101
+ * This interface controls worker lifecycle and cache management.
102
+ */
103
+ export interface BrowserConfig {
104
+ /**
105
+ * Run model inference in a Web Worker.
106
+ * @default true
107
+ */
108
+ useWebWorker?: boolean;
109
+ /**
110
+ * Caching strategy for downloaded model weights.
111
+ * - `'memory'` — keep weights in memory only (lost on page unload)
112
+ * - `'indexeddb'` — persist weights to IndexedDB (survives reloads)
113
+ * - `'none'` — no caching; re-download on every page load
114
+ * @default 'indexeddb'
115
+ */
116
+ cacheStrategy?: 'memory' | 'indexeddb' | 'none';
117
+ /**
118
+ * Maximum number of model shards to keep in the in-memory cache when
119
+ * `cacheStrategy === 'memory'`. Oldest entries are evicted LRU-style.
120
+ * @default 3
121
+ */
122
+ maxCacheSize?: number;
123
+ /**
124
+ * Callback invoked with download progress as model weights are fetched.
125
+ * Useful for showing a progress bar in the UI.
126
+ *
127
+ * @param progress - Current progress state.
128
+ */
129
+ onProgress?: (progress: ModelDownloadProgress) => void;
130
+ }
131
+ /**
132
+ * Progress report emitted during model weight downloads.
133
+ *
134
+ * @example
135
+ * ```typescript
136
+ * onProgress({ modelId: 'Xenova/toxic-bert', loaded: 50_000, total: 200_000, percent: 25 })
137
+ * ```
138
+ */
139
+ export interface ModelDownloadProgress {
140
+ /** Identifier of the model being downloaded (Hugging Face ID or path). */
141
+ modelId: string;
142
+ /** Number of bytes downloaded so far. */
143
+ loaded: number;
144
+ /** Total number of bytes to download (`0` if unknown). */
145
+ total: number;
146
+ /** Download progress as a percentage in the range [0, 100]. */
147
+ percent: number;
148
+ }
149
+ /**
150
+ * Top-level configuration for the ML Classifier Extension Pack.
151
+ *
152
+ * Passed to `createMLClassifierPack()` (or the NestJS module factory) to
153
+ * control which classifiers are active, how models are loaded, and how the
154
+ * sliding-window streaming evaluation behaves.
155
+ *
156
+ * @example
157
+ * ```typescript
158
+ * const packOptions: MLClassifierPackOptions = {
159
+ * classifiers: ['toxicity', 'jailbreak'],
160
+ * quantized: true,
161
+ * runtime: 'node',
162
+ * thresholds: { blockThreshold: 0.95, flagThreshold: 0.75, warnThreshold: 0.5 },
163
+ * streamingMode: true,
164
+ * chunkSize: 150,
165
+ * contextSize: 50,
166
+ * };
167
+ * ```
168
+ */
169
+ export interface MLClassifierPackOptions {
170
+ /**
171
+ * Subset of built-in classifiers to activate.
172
+ * Omit or pass an empty array to activate all built-in classifiers.
173
+ *
174
+ * @example `['toxicity', 'injection']`
175
+ */
176
+ classifiers?: Array<'toxicity' | 'injection' | 'jailbreak'>;
177
+ /**
178
+ * Fully-qualified `IContentClassifier` instances to add alongside the
179
+ * built-in classifiers (e.g. domain-specific harm classifiers).
180
+ */
181
+ customClassifiers?: import('./IContentClassifier').IContentClassifier[];
182
+ /**
183
+ * Local filesystem path where downloaded model weights are cached.
184
+ * Defaults to `~/.cache/agentos/ml-classifiers`.
185
+ */
186
+ modelCacheDir?: string;
187
+ /**
188
+ * Use 8-bit quantised model variants when available.
189
+ * Reduces VRAM/RAM footprint and increases inference speed at a small
190
+ * accuracy cost.
191
+ * @default false
192
+ */
193
+ quantized?: boolean;
194
+ /**
195
+ * Execution runtime for model inference.
196
+ * - `'node'` — Runs via `@huggingface/transformers` in the Node.js process.
197
+ * - `'browser'` — Runs via `@huggingface/transformers` in a Web Worker.
198
+ * - `'wasm'` — Explicit WebAssembly fallback (Node.js or browser).
199
+ * @default 'node'
200
+ */
201
+ runtime?: 'node' | 'browser' | 'wasm';
202
+ /**
203
+ * Browser-specific options. Only applicable when `runtime === 'browser'`.
204
+ */
205
+ browser?: BrowserConfig;
206
+ /**
207
+ * Number of tokens per evaluation window when streaming mode is enabled.
208
+ * Smaller values detect issues earlier but increase evaluation frequency.
209
+ * @default 200
210
+ */
211
+ chunkSize?: number;
212
+ /**
213
+ * Number of tokens from the previous chunk to carry forward as context into
214
+ * the next window, preventing boundary effects.
215
+ * @default 50
216
+ */
217
+ contextSize?: number;
218
+ /**
219
+ * Maximum number of classifier evaluations per stream. The sliding window
220
+ * stops advancing after this many evaluations, allowing the stream to
221
+ * complete without further overhead.
222
+ * @default 100
223
+ */
224
+ maxEvaluations?: number;
225
+ /**
226
+ * Enable sliding-window evaluation for streamed (token-by-token) output.
227
+ * When `false`, classifiers only run on the completed final response.
228
+ * @default false
229
+ */
230
+ streamingMode?: boolean;
231
+ /**
232
+ * Pack-level threshold defaults applied to every classifier unless
233
+ * overridden by a per-classifier {@link ClassifierConfig}.
234
+ */
235
+ thresholds?: Partial<ClassifierThresholds>;
236
+ /**
237
+ * Scope of guardrail enforcement.
238
+ * - `'input'` — Evaluate user messages before orchestration.
239
+ * - `'output'` — Evaluate agent responses before delivery.
240
+ * - `'both'` — Evaluate at both stages.
241
+ * @default 'both'
242
+ */
243
+ guardrailScope?: 'input' | 'output' | 'both';
244
+ }
245
+ /**
246
+ * Well-known service identifier strings for the three built-in ML classifier
247
+ * pipelines.
248
+ *
249
+ * These IDs follow the `agentos:<domain>:<name>` naming convention used
250
+ * throughout the AgentOS extension ecosystem. Use them to retrieve specific
251
+ * classifier services from the shared service registry.
252
+ *
253
+ * @example
254
+ * ```typescript
255
+ * const toxicity = serviceRegistry.get(ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE);
256
+ * ```
257
+ */
258
+ export declare const ML_CLASSIFIER_SERVICE_IDS: {
259
+ /** Classifier that detects toxic, hateful, or abusive language. */
260
+ readonly TOXICITY_PIPELINE: "agentos:ml-classifiers:toxicity-pipeline";
261
+ /** Classifier that detects prompt-injection attempts. */
262
+ readonly INJECTION_PIPELINE: "agentos:ml-classifiers:injection-pipeline";
263
+ /** Classifier that detects jailbreak / system-override attempts. */
264
+ readonly JAILBREAK_PIPELINE: "agentos:ml-classifiers:jailbreak-pipeline";
265
+ };
266
+ /** Union type of all ML classifier service ID strings. */
267
+ export type MLClassifierServiceId = (typeof ML_CLASSIFIER_SERVICE_IDS)[keyof typeof ML_CLASSIFIER_SERVICE_IDS];
268
+ /**
269
+ * A {@link ClassificationResult} augmented with provenance metadata.
270
+ *
271
+ * Produced when a classifier evaluates a chunk of text. Carries the
272
+ * classifier's identity and the wall-clock latency so callers can build
273
+ * audit trails and SLO dashboards.
274
+ */
275
+ export interface AnnotatedClassificationResult extends ClassificationResult {
276
+ /**
277
+ * The {@link IContentClassifier.id} of the classifier that produced this
278
+ * result (e.g. `ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE`).
279
+ */
280
+ classifierId: string;
281
+ /**
282
+ * Wall-clock time in milliseconds from when `classify()` was called to when
283
+ * it resolved.
284
+ */
285
+ latencyMs: number;
286
+ }
287
+ /**
288
+ * Aggregated evaluation outcome for a single sliding-window chunk.
289
+ *
290
+ * Produced by running all active classifiers against one text window and
291
+ * collating their results into a single action recommendation.
292
+ *
293
+ * The `recommendedAction` is the most restrictive action across all
294
+ * classifiers (BLOCK > FLAG > SANITIZE > ALLOW).
295
+ */
296
+ export interface ChunkEvaluation {
297
+ /**
298
+ * Individual results from every classifier that evaluated this chunk,
299
+ * in the order the classifiers were invoked.
300
+ */
301
+ results: AnnotatedClassificationResult[];
302
+ /**
303
+ * The most restrictive guardrail action recommended across all results.
304
+ * The pipeline should act on this value rather than iterating `results`
305
+ * manually.
306
+ */
307
+ recommendedAction: GuardrailAction;
308
+ /**
309
+ * ID of the classifier that triggered the `recommendedAction`, or `null`
310
+ * if the action is {@link GuardrailAction.ALLOW} (no classifier triggered).
311
+ */
312
+ triggeredBy: string | null;
313
+ /**
314
+ * Sum of all classifier `latencyMs` values — useful for profiling the
315
+ * total evaluation overhead per chunk.
316
+ */
317
+ totalLatencyMs: number;
318
+ }
319
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAClF,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAGxD,YAAY,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,CAAC;AAM1D;;;;;;;;;GASG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;;OAGG;IACH,aAAa,EAAE,MAAM,CAAC;IAEtB;;;;OAIG;IACH,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;;;;;GAOG;AACH,eAAO,MAAM,kBAAkB,EAAE,oBAIvB,CAAC;AAMX;;;;;;GAMG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAE3C;;;;;;;;;OASG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;CAChD;AAMD;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC5B;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IAEvB;;;;;;OAMG;IACH,aAAa,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,MAAM,CAAC;IAEhD;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,qBAAqB,KAAK,IAAI,CAAC;CACxD;AAMD;;;;;;;GAOG;AACH,MAAM,WAAW,qBAAqB;IACpC,0EAA0E;IAC1E,OAAO,EAAE,MAAM,CAAC;IAEhB,yCAAyC;IACzC,MAAM,EAAE,MAAM,CAAC;IAEf,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;IAEd,+DAA+D;IAC/D,OAAO,EAAE,MAAM,CAAC;CACjB;AAMD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,WAAW,uBAAuB;IACtC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,KAAK,CAAC,UAAU,GAAG,WAAW,GAAG,WAAW,CAAC,CAAC;IAE5D;;;OAGG;IACH,iBAAiB,CAAC,EAAE,OAAO,sBAAsB,EAAE,kBAAkB,EAAE,CAAC;IAExE;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IAEtC;;OAEG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;IAExB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAE3C;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;CAC9C;AAMD;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;IACpC,mEAAmE;;IAGnE,yDAAyD;;IAGzD,oEAAoE;;CAE5D,CAAC;AAEX,0
DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAC/B,CAAC,OAAO,yBAAyB,CAAC,CAAC,MAAM,OAAO,yBAAyB,CAAC,CAAC;AAM7E;;;;;;GAMG;AACH,MAAM,WAAW,6BAA8B,SAAQ,oBAAoB;IACzE;;;OAGG;IACH,YAAY,EAAE,MAAM,CAAC;IAErB;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,eAAe;IAC9B;;;OAGG;IACH,OAAO,EAAE,6BAA6B,EAAE,CAAC;IAEzC;;;;OAIG;IACH,iBAAiB,EAAE,eAAe,CAAC;IAEnC;;;OAGG;IACH,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAC;CACxB"}
package/dist/types.js ADDED
@@ -0,0 +1,62 @@
1
+ /**
2
+ * @fileoverview Core type definitions for the ML Classifier Guardrail Extension Pack.
3
+ *
4
+ * This file defines all configuration shapes, runtime result types, and
5
+ * service-identifier constants used by the ML classifier pipeline. All
6
+ * classifiers in this pack evaluate text content against learned models
7
+ * (toxicity, prompt-injection, jailbreak) and emit structured results that
8
+ * feed into the AgentOS guardrail decision tree.
9
+ *
10
+ * Import hierarchy
11
+ * ----------------
12
+ * ```
13
+ * IUtilityAI ──── ClassificationResult, ClassificationScore
14
+ * IGuardrailService ── GuardrailAction
15
+ * │
16
+ * ▼
17
+ * types.ts (this file)
18
+ * │
19
+ * ▼
20
+ * IContentClassifier.ts / SlidingWindowBuffer.ts / …
21
+ * ```
22
+ *
23
+ * @module agentos/extensions/packs/ml-classifiers/types
24
+ */
25
/**
 * Sensible defaults for {@link ClassifierThresholds}.
 *
 * These values reflect a conservative-but-pragmatic policy:
 * - block at 90 % confidence → very high bar, minimises false positives
 * - flag at 70 % → surfaced for human review, not blocked
 * - warn at 40 % → low-confidence signal, handled with a light touch
 *
 * The object is frozen: it is shared by every importer, so an in-place
 * mutation would silently change the thresholds for all of them. To
 * customise, spread it into a new object
 * (`{ ...DEFAULT_THRESHOLDS, warnThreshold: 0.5 }`).
 */
export const DEFAULT_THRESHOLDS = Object.freeze({
    blockThreshold: 0.9,
    flagThreshold: 0.7,
    warnThreshold: 0.4,
});
38
+ // ---------------------------------------------------------------------------
39
+ // Service identifiers
40
+ // ---------------------------------------------------------------------------
41
/**
 * Well-known service identifier strings for the three built-in ML classifier
 * pipelines.
 *
 * These IDs follow the `agentos:<domain>:<name>` naming convention used
 * throughout the AgentOS extension ecosystem. Use them to retrieve specific
 * classifier services from the shared service registry.
 *
 * The object is frozen so the runtime value matches its `readonly`
 * declaration and an identifier cannot be reassigned by accident.
 *
 * @example
 * ```typescript
 * const toxicity = serviceRegistry.get(ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE);
 * ```
 */
export const ML_CLASSIFIER_SERVICE_IDS = Object.freeze({
    /** Classifier that detects toxic, hateful, or abusive language. */
    TOXICITY_PIPELINE: 'agentos:ml-classifiers:toxicity-pipeline',
    /** Classifier that detects prompt-injection attempts. */
    INJECTION_PIPELINE: 'agentos:ml-classifiers:injection-pipeline',
    /** Classifier that detects jailbreak / system-override attempts. */
    JAILBREAK_PIPELINE: 'agentos:ml-classifiers:jailbreak-pipeline',
});
62
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AA2CH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAyB;IACtD,cAAc,EAAE,GAAG;IACnB,aAAa,EAAE,GAAG;IAClB,aAAa,EAAE,GAAG;CACV,CAAC;AA6NX,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,mEAAmE;IACnE,iBAAiB,EAAE,0CAA0C;IAE7D,yDAAyD;IACzD,kBAAkB,EAAE,2CAA2C;IAE/D,oEAAoE;IACpE,kBAAkB,EAAE,2CAA2C;CACvD,CAAC"}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * @fileoverview Web Worker entry point for ML content classification.
3
+ *
4
+ * This script is loaded by {@link WorkerClassifierProxy} as a dedicated Web
5
+ * Worker. It listens for `classify` messages from the main thread, lazily
6
+ * loads the requested model pipeline via `@huggingface/transformers`, runs
7
+ * inference, then posts the result (or an error) back.
8
+ *
9
+ * ## Message protocol
10
+ *
11
+ * **Incoming** (main thread → worker):
12
+ * ```json
13
+ * {
14
+ * "type": "classify",
15
+ * "text": "<string>",
16
+ * "modelId": "<HuggingFace model ID or local path>",
17
+ * "quantized": true | false,
18
+ * "taskType": "<transformers.js task string>"
19
+ * }
20
+ * ```
21
+ *
22
+ * **Outgoing** (worker → main thread) on success:
23
+ * ```json
24
+ * { "type": "result", "result": { "bestClass": "...", "confidence": 0.92, "allScores": [...] } }
25
+ * ```
26
+ *
27
+ * **Outgoing** (worker → main thread) on error:
28
+ * ```json
29
+ * { "type": "error", "error": "<error message>" }
30
+ * ```
31
+ *
32
+ * ## Pipeline caching
33
+ * The pipeline is loaded once per `(modelId, taskType)` key and cached in
34
+ * a module-level `Map`. Subsequent `classify` messages for the same model
35
+ * reuse the cached instance, avoiding repeated expensive model downloads and
36
+ * WASM initialisation.
37
+ *
38
+ * ## Raw label normalisation
39
+ * The worker normalises the raw `@huggingface/transformers` output (an array
40
+ * of `{ label, score }` objects when called with `topk: null`) into the
41
+ * AgentOS {@link ClassificationResult} shape:
42
+ * - `bestClass` — label with the highest score
43
+ * - `confidence` — score of the winning label
44
+ * - `allScores` — all labels mapped to `{ classLabel, score }` pairs
45
+ *
46
+ * @module agentos/extensions/packs/ml-classifiers/worker/classifier-worker
47
+ */
48
+ export {};
49
+ //# sourceMappingURL=classifier-worker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classifier-worker.d.ts","sourceRoot":"","sources":["../../src/worker/classifier-worker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8CG"}
@@ -0,0 +1,180 @@
1
+ /**
2
+ * @fileoverview Web Worker entry point for ML content classification.
3
+ *
4
+ * This script is loaded by {@link WorkerClassifierProxy} as a dedicated Web
5
+ * Worker. It listens for `classify` messages from the main thread, lazily
6
+ * loads the requested model pipeline via `@huggingface/transformers`, runs
7
+ * inference, then posts the result (or an error) back.
8
+ *
9
+ * ## Message protocol
10
+ *
11
+ * **Incoming** (main thread → worker):
12
+ * ```json
13
+ * {
14
+ * "type": "classify",
15
+ * "text": "<string>",
16
+ * "modelId": "<HuggingFace model ID or local path>",
17
+ * "quantized": true | false,
18
+ * "taskType": "<transformers.js task string>"
19
+ * }
20
+ * ```
21
+ *
22
+ * **Outgoing** (worker → main thread) on success:
23
+ * ```json
24
+ * { "type": "result", "result": { "bestClass": "...", "confidence": 0.92, "allScores": [...] } }
25
+ * ```
26
+ *
27
+ * **Outgoing** (worker → main thread) on error:
28
+ * ```json
29
+ * { "type": "error", "error": "<error message>" }
30
+ * ```
31
+ *
32
+ * ## Pipeline caching
33
+ * The pipeline is loaded once per `(modelId, taskType)` key and cached in
34
+ * a module-level `Map`. Subsequent `classify` messages for the same model
35
+ * reuse the cached instance, avoiding repeated expensive model downloads and
36
+ * WASM initialisation.
37
+ *
38
+ * ## Raw label normalisation
39
+ * The worker normalises the raw `@huggingface/transformers` output (an array
40
+ * of `{ label, score }` objects when called with `topk: null`) into the
41
+ * AgentOS {@link ClassificationResult} shape:
42
+ * - `bestClass` — label with the highest score
43
+ * - `confidence` — score of the winning label
44
+ * - `allScores` — all labels mapped to `{ classLabel, score }` pairs
45
+ *
46
+ * @module agentos/extensions/packs/ml-classifiers/worker/classifier-worker
47
+ */
48
+ // ---------------------------------------------------------------------------
49
+ // Pipeline cache
50
+ // ---------------------------------------------------------------------------
51
/**
 * Build the lookup key used by the pipeline cache.
 *
 * The task type is placed first and the model ID second, separated by `::`,
 * so the same model loaded for two different tasks yields two distinct keys.
 *
 * @param modelId - Hugging Face model ID or local path.
 * @param taskType - transformers.js task string.
 * @returns The string `<taskType>::<modelId>`.
 */
function cacheKey(modelId, taskType) {
    const separator = '::';
    return ''.concat(taskType, separator, modelId);
}
62
/**
 * Module-level pipeline cache.
 *
 * Maps a cache key to a *promise* of the loaded pipeline function. Storing
 * the promise (rather than the resolved pipeline) means that several
 * `classify` messages arriving while a model is still loading all share one
 * load instead of each starting their own.
 */
const pipelineCache = new Map();
// ---------------------------------------------------------------------------
// Classification logic
// ---------------------------------------------------------------------------
/**
 * Load (or retrieve from cache) the pipeline for the given model and task
 * and run inference on `request.text`.
 *
 * Option names follow `@huggingface/transformers` v3, the package imported
 * below: precision is selected with `dtype` and "return every label" is
 * requested with `top_k: null`. The older `quantized` / `topk` names are
 * not recognised by v3.
 *
 * @param request - The incoming classify request (`text`, `modelId`,
 *   `taskType`, optional boolean `quantized`).
 * @returns A promise resolving with the raw pipeline output for `text`.
 * @throws If the pipeline fails to load or inference throws. A failed load
 *   is evicted from the cache so a later request can retry.
 */
async function runPipeline(request) {
    // Map the boolean flag onto a v3 dtype. When the caller did not specify
    // it, leave `dtype` unset so the library default applies.
    const dtype = typeof request.quantized === 'boolean'
        ? (request.quantized ? 'q8' : 'fp32')
        : undefined;
    // Precision is part of the key so a quantised and a full-precision request
    // for the same model do not share one pipeline instance.
    const key = `${cacheKey(request.modelId, request.taskType)}::${dtype ?? 'default'}`;
    let pending = pipelineCache.get(key);
    if (!pending) {
        pending = (async () => {
            // Dynamic import so this module can be evaluated in environments
            // where the optional package is absent; it is only needed once a
            // classify request actually arrives.
            const { pipeline: createPipeline } = await import('@huggingface/transformers');
            return createPipeline(request.taskType, request.modelId, dtype === undefined ? {} : { dtype });
        })();
        // Register before awaiting so concurrent requests reuse this load.
        pipelineCache.set(key, pending);
        // Drop a failed load so the next request retries; the rejection itself
        // still reaches the caller through the `await` below.
        pending.catch(() => {
            if (pipelineCache.get(key) === pending) {
                pipelineCache.delete(key);
            }
        });
    }
    const pipe = await pending;
    // Run inference — request all label scores (top_k: null).
    return pipe(request.text, { top_k: null });
}
102
/**
 * Convert raw pipeline output into the AgentOS `ClassificationResult` shape.
 *
 * A missing or empty `raw` produces a `'benign'` result with confidence `0`
 * and no scores, so the caller sees a pass rather than an error. Otherwise
 * the entry with the strictly highest `score` wins (the earliest entry wins
 * a tie) and every entry is copied into `allScores`.
 *
 * @param raw - Array of `{ label, score }` objects from the pipeline.
 * @returns `{ bestClass, confidence, allScores }`.
 */
function normaliseResult(raw) {
    const isEmpty = !raw || raw.length === 0;
    if (isEmpty) {
        return { bestClass: 'benign', confidence: 0, allScores: [] };
    }
    // Keep the current leader unless a later entry scores strictly higher.
    const winner = raw.reduce((leader, candidate) => (candidate.score > leader.score ? candidate : leader), raw[0]);
    // Re-key each entry to the ClassificationScore field names.
    const allScores = raw.map(({ label, score }) => ({ classLabel: label, score }));
    return { bestClass: winner.label, confidence: winner.score, allScores };
}
132
+ // ---------------------------------------------------------------------------
133
+ // Message handler
134
+ // ---------------------------------------------------------------------------
135
/**
 * Serve one `classify` request and reply to the main thread.
 *
 * On success a `{ type: 'result', result }` message is posted. If anything
 * inside the `try` throws — loading, inference, normalisation or the post
 * itself — a `{ type: 'error', error }` message carrying the failure text is
 * posted instead.
 *
 * @param request - The incoming classify request.
 */
async function handleClassify(request) {
    try {
        const labels = await runPipeline(request);
        self.postMessage({ type: 'result', result: normaliseResult(labels) });
    }
    catch (failure) {
        // Non-Error throwables are stringified so the reply is always text.
        const reason = failure instanceof Error ? failure.message : String(failure);
        self.postMessage({ type: 'error', error: reason });
    }
}
156
+ // ---------------------------------------------------------------------------
157
+ // Worker bootstrap — listen for messages from the main thread
158
+ // ---------------------------------------------------------------------------
159
/**
 * The primary message listener for this Worker.
 *
 * Messages whose `type` is `'classify'` are handed to `handleClassify`.
 * Anything else is ignored after a console warning that shows the
 * unexpected type (or the whole payload when it has no `type`).
 */
self.onmessage = (event) => {
    const payload = event.data;
    if (payload?.type !== 'classify') {
        // Unknown message type — log and ignore.
        console.warn('[classifier-worker] Received unexpected message type:', payload?.type ?? payload);
        return;
    }
    // Fire and forget: handleClassify catches its own errors and reports them
    // to the main thread, so no top-level catch is needed here.
    void handleClassify(payload);
};
179
+ export {};
180
+ //# sourceMappingURL=classifier-worker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classifier-worker.js","sourceRoot":"","sources":["../../src/worker/classifier-worker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8CG;AAsEH,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;GAOG;AACH,SAAS,QAAQ,CAAC,OAAe,EAAE,QAAgB;IACjD,OAAO,GAAG,QAAQ,KAAK,OAAO,EAAE,CAAC;AACnC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,aAAa,GAAG,IAAI,GAAG,EAAuE,CAAC;AAErG,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E;;;;;;;GAOG;AACH,KAAK,UAAU,WAAW,CAAC,OAAwB;IACjD,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAExD,8DAA8D;IAC9D,IAAI,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAElC,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,mDAAmD;QACnD,yEAAyE;QACzE,sEAAsE;QACtE,yDAAyD;QACzD,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;QAE/E,iEAAiE;QACjE,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE;YACtE,SAAS,EAAE,OAAO,CAAC,SAAS;SAC7B,CAAC,CAAC;QAEH,sCAAsC;QACtC,IAAI,GAAG,OAAsE,CAAC;QAC9E,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,yDAAyD;IACzD,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;GAKG;AACH,SAAS,eAAe,CAAC,GAAe;IACtC,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,0EAA0E;QAC1E,iCAAiC;QACjC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;IAC/D,CAAC;IAED,oDAAoD;IACpD,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IAClB,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;YAC5B,IAAI,GAAG,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,4DAA4D;IAC5D,MAAM,SAAS,GAA0B,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC1D,UAAU,EAAE,IAAI,CAAC,KAAK;QACtB,KAAK,EAAE,IAAI,CAAC,KAAK;KAClB,CAAC,CAAC,CAAC;IAEJ,OAAO;QACL,SAAS,EAAE,IAAI,CAAC,KAAK;QACrB,UAAU,EAAE,IAAI,CAAC,KAAK;QACtB,SAAS;KACV,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E;;;;;;;GAOG;AACH,KAAK,UAAU,cAAc,CAAC,OAAwB;IACpD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC
;QAEpC,MAAM,QAAQ,GAAkB,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAC3D,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;IAC7B,CAAC;IAAC,OAAO,GAAY,EAAE,CAAC;QACtB,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,MAAM,QAAQ,GAAiB,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;QACjE,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,8DAA8D;AAC9D,8EAA8E;AAE9E;;;;;;;GAOG;AACH,IAAI,CAAC,SAAS,GAAG,CAAC,KAAmB,EAAE,EAAE;IACvC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAuB,CAAC;IAE3C,IAAI,IAAI,EAAE,IAAI,KAAK,UAAU,EAAE,CAAC;QAC9B,0EAA0E;QAC1E,6EAA6E;QAC7E,KAAK,cAAc,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;SAAM,CAAC;QACN,yCAAyC;QACzC,OAAO,CAAC,IAAI,CACV,uDAAuD,EACvD,IAAI,EAAE,IAAI,IAAI,IAAI,CACnB,CAAC;IACJ,CAAC;AACH,CAAC,CAAC"}
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@framers/agentos-ext-ml-classifiers",
3
+ "version": "0.1.0",
4
+ "description": "ML-based content classification guardrail (toxicity, injection, jailbreak) for AgentOS",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "types": "./dist/index.d.ts"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist",
16
+ "src",
17
+ "SKILL.md",
18
+ "manifest.json"
19
+ ],
20
+ "peerDependencies": {
21
+ "@framers/agentos": "^0.1.0"
22
+ },
23
+ "optionalDependencies": {
24
+ "@huggingface/transformers": "^3.0.0"
25
+ },
26
+ "devDependencies": {
27
+ "typescript": "^5.5.0",
28
+ "vitest": "^1.6.0",
29
+ "@framers/agentos": "0.1.47"
30
+ },
31
+ "license": "MIT",
32
+ "author": "Frame.dev",
33
+ "repository": {
34
+ "type": "git",
35
+ "url": "https://github.com/framersai/agentos-extensions.git",
36
+ "directory": "registry/curated/safety/ml-classifiers"
37
+ },
38
+ "publishConfig": {
39
+ "access": "public"
40
+ },
41
+ "scripts": {
42
+ "build": "tsc -p tsconfig.json",
43
+ "test": "vitest run"
44
+ }
45
+ }