@framers/agentos-ext-ml-classifiers 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +20 -0
- package/.github/workflows/release.yml +37 -0
- package/.releaserc.json +9 -0
- package/LICENSE +96 -21
- package/README.md +72 -0
- package/dist/MLClassifierGuardrail.d.ts.map +1 -1
- package/dist/MLClassifierGuardrail.js +14 -6
- package/dist/MLClassifierGuardrail.js.map +1 -1
- package/dist/index.js +3 -3
- package/dist/keyword-classifier.js +1 -1
- package/dist/llm-classifier.js +1 -1
- package/package.json +5 -13
- package/scripts/fix-esm-imports.mjs +181 -0
- package/src/MLClassifierGuardrail.ts +38 -5
- package/test/llm-tier.spec.ts +267 -0
- package/test/ml-classifiers.spec.ts +57 -0
- package/test/onnx-tier.spec.ts +255 -0
- package/test/tier-fallthrough.spec.ts +185 -0
- package/vitest.config.ts +18 -7
- package/CHANGELOG.md +0 -18
- package/dist/ClassifierOrchestrator.d.ts +0 -126
- package/dist/ClassifierOrchestrator.d.ts.map +0 -1
- package/dist/ClassifierOrchestrator.js +0 -239
- package/dist/ClassifierOrchestrator.js.map +0 -1
- package/dist/IContentClassifier.d.ts +0 -117
- package/dist/IContentClassifier.d.ts.map +0 -1
- package/dist/IContentClassifier.js +0 -22
- package/dist/IContentClassifier.js.map +0 -1
- package/dist/SlidingWindowBuffer.d.ts +0 -213
- package/dist/SlidingWindowBuffer.d.ts.map +0 -1
- package/dist/SlidingWindowBuffer.js +0 -246
- package/dist/SlidingWindowBuffer.js.map +0 -1
- package/dist/classifiers/InjectionClassifier.d.ts +0 -126
- package/dist/classifiers/InjectionClassifier.d.ts.map +0 -1
- package/dist/classifiers/InjectionClassifier.js +0 -210
- package/dist/classifiers/InjectionClassifier.js.map +0 -1
- package/dist/classifiers/JailbreakClassifier.d.ts +0 -124
- package/dist/classifiers/JailbreakClassifier.d.ts.map +0 -1
- package/dist/classifiers/JailbreakClassifier.js +0 -208
- package/dist/classifiers/JailbreakClassifier.js.map +0 -1
- package/dist/classifiers/ToxicityClassifier.d.ts +0 -125
- package/dist/classifiers/ToxicityClassifier.d.ts.map +0 -1
- package/dist/classifiers/ToxicityClassifier.js +0 -212
- package/dist/classifiers/ToxicityClassifier.js.map +0 -1
- package/dist/classifiers/WorkerClassifierProxy.d.ts +0 -158
- package/dist/classifiers/WorkerClassifierProxy.d.ts.map +0 -1
- package/dist/classifiers/WorkerClassifierProxy.js +0 -268
- package/dist/classifiers/WorkerClassifierProxy.js.map +0 -1
- package/dist/worker/classifier-worker.d.ts +0 -49
- package/dist/worker/classifier-worker.d.ts.map +0 -1
- package/dist/worker/classifier-worker.js +0 -180
- package/dist/worker/classifier-worker.js.map +0 -1
- package/src/ClassifierOrchestrator.ts +0 -290
- package/src/IContentClassifier.ts +0 -124
- package/src/SlidingWindowBuffer.ts +0 -384
- package/src/classifiers/InjectionClassifier.ts +0 -261
- package/src/classifiers/JailbreakClassifier.ts +0 -259
- package/src/classifiers/ToxicityClassifier.ts +0 -263
- package/src/classifiers/WorkerClassifierProxy.ts +0 -366
- package/src/worker/classifier-worker.ts +0 -267
- package/test/ClassifierOrchestrator.spec.ts +0 -365
- package/test/ClassifyContentTool.spec.ts +0 -226
- package/test/InjectionClassifier.spec.ts +0 -263
- package/test/JailbreakClassifier.spec.ts +0 -295
- package/test/MLClassifierGuardrail.spec.ts +0 -486
- package/test/SlidingWindowBuffer.spec.ts +0 -391
- package/test/ToxicityClassifier.spec.ts +0 -268
- package/test/WorkerClassifierProxy.spec.ts +0 -303
- package/test/index.spec.ts +0 -431
|
@@ -1,366 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview WorkerClassifierProxy — wraps an IContentClassifier to run
|
|
3
|
-
* inference inside a Web Worker, with automatic main-thread fallback.
|
|
4
|
-
*
|
|
5
|
-
* ## Why a proxy?
|
|
6
|
-
* ML inference (even quantized ONNX / WASM pipelines) can block the main
|
|
7
|
-
* thread for 50–500 ms per classification. Moving classification into a
|
|
8
|
-
* Web Worker keeps the UI responsive. This proxy makes the switch
|
|
9
|
-
* transparent to callers: they still call `classify(text)` and receive a
|
|
10
|
-
* `ClassificationResult`; the underlying transport (Worker vs. direct call)
|
|
11
|
-
* is an implementation detail.
|
|
12
|
-
*
|
|
13
|
-
* ## Fallback policy
|
|
14
|
-
* The proxy falls back to direct (main-thread) delegation whenever:
|
|
15
|
-
* - The global `Worker` constructor is undefined (Node.js, older browsers).
|
|
16
|
-
* - `browserConfig.useWebWorker` is explicitly `false`.
|
|
17
|
-
* - Worker creation throws (e.g. strict CSP that blocks `blob:` URLs).
|
|
18
|
-
*
|
|
19
|
-
* Once a fallback has been triggered by a Worker creation error the proxy
|
|
20
|
-
* sets `workerFailed = true` and remains in fallback mode for all subsequent
|
|
21
|
-
* calls.
|
|
22
|
-
*
|
|
23
|
-
* ## IContentClassifier contract
|
|
24
|
-
* The proxy forwards all identity fields (`id`, `displayName`, `description`,
|
|
25
|
-
* `modelId`) and the `isLoaded` state directly from the wrapped classifier so
|
|
26
|
-
* it is completely transparent to the orchestrator.
|
|
27
|
-
*
|
|
28
|
-
* @module agentos/extensions/packs/ml-classifiers/classifiers/WorkerClassifierProxy
|
|
29
|
-
*/
|
|
30
|
-
|
|
31
|
-
import type { ClassificationResult } from '@framers/agentos';
|
|
32
|
-
import type { IContentClassifier } from '../IContentClassifier';
|
|
33
|
-
import type { BrowserConfig } from '../types';
|
|
34
|
-
|
|
35
|
-
// ---------------------------------------------------------------------------
|
|
36
|
-
// Internal message shapes
|
|
37
|
-
// ---------------------------------------------------------------------------
|
|
38
|
-
|
|
39
|
-
/**
 * Request posted by the main thread asking the Worker to classify one text.
 *
 * @internal
 */
type WorkerClassifyRequest = {
  /** Message discriminant; always `'classify'` for a request. */
  type: 'classify';

  /** The text to run through the pipeline. */
  text: string;

  /** Hugging Face model ID (or local path) the Worker should load. */
  modelId: string;

  /**
   * Whether a quantized model variant is requested. The Worker forwards this
   * flag to the `@huggingface/transformers` pipeline factory.
   */
  quantized: boolean;

  /**
   * Pipeline task string (for example `'text-classification'`) the Worker
   * uses when it builds the pipeline for this model.
   */
  taskType: string;
};

/**
 * Reply posted by the Worker when classification succeeded.
 *
 * @internal
 */
type WorkerResultMessage = {
  /** Message discriminant for the success case. */
  type: 'result';

  /** The classification outcome produced inside the Worker. */
  result: ClassificationResult;
};

/**
 * Reply posted by the Worker when classification failed.
 *
 * @internal
 */
type WorkerErrorMessage = {
  /** Message discriminant for the failure case. */
  type: 'error';

  /** Human-readable description of what went wrong. */
  error: string;
};

/** Every message shape the Worker can post back; discriminated on `type`. */
type WorkerResponse = WorkerResultMessage | WorkerErrorMessage;
|
|
96
|
-
|
|
97
|
-
// ---------------------------------------------------------------------------
|
|
98
|
-
// WorkerClassifierProxy
|
|
99
|
-
// ---------------------------------------------------------------------------
|
|
100
|
-
|
|
101
|
-
/**
 * Proxy around an {@link IContentClassifier} that runs `classify()` inside a
 * Web Worker when the environment allows it, and otherwise calls the wrapped
 * classifier directly on the current thread.
 *
 * Identity fields (`id`, `displayName`, `description`, `modelId`) and the
 * `isLoaded` flag are read from (and, for `isLoaded`, written to) the wrapped
 * classifier, so callers see the same values with or without the proxy.
 *
 * NOTE: every Worker-mode call creates a fresh Worker and terminates it once
 * a response arrives, so nothing is kept alive between calls. Any state the
 * Worker script builds up (such as a module-level pipeline cache) therefore
 * does not survive from one `classify()` call to the next.
 *
 * @implements {IContentClassifier}
 *
 * @example Browser context — Web Worker path
 * ```typescript
 * const toxicity = new ToxicityClassifier(serviceRegistry);
 * const proxy = new WorkerClassifierProxy(toxicity, { useWebWorker: true });
 * const result = await proxy.classify('some text');
 * ```
 *
 * @example Node.js / forced fallback path
 * ```typescript
 * const proxy = new WorkerClassifierProxy(toxicity, { useWebWorker: false });
 * // Delegates directly to toxicity.classify() on the same thread.
 * ```
 */
export class WorkerClassifierProxy implements IContentClassifier {
  // -------------------------------------------------------------------------
  // IContentClassifier identity — delegated to the wrapped classifier
  // -------------------------------------------------------------------------

  /** Identifier of the wrapped classifier, returned unchanged. */
  get id(): string {
    return this.wrapped.id;
  }

  /**
   * Display name of the wrapped classifier, returned unchanged.
   *
   * No suffix or other marker is added in Worker mode; the value is always
   * exactly `wrapped.displayName`.
   */
  get displayName(): string {
    return this.wrapped.displayName;
  }

  /** Description of the wrapped classifier, returned unchanged. */
  get description(): string {
    return this.wrapped.description;
  }

  /** Model ID of the wrapped classifier; also sent to the Worker on each request. */
  get modelId(): string {
    return this.wrapped.modelId;
  }

  /** Current `isLoaded` flag of the wrapped classifier. */
  get isLoaded(): boolean {
    return this.wrapped.isLoaded;
  }

  /**
   * Writes the flag through to the wrapped classifier so the state lives in
   * exactly one place.
   */
  set isLoaded(value: boolean) {
    this.wrapped.isLoaded = value;
  }

  // -------------------------------------------------------------------------
  // Internal state
  // -------------------------------------------------------------------------

  /**
   * Becomes `true` the first time constructing a Worker throws. From then on
   * every `classify()` call goes straight to the wrapped classifier and no
   * further Worker construction is attempted.
   */
  private workerFailed = false;

  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------

  /**
   * @param wrapped       - Classifier that supplies the identity fields and
   *                        handles `classify()` whenever the proxy runs in
   *                        fallback mode.
   * @param browserConfig - Optional browser configuration. Worker mode is
   *                        skipped only when `useWebWorker` is exactly
   *                        `false`; omitting the config leaves it enabled.
   */
  constructor(
    private readonly wrapped: IContentClassifier,
    private readonly browserConfig?: BrowserConfig,
  ) {}

  // -------------------------------------------------------------------------
  // classify
  // -------------------------------------------------------------------------

  /**
   * Classify `text`, using a Web Worker when available.
   *
   * Routing is re-evaluated on every call:
   * 1. `typeof Worker === 'undefined'` → wrapped classifier.
   * 2. `browserConfig.useWebWorker === false` → wrapped classifier.
   * 3. `workerFailed === true` → wrapped classifier.
   * 4. Otherwise → Worker.
   *
   * A Worker that reports an error (error message, `error` event or
   * `messageerror` event) causes the returned promise to reject; the proxy
   * does not retry on the main thread in that case. There is no timeout: if
   * the Worker never responds, the promise stays pending.
   *
   * @param text - The text to classify.
   * @returns The classification result.
   */
  async classify(text: string): Promise<ClassificationResult> {
    if (this.shouldUseWebWorker()) {
      return this.classifyInWorker(text);
    }

    // Fallback: same thread, wrapped classifier.
    return this.wrapped.classify(text);
  }

  // -------------------------------------------------------------------------
  // dispose (optional IContentClassifier lifecycle hook)
  // -------------------------------------------------------------------------

  /**
   * Calls `wrapped.dispose()` when the wrapped classifier defines it.
   *
   * The proxy holds no Worker between calls, so there is nothing of its own
   * to release here.
   */
  async dispose(): Promise<void> {
    if (this.wrapped.dispose) {
      await this.wrapped.dispose();
    }
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Decide whether the next call should try the Worker path.
   *
   * @returns `true` when the `Worker` global exists, the caller has not opted
   *          out, and no earlier Worker construction has failed.
   */
  private shouldUseWebWorker(): boolean {
    // No Worker API in this environment.
    if (typeof Worker === 'undefined') {
      return false;
    }

    // Explicit opt-out.
    if (this.browserConfig?.useWebWorker === false) {
      return false;
    }

    // Sticky fallback after a failed construction.
    return !this.workerFailed;
  }

  /**
   * Run one classification inside a short-lived Worker.
   *
   * A Worker is constructed, sent a single `classify` request, and terminated
   * as soon as the exchange settles (result, error message, `error` event,
   * `messageerror` event, or a synchronous `postMessage` failure).
   *
   * If constructing the Worker throws, `workerFailed` is set, a warning is
   * logged, and this call is served by the wrapped classifier instead.
   *
   * @param text - The text to classify inside the Worker.
   * @returns The {@link ClassificationResult} posted back by the Worker.
   */
  private async classifyInWorker(text: string): Promise<ClassificationResult> {
    let worker: Worker;

    try {
      // `new URL(..., import.meta.url)` is the form bundlers recognise for
      // resolving the sibling worker module.
      worker = new Worker(new URL('../worker/classifier-worker.ts', import.meta.url), {
        type: 'module',
      });
    } catch (err) {
      // Construction failed: remember it and serve this call on the main thread.
      this.workerFailed = true;
      console.warn(
        `[WorkerClassifierProxy] Worker creation failed for "${this.wrapped.id}"; ` +
          `falling back to main-thread classification. Reason: ${err}`,
      );
      return this.wrapped.classify(text);
    }

    const request: WorkerClassifyRequest = {
      type: 'classify',
      text,
      modelId: this.wrapped.modelId,
      // The proxy has no access to the wrapped classifier's own settings, so
      // these two values are fixed here.
      quantized: false,
      taskType: 'text-classification',
    };

    return new Promise<ClassificationResult>((resolve, reject) => {
      // Every failure path releases the Worker before rejecting.
      const fail = (reason: unknown): void => {
        worker.terminate();
        reject(reason);
      };

      // The Worker posts exactly one reply per request.
      worker.onmessage = (event: MessageEvent<WorkerResponse>) => {
        const message = event.data;
        worker.terminate();

        if (message.type === 'result') {
          resolve(message.result);
        } else {
          reject(new Error(`Worker classification error: ${message.error}`));
        }
      };

      // Uncaught error inside the Worker.
      worker.onerror = (errorEvent: ErrorEvent) => {
        fail(
          new Error(
            `Worker runtime error in "${this.wrapped.id}": ${errorEvent.message}`,
          ),
        );
      };

      // A reply that cannot be deserialized would otherwise leave the promise
      // pending forever.
      worker.onmessageerror = () => {
        fail(
          new Error(
            `Worker response for "${this.wrapped.id}" could not be deserialized`,
          ),
        );
      };

      try {
        worker.postMessage(request);
      } catch (err) {
        // Without this the promise would still reject, but the Worker would
        // never be terminated.
        fail(err);
      }
    });
  }
}
|
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @fileoverview Web Worker entry point for ML content classification.
|
|
3
|
-
*
|
|
4
|
-
* This script is loaded by {@link WorkerClassifierProxy} as a dedicated Web
|
|
5
|
-
* Worker. It listens for `classify` messages from the main thread, lazily
|
|
6
|
-
* loads the requested model pipeline via `@huggingface/transformers`, runs
|
|
7
|
-
* inference, then posts the result (or an error) back.
|
|
8
|
-
*
|
|
9
|
-
* ## Message protocol
|
|
10
|
-
*
|
|
11
|
-
* **Incoming** (main thread → worker):
|
|
12
|
-
* ```json
|
|
13
|
-
* {
|
|
14
|
-
* "type": "classify",
|
|
15
|
-
* "text": "<string>",
|
|
16
|
-
* "modelId": "<HuggingFace model ID or local path>",
|
|
17
|
-
* "quantized": true | false,
|
|
18
|
-
* "taskType": "<transformers.js task string>"
|
|
19
|
-
* }
|
|
20
|
-
* ```
|
|
21
|
-
*
|
|
22
|
-
* **Outgoing** (worker → main thread) on success:
|
|
23
|
-
* ```json
|
|
24
|
-
* { "type": "result", "result": { "bestClass": "...", "confidence": 0.92, "allScores": [...] } }
|
|
25
|
-
* ```
|
|
26
|
-
*
|
|
27
|
-
* **Outgoing** (worker → main thread) on error:
|
|
28
|
-
* ```json
|
|
29
|
-
* { "type": "error", "error": "<error message>" }
|
|
30
|
-
* ```
|
|
31
|
-
*
|
|
32
|
-
* ## Pipeline caching
|
|
33
|
-
* The pipeline is loaded once per `(modelId, taskType)` key and cached in
|
|
34
|
-
* a module-level `Map`. Subsequent `classify` messages for the same model
|
|
35
|
-
* reuse the cached instance, avoiding repeated expensive model downloads and
|
|
36
|
-
* WASM initialisation.
|
|
37
|
-
*
|
|
38
|
-
* ## Raw label normalisation
|
|
39
|
-
* The worker normalises the raw `@huggingface/transformers` output (an array
|
|
40
|
-
* of `{ label, score }` objects when called with `topk: null`) into the
|
|
41
|
-
* AgentOS {@link ClassificationResult} shape:
|
|
42
|
-
* - `bestClass` — label with the highest score
|
|
43
|
-
* - `confidence` — score of the winning label
|
|
44
|
-
* - `allScores` — all labels mapped to `{ classLabel, score }` pairs
|
|
45
|
-
*
|
|
46
|
-
* @module agentos/extensions/packs/ml-classifiers/worker/classifier-worker
|
|
47
|
-
*/
|
|
48
|
-
|
|
49
|
-
import type { ClassificationResult, ClassificationScore } from '@framers/agentos';
|
|
50
|
-
|
|
51
|
-
// ---------------------------------------------------------------------------
|
|
52
|
-
// Internal message shapes (mirrored from WorkerClassifierProxy for clarity)
|
|
53
|
-
// ---------------------------------------------------------------------------
|
|
54
|
-
|
|
55
|
-
/**
 * Classification request as it arrives from the main thread.
 *
 * @internal
 */
type ClassifyRequest = {
  /** Message discriminant; only `'classify'` is dispatched to the handler. */
  type: 'classify';

  /** Text handed to the pipeline. */
  text: string;

  /** Hugging Face model ID (e.g. `'Xenova/toxic-bert'`) or local path. */
  modelId: string;

  /**
   * Whether a quantized model variant is requested; forwarded as the
   * `{ quantized }` option when the pipeline is created.
   */
  quantized: boolean;

  /** `@huggingface/transformers` task identifier used to create the pipeline. */
  taskType: string;
};

/**
 * One label/score pair from the raw pipeline output.
 *
 * @internal
 */
type RawLabel = {
  /** Label name, e.g. `'toxic'`. */
  label: string;

  /** Score the pipeline assigned to this label. */
  score: number;
};

/**
 * Message posted to the main thread when classification succeeded.
 *
 * @internal
 */
type ResultMessage = {
  type: 'result';
  result: ClassificationResult;
};

/**
 * Message posted to the main thread when classification failed.
 *
 * @internal
 */
type ErrorMessage = {
  type: 'error';
  error: string;
};
|
|
116
|
-
|
|
117
|
-
// ---------------------------------------------------------------------------
|
|
118
|
-
// Pipeline cache
|
|
119
|
-
// ---------------------------------------------------------------------------
|
|
120
|
-
|
|
121
|
-
/**
 * Build the pipeline-cache key for a model/task pair.
 *
 * The task type comes first and the two parts are separated by `::`, so the
 * same model ID used with two different task types yields two distinct keys.
 *
 * @param modelId  - Hugging Face model ID or local path.
 * @param taskType - transformers.js task string.
 * @returns The key in the form `<taskType>::<modelId>`.
 */
function cacheKey(modelId: string, taskType: string): string {
  return [taskType, modelId].join('::');
}
|
|
132
|
-
|
|
133
|
-
/**
 * Call signature of a cached pipeline: takes the text plus `{ topk: null }`
 * and resolves with the raw label/score pairs.
 */
type CachedPipeline = (text: string, opts: { topk: null }) => Promise<RawLabel[]>;

/**
 * Module-level store of already-created pipelines, keyed by string.
 *
 * Starts empty; entries are added the first time a pipeline is created for a
 * given key and are looked up on later requests handled by this module
 * instance.
 */
const pipelineCache: Map<string, CachedPipeline> = new Map();
|
|
141
|
-
|
|
142
|
-
// ---------------------------------------------------------------------------
|
|
143
|
-
// Classification logic
|
|
144
|
-
// ---------------------------------------------------------------------------
|
|
145
|
-
|
|
146
|
-
/**
 * Create (or fetch from `pipelineCache`) the pipeline for the requested
 * model and run it on `request.text`.
 *
 * The cache key combines `cacheKey(modelId, taskType)` with the `quantized`
 * flag, so a quantized and a non-quantized request for the same model and
 * task never share a pipeline instance.
 *
 * @param request - The incoming classify request.
 * @returns The raw label/score array produced by the pipeline.
 * @throws If importing `@huggingface/transformers`, creating the pipeline, or
 *         running inference fails.
 */
async function runPipeline(request: ClassifyRequest): Promise<RawLabel[]> {
  const { modelId, taskType, quantized, text } = request;

  // `quantized` selects a different model variant, so it must be part of the key.
  const key = `${cacheKey(modelId, taskType)}::${quantized ? 'quantized' : 'full'}`;

  let pipe = pipelineCache.get(key);

  if (!pipe) {
    // Dynamic import keeps the package off the module's static dependency
    // list; it is only resolved when a pipeline is actually needed.
    const { pipeline: createPipeline } = await import('@huggingface/transformers');

    // NOTE(review): `quantized` is forwarded as-is. Confirm the installed
    // transformers.js release still honours this option name.
    const newPipe = await createPipeline(taskType, modelId, {
      quantized,
    });

    // The library's return type is broader than what is used here; narrow it
    // to the call shape used below before caching.
    pipe = newPipe as (text: string, opts: { topk: null }) => Promise<RawLabel[]>;
    pipelineCache.set(key, pipe);
  }

  // `topk: null` asks for the score of every label rather than only the best.
  // NOTE(review): confirm the option name (`topk` vs `top_k`) against the
  // installed transformers.js release.
  return pipe(text, { topk: null });
}
|
|
180
|
-
|
|
181
|
-
/**
 * Convert raw pipeline output into a {@link ClassificationResult}.
 *
 * - A missing or empty `raw` array yields `{ bestClass: 'benign',
 *   confidence: 0, allScores: [] }`.
 * - Otherwise `bestClass`/`confidence` come from the entry with the highest
 *   score (the earliest entry wins a tie), and `allScores` lists every entry
 *   as `{ classLabel, score }` in input order.
 *
 * @param raw - Array of `{ label, score }` objects from the pipeline.
 * @returns The normalised result.
 */
function normaliseResult(raw: RawLabel[]): ClassificationResult {
  if (!raw || raw.length === 0) {
    return { bestClass: 'benign', confidence: 0, allScores: [] };
  }

  // Strict `>` keeps the first of several equally scored labels.
  const winner = raw.reduce((top, candidate) =>
    candidate.score > top.score ? candidate : top,
  );

  const allScores: ClassificationScore[] = raw.map(({ label, score }) => ({
    classLabel: label,
    score,
  }));

  return { bestClass: winner.label, confidence: winner.score, allScores };
}
|
|
214
|
-
|
|
215
|
-
// ---------------------------------------------------------------------------
|
|
216
|
-
// Message handler
|
|
217
|
-
// ---------------------------------------------------------------------------
|
|
218
|
-
|
|
219
|
-
/**
 * Serve one `classify` request and post the outcome to the main thread.
 *
 * On success a {@link ResultMessage} carrying the normalised result is
 * posted. Anything thrown while running the pipeline, normalising, or posting
 * the result is reported as an {@link ErrorMessage} instead; its text is the
 * `Error` message, or `String(err)` for non-`Error` values.
 *
 * @param request - The incoming classify request.
 */
async function handleClassify(request: ClassifyRequest): Promise<void> {
  try {
    const labels = await runPipeline(request);
    const success: ResultMessage = {
      type: 'result',
      result: normaliseResult(labels),
    };
    self.postMessage(success);
  } catch (err: unknown) {
    const failure: ErrorMessage = {
      type: 'error',
      error: err instanceof Error ? err.message : String(err),
    };
    self.postMessage(failure);
  }
}
|
|
240
|
-
|
|
241
|
-
// ---------------------------------------------------------------------------
|
|
242
|
-
// Worker bootstrap — listen for messages from the main thread
|
|
243
|
-
// ---------------------------------------------------------------------------
|
|
244
|
-
|
|
245
|
-
/**
 * Entry point for messages from the main thread.
 *
 * Messages whose `type` is `'classify'` are handed to {@link handleClassify}.
 * Anything else is not processed; a warning naming the unexpected type (or
 * the raw payload when it has no `type`) is written to the Worker console.
 */
self.onmessage = (event: MessageEvent) => {
  const payload = event.data as ClassifyRequest;

  if (payload?.type !== 'classify') {
    // Not a request this Worker understands — report it and stop.
    console.warn(
      '[classifier-worker] Received unexpected message type:',
      payload?.type ?? payload,
    );
    return;
  }

  // Fire-and-forget: handleClassify reports its own failures back to the
  // main thread as an error message, so nothing is awaited here.
  void handleClassify(payload);
};
|