@framers/agentos-ext-ml-classifiers 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/dist/MLClassifierGuardrail.d.ts +88 -117
  3. package/dist/MLClassifierGuardrail.d.ts.map +1 -1
  4. package/dist/MLClassifierGuardrail.js +255 -264
  5. package/dist/MLClassifierGuardrail.js.map +1 -1
  6. package/dist/classifiers/InjectionClassifier.d.ts +1 -1
  7. package/dist/classifiers/InjectionClassifier.d.ts.map +1 -1
  8. package/dist/classifiers/JailbreakClassifier.d.ts +1 -1
  9. package/dist/classifiers/JailbreakClassifier.d.ts.map +1 -1
  10. package/dist/classifiers/ToxicityClassifier.d.ts +1 -1
  11. package/dist/classifiers/ToxicityClassifier.d.ts.map +1 -1
  12. package/dist/classifiers/WorkerClassifierProxy.d.ts +1 -1
  13. package/dist/classifiers/WorkerClassifierProxy.d.ts.map +1 -1
  14. package/dist/index.d.ts +16 -90
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +33 -306
  17. package/dist/index.js.map +1 -1
  18. package/dist/keyword-classifier.d.ts +26 -0
  19. package/dist/keyword-classifier.d.ts.map +1 -0
  20. package/dist/keyword-classifier.js +113 -0
  21. package/dist/keyword-classifier.js.map +1 -0
  22. package/dist/llm-classifier.d.ts +27 -0
  23. package/dist/llm-classifier.d.ts.map +1 -0
  24. package/dist/llm-classifier.js +129 -0
  25. package/dist/llm-classifier.js.map +1 -0
  26. package/dist/tools/ClassifyContentTool.d.ts +53 -80
  27. package/dist/tools/ClassifyContentTool.d.ts.map +1 -1
  28. package/dist/tools/ClassifyContentTool.js +52 -103
  29. package/dist/tools/ClassifyContentTool.js.map +1 -1
  30. package/dist/types.d.ts +77 -277
  31. package/dist/types.d.ts.map +1 -1
  32. package/dist/types.js +9 -55
  33. package/dist/types.js.map +1 -1
  34. package/package.json +10 -16
  35. package/src/MLClassifierGuardrail.ts +279 -316
  36. package/src/index.ts +35 -339
  37. package/src/keyword-classifier.ts +130 -0
  38. package/src/llm-classifier.ts +163 -0
  39. package/src/tools/ClassifyContentTool.ts +75 -132
  40. package/src/types.ts +78 -325
  41. package/test/ClassifierOrchestrator.spec.ts +365 -0
  42. package/test/ClassifyContentTool.spec.ts +226 -0
  43. package/test/InjectionClassifier.spec.ts +263 -0
  44. package/test/JailbreakClassifier.spec.ts +295 -0
  45. package/test/MLClassifierGuardrail.spec.ts +486 -0
  46. package/test/SlidingWindowBuffer.spec.ts +391 -0
  47. package/test/ToxicityClassifier.spec.ts +268 -0
  48. package/test/WorkerClassifierProxy.spec.ts +303 -0
  49. package/test/index.spec.ts +431 -0
  50. package/tsconfig.json +20 -0
  51. package/vitest.config.ts +24 -0
@@ -0,0 +1,365 @@
1
+ /**
2
+ * @fileoverview Unit tests for `ClassifierOrchestrator`.
3
+ *
4
+ * Tests verify:
5
+ * - Classifiers run in parallel (total time < sum of individual latencies)
6
+ * - Worst-wins aggregation: any BLOCK → overall BLOCK
7
+ * - FLAG > ALLOW in aggregation
8
+ * - All pass → ALLOW with triggeredBy null
9
+ * - triggeredBy identifies the classifier that caused escalation
10
+ * - Single classifier failure does not block others (contributes ALLOW)
11
+ * - Per-classifier threshold overrides work correctly
12
+ * - dispose() calls dispose on all classifiers
13
+ */
14
+
15
+ import { describe, it, expect, vi } from 'vitest';
16
+ import { ClassifierOrchestrator } from '../src/ClassifierOrchestrator';
17
+ import type { IContentClassifier } from '../src/IContentClassifier';
18
+ import type { ClassificationResult } from '@framers/agentos';
19
+ import type { ClassifierThresholds } from '../src/types';
20
+ import { DEFAULT_THRESHOLDS } from '../src/types';
21
+ import { GuardrailAction } from '@framers/agentos';
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Mock classifier factory
25
+ // ---------------------------------------------------------------------------
26
+
27
+ /**
28
+ * Create a mock classifier that returns a configurable result after an
29
+ * optional simulated delay. The `dispose` method is a vitest spy so
30
+ * callers can assert it was invoked.
31
+ *
32
+ * @param id - Unique classifier ID.
33
+ * @param result - The classification result to return.
34
+ * @param delayMs - Simulated inference latency (ms).
35
+ * @param shouldFail - If true, `classify()` rejects with an error.
36
+ */
37
+ function createMockClassifier(
38
+ id: string,
39
+ result: ClassificationResult,
40
+ delayMs = 0,
41
+ shouldFail = false,
42
+ ): IContentClassifier & { dispose: ReturnType<typeof vi.fn> } {
43
+ return {
44
+ id,
45
+ displayName: `Mock ${id}`,
46
+ description: `Mock classifier: ${id}`,
47
+ modelId: `mock/${id}`,
48
+ isLoaded: true,
49
+ classify: async (_text: string): Promise<ClassificationResult> => {
50
+ if (delayMs > 0) {
51
+ await new Promise((r) => setTimeout(r, delayMs));
52
+ }
53
+ if (shouldFail) {
54
+ throw new Error(`${id} inference failed`);
55
+ }
56
+ return result;
57
+ },
58
+ dispose: vi.fn(async () => {}),
59
+ };
60
+ }
61
+
62
+ // ---------------------------------------------------------------------------
63
+ // Helpers — pre-built classification results
64
+ // ---------------------------------------------------------------------------
65
+
66
+ /** A benign result with very low confidence. */
67
+ const BENIGN_RESULT: ClassificationResult = {
68
+ bestClass: 'benign',
69
+ confidence: 0.1,
70
+ allScores: [{ classLabel: 'benign', score: 0.1 }],
71
+ };
72
+
73
+ /** A toxic result with confidence above the default block threshold (0.9). */
74
+ const TOXIC_BLOCK_RESULT: ClassificationResult = {
75
+ bestClass: 'toxic',
76
+ confidence: 0.95,
77
+ allScores: [{ classLabel: 'toxic', score: 0.95 }],
78
+ };
79
+
80
+ /** A flaggable result — confidence between flag (0.7) and block (0.9). */
81
+ const FLAG_RESULT: ClassificationResult = {
82
+ bestClass: 'suspicious',
83
+ confidence: 0.75,
84
+ allScores: [{ classLabel: 'suspicious', score: 0.75 }],
85
+ };
86
+
87
+ /** A warn-level result — confidence between warn (0.4) and flag (0.7). */
88
+ const WARN_RESULT: ClassificationResult = {
89
+ bestClass: 'mildly_suspicious',
90
+ confidence: 0.5,
91
+ allScores: [{ classLabel: 'mildly_suspicious', score: 0.5 }],
92
+ };
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // Tests
96
+ // ---------------------------------------------------------------------------
97
+
98
+ describe('ClassifierOrchestrator', () => {
99
+ // -----------------------------------------------------------------------
100
+ // Parallel execution
101
+ // -----------------------------------------------------------------------
102
+
103
+ it('runs classifiers in parallel (total time < sum of individual latencies)', async () => {
104
+ // Each classifier takes 50ms. If run sequentially, the total would be ~150ms.
105
+ const classifiers = [
106
+ createMockClassifier('a', BENIGN_RESULT, 50),
107
+ createMockClassifier('b', BENIGN_RESULT, 50),
108
+ createMockClassifier('c', BENIGN_RESULT, 50),
109
+ ];
110
+
111
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
112
+
113
+ const start = performance.now();
114
+ await orchestrator.classifyAll('hello');
115
+ const elapsed = performance.now() - start;
116
+
117
+ // Parallel execution should complete in roughly 50ms + overhead,
118
+ // well under the sequential total of 150ms.
119
+ expect(elapsed).toBeLessThan(130);
120
+ });
121
+
122
+ // -----------------------------------------------------------------------
123
+ // Worst-wins aggregation
124
+ // -----------------------------------------------------------------------
125
+
126
+ it('worst-wins: any BLOCK → result is BLOCK', async () => {
127
+ const classifiers = [
128
+ createMockClassifier('clean', BENIGN_RESULT),
129
+ createMockClassifier('toxic', TOXIC_BLOCK_RESULT),
130
+ ];
131
+
132
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
133
+ const result = await orchestrator.classifyAll('bad text');
134
+
135
+ expect(result.recommendedAction).toBe(GuardrailAction.BLOCK);
136
+ expect(result.triggeredBy).toBe('toxic');
137
+ });
138
+
139
+ it('FLAG > ALLOW in aggregation', async () => {
140
+ const classifiers = [
141
+ createMockClassifier('clean', BENIGN_RESULT),
142
+ createMockClassifier('flagger', FLAG_RESULT),
143
+ ];
144
+
145
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
146
+ const result = await orchestrator.classifyAll('some text');
147
+
148
+ expect(result.recommendedAction).toBe(GuardrailAction.FLAG);
149
+ expect(result.triggeredBy).toBe('flagger');
150
+ });
151
+
152
+ it('BLOCK wins over FLAG in aggregation', async () => {
153
+ const classifiers = [
154
+ createMockClassifier('flagger', FLAG_RESULT),
155
+ createMockClassifier('blocker', TOXIC_BLOCK_RESULT),
156
+ ];
157
+
158
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
159
+ const result = await orchestrator.classifyAll('bad text');
160
+
161
+ expect(result.recommendedAction).toBe(GuardrailAction.BLOCK);
162
+ expect(result.triggeredBy).toBe('blocker');
163
+ });
164
+
165
+ // -----------------------------------------------------------------------
166
+ // All pass → ALLOW
167
+ // -----------------------------------------------------------------------
168
+
169
+ it('all pass → ALLOW with triggeredBy null', async () => {
170
+ const classifiers = [
171
+ createMockClassifier('a', BENIGN_RESULT),
172
+ createMockClassifier('b', BENIGN_RESULT),
173
+ ];
174
+
175
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
176
+ const result = await orchestrator.classifyAll('hello world');
177
+
178
+ expect(result.recommendedAction).toBe(GuardrailAction.ALLOW);
179
+ expect(result.triggeredBy).toBeNull();
180
+ expect(result.results).toHaveLength(2);
181
+ });
182
+
183
+ // -----------------------------------------------------------------------
184
+ // triggeredBy identification
185
+ // -----------------------------------------------------------------------
186
+
187
+ it('triggeredBy identifies which classifier triggered the action', async () => {
188
+ const classifiers = [
189
+ createMockClassifier('safe', BENIGN_RESULT),
190
+ createMockClassifier('injection-detector', TOXIC_BLOCK_RESULT),
191
+ createMockClassifier('also-safe', BENIGN_RESULT),
192
+ ];
193
+
194
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
195
+ const result = await orchestrator.classifyAll('inject this');
196
+
197
+ expect(result.triggeredBy).toBe('injection-detector');
198
+ });
199
+
200
+ // -----------------------------------------------------------------------
201
+ // Classifier failure handling
202
+ // -----------------------------------------------------------------------
203
+
204
+ it('single classifier failure does not block others (contributes ALLOW)', async () => {
205
+ const classifiers = [
206
+ createMockClassifier('broken', BENIGN_RESULT, 0, /* shouldFail */ true),
207
+ createMockClassifier('working', BENIGN_RESULT),
208
+ ];
209
+
210
+ const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
211
+
212
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
213
+ const result = await orchestrator.classifyAll('test');
214
+
215
+ // Only the working classifier's result should appear.
216
+ expect(result.results).toHaveLength(1);
217
+ expect(result.results[0].classifierId).toBe('working');
218
+ expect(result.recommendedAction).toBe(GuardrailAction.ALLOW);
219
+
220
+ // A warning should have been logged for the broken classifier.
221
+ expect(warnSpy).toHaveBeenCalledWith(
222
+ expect.stringContaining('Classifier "broken" failed'),
223
+ );
224
+
225
+ warnSpy.mockRestore();
226
+ });
227
+
228
+ it('failure of one classifier does not suppress BLOCK from another', async () => {
229
+ const classifiers = [
230
+ createMockClassifier('broken', BENIGN_RESULT, 0, true),
231
+ createMockClassifier('blocker', TOXIC_BLOCK_RESULT),
232
+ ];
233
+
234
+ vi.spyOn(console, 'warn').mockImplementation(() => {});
235
+
236
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
237
+ const result = await orchestrator.classifyAll('toxic input');
238
+
239
+ expect(result.recommendedAction).toBe(GuardrailAction.BLOCK);
240
+ expect(result.triggeredBy).toBe('blocker');
241
+
242
+ vi.restoreAllMocks();
243
+ });
244
+
245
+ // -----------------------------------------------------------------------
246
+ // Per-classifier threshold overrides
247
+ // -----------------------------------------------------------------------
248
+
249
+ it('per-classifier threshold overrides work', async () => {
250
+ // Create a classifier whose confidence (0.75) would normally be FLAG
251
+ // with default thresholds, but we lower the block threshold to 0.6
252
+ // for this specific classifier.
253
+ const classifiers = [createMockClassifier('custom', FLAG_RESULT)];
254
+
255
+ const perClassifierThresholds: Record<string, Partial<ClassifierThresholds>> = {
256
+ custom: { blockThreshold: 0.6 },
257
+ };
258
+
259
+ const orchestrator = new ClassifierOrchestrator(
260
+ classifiers,
261
+ DEFAULT_THRESHOLDS,
262
+ perClassifierThresholds,
263
+ );
264
+
265
+ const result = await orchestrator.classifyAll('test');
266
+
267
+ // With block threshold at 0.6 and confidence at 0.75, this should BLOCK.
268
+ expect(result.recommendedAction).toBe(GuardrailAction.BLOCK);
269
+ expect(result.triggeredBy).toBe('custom');
270
+ });
271
+
272
+ it('per-classifier overrides do not affect other classifiers', async () => {
273
+ const classifiers = [
274
+ createMockClassifier('overridden', WARN_RESULT),
275
+ createMockClassifier('default', WARN_RESULT),
276
+ ];
277
+
278
+ const perClassifierThresholds: Record<string, Partial<ClassifierThresholds>> = {
279
+ // Lower the flag threshold for 'overridden' so 0.5 becomes FLAG-level
280
+ overridden: { flagThreshold: 0.45 },
281
+ };
282
+
283
+ const orchestrator = new ClassifierOrchestrator(
284
+ classifiers,
285
+ DEFAULT_THRESHOLDS,
286
+ perClassifierThresholds,
287
+ );
288
+
289
+ const result = await orchestrator.classifyAll('test');
290
+
291
+ // 'overridden' at 0.5 with flagThreshold=0.45 → FLAG.
292
+ // 'default' at 0.5 with flagThreshold=0.7 → SANITIZE (warn).
293
+ // Worst wins: FLAG > SANITIZE → FLAG.
294
+ expect(result.recommendedAction).toBe(GuardrailAction.FLAG);
295
+ expect(result.triggeredBy).toBe('overridden');
296
+ });
297
+
298
+ // -----------------------------------------------------------------------
299
+ // Result metadata
300
+ // -----------------------------------------------------------------------
301
+
302
+ it('includes totalLatencyMs as wall time', async () => {
303
+ const classifiers = [createMockClassifier('a', BENIGN_RESULT, 20)];
304
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
305
+ const result = await orchestrator.classifyAll('text');
306
+
307
+ expect(result.totalLatencyMs).toBeGreaterThanOrEqual(0);
308
+ });
309
+
310
+ it('annotates each result with classifierId and latencyMs', async () => {
311
+ const classifiers = [
312
+ createMockClassifier('alpha', BENIGN_RESULT),
313
+ createMockClassifier('beta', FLAG_RESULT),
314
+ ];
315
+
316
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
317
+ const result = await orchestrator.classifyAll('text');
318
+
319
+ expect(result.results).toHaveLength(2);
320
+ expect(result.results[0].classifierId).toBe('alpha');
321
+ expect(result.results[1].classifierId).toBe('beta');
322
+
323
+ for (const r of result.results) {
324
+ expect(r.latencyMs).toBeGreaterThanOrEqual(0);
325
+ expect(r.bestClass).toBeDefined();
326
+ expect(r.confidence).toBeDefined();
327
+ }
328
+ });
329
+
330
+ // -----------------------------------------------------------------------
331
+ // dispose
332
+ // -----------------------------------------------------------------------
333
+
334
+ it('dispose calls dispose on all classifiers', async () => {
335
+ const classifiers = [
336
+ createMockClassifier('a', BENIGN_RESULT),
337
+ createMockClassifier('b', BENIGN_RESULT),
338
+ createMockClassifier('c', BENIGN_RESULT),
339
+ ];
340
+
341
+ const orchestrator = new ClassifierOrchestrator(classifiers, DEFAULT_THRESHOLDS);
342
+ await orchestrator.dispose();
343
+
344
+ for (const c of classifiers) {
345
+ expect(c.dispose).toHaveBeenCalledOnce();
346
+ }
347
+ });
348
+
349
+ it('dispose handles classifiers without dispose method', async () => {
350
+ const classifier: IContentClassifier = {
351
+ id: 'no-dispose',
352
+ displayName: 'No Dispose',
353
+ description: 'Test',
354
+ modelId: 'test',
355
+ isLoaded: true,
356
+ classify: async () => BENIGN_RESULT,
357
+ // No dispose method.
358
+ };
359
+
360
+ const orchestrator = new ClassifierOrchestrator([classifier], DEFAULT_THRESHOLDS);
361
+
362
+ // Should not throw.
363
+ await expect(orchestrator.dispose()).resolves.toBeUndefined();
364
+ });
365
+ });
@@ -0,0 +1,226 @@
1
+ /**
2
+ * @fileoverview Unit tests for `ClassifyContentTool`.
3
+ *
4
+ * Tests verify:
5
+ * - Has correct ITool properties (id, name, displayName, etc.)
6
+ * - inputSchema has text (required) and classifiers (optional)
7
+ * - execute returns ChunkEvaluation with results for toxic text
8
+ * - Returns ALLOW for benign text
9
+ * - Returns error for empty text
10
+ */
11
+
12
+ import { describe, it, expect, vi } from 'vitest';
13
+ import { ClassifyContentTool } from '../src/tools/ClassifyContentTool';
14
+ import { ClassifierOrchestrator } from '../src/ClassifierOrchestrator';
15
+ import type { IContentClassifier } from '../src/IContentClassifier';
16
+ import type { ClassificationResult } from '@framers/agentos';
17
+ import { DEFAULT_THRESHOLDS } from '../src/types';
18
+ import { GuardrailAction } from '@framers/agentos';
19
+ import type { ToolExecutionContext } from '@framers/agentos';
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Mock helpers
23
+ // ---------------------------------------------------------------------------
24
+
25
+ /**
26
+ * Create a mock classifier returning a configurable result.
27
+ */
28
+ function createMockClassifier(
29
+ id: string,
30
+ result: ClassificationResult,
31
+ ): IContentClassifier {
32
+ return {
33
+ id,
34
+ displayName: `Mock ${id}`,
35
+ description: `Mock classifier: ${id}`,
36
+ modelId: `mock/${id}`,
37
+ isLoaded: true,
38
+ classify: vi.fn(async () => result),
39
+ dispose: vi.fn(async () => {}),
40
+ };
41
+ }
42
+
43
+ /** Benign result — low confidence. */
44
+ const BENIGN: ClassificationResult = {
45
+ bestClass: 'benign',
46
+ confidence: 0.1,
47
+ allScores: [{ classLabel: 'benign', score: 0.1 }],
48
+ };
49
+
50
+ /** Toxic result — above default block threshold. */
51
+ const TOXIC: ClassificationResult = {
52
+ bestClass: 'toxic',
53
+ confidence: 0.95,
54
+ allScores: [{ classLabel: 'toxic', score: 0.95 }],
55
+ };
56
+
57
+ /** Minimal execution context for tool invocation. */
58
+ const EXEC_CONTEXT: ToolExecutionContext = {
59
+ gmiId: 'gmi-1',
60
+ personaId: 'persona-1',
61
+ userContext: { userId: 'user-1' } as any,
62
+ };
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // Tests
66
+ // ---------------------------------------------------------------------------
67
+
68
+ describe('ClassifyContentTool', () => {
69
+ // -----------------------------------------------------------------------
70
+ // ITool metadata
71
+ // -----------------------------------------------------------------------
72
+
73
+ describe('ITool properties', () => {
74
+ it('has correct id and name', () => {
75
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
76
+ const tool = new ClassifyContentTool(orchestrator);
77
+
78
+ expect(tool.id).toBe('classify_content');
79
+ expect(tool.name).toBe('classify_content');
80
+ });
81
+
82
+ it('has correct displayName and description', () => {
83
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
84
+ const tool = new ClassifyContentTool(orchestrator);
85
+
86
+ expect(tool.displayName).toBe('Content Safety Classifier');
87
+ expect(tool.description).toContain('toxicity');
88
+ expect(tool.description).toContain('prompt injection');
89
+ expect(tool.description).toContain('jailbreak');
90
+ });
91
+
92
+ it('has category=security and version=1.0.0', () => {
93
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
94
+ const tool = new ClassifyContentTool(orchestrator);
95
+
96
+ expect(tool.category).toBe('security');
97
+ expect(tool.version).toBe('1.0.0');
98
+ });
99
+
100
+ it('has hasSideEffects=false', () => {
101
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
102
+ const tool = new ClassifyContentTool(orchestrator);
103
+
104
+ expect(tool.hasSideEffects).toBe(false);
105
+ });
106
+ });
107
+
108
+ // -----------------------------------------------------------------------
109
+ // inputSchema
110
+ // -----------------------------------------------------------------------
111
+
112
+ describe('inputSchema', () => {
113
+ it('has text as a required property', () => {
114
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
115
+ const tool = new ClassifyContentTool(orchestrator);
116
+
117
+ expect(tool.inputSchema.type).toBe('object');
118
+ expect(tool.inputSchema.properties.text).toBeDefined();
119
+ expect(tool.inputSchema.properties.text.type).toBe('string');
120
+ expect(tool.inputSchema.required).toContain('text');
121
+ });
122
+
123
+ it('has classifiers as an optional array property', () => {
124
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
125
+ const tool = new ClassifyContentTool(orchestrator);
126
+
127
+ const classifiersProp = tool.inputSchema.properties.classifiers;
128
+ expect(classifiersProp).toBeDefined();
129
+ expect(classifiersProp.type).toBe('array');
130
+ expect(classifiersProp.items.type).toBe('string');
131
+
132
+ // Should NOT be in the required list.
133
+ expect(tool.inputSchema.required).not.toContain('classifiers');
134
+ });
135
+ });
136
+
137
+ // -----------------------------------------------------------------------
138
+ // execute
139
+ // -----------------------------------------------------------------------
140
+
141
+ describe('execute', () => {
142
+ it('returns ChunkEvaluation with results for toxic text', async () => {
143
+ const classifier = createMockClassifier('tox', TOXIC);
144
+ const orchestrator = new ClassifierOrchestrator([classifier], DEFAULT_THRESHOLDS);
145
+ const tool = new ClassifyContentTool(orchestrator);
146
+
147
+ const result = await tool.execute({ text: 'you are terrible' }, EXEC_CONTEXT);
148
+
149
+ expect(result.success).toBe(true);
150
+ expect(result.output).toBeDefined();
151
+ expect(result.output!.recommendedAction).toBe(GuardrailAction.BLOCK);
152
+ expect(result.output!.results).toHaveLength(1);
153
+ expect(result.output!.results[0].classifierId).toBe('tox');
154
+ expect(result.output!.triggeredBy).toBe('tox');
155
+ });
156
+
157
+ it('returns ALLOW for benign text', async () => {
158
+ const classifier = createMockClassifier('safe', BENIGN);
159
+ const orchestrator = new ClassifierOrchestrator([classifier], DEFAULT_THRESHOLDS);
160
+ const tool = new ClassifyContentTool(orchestrator);
161
+
162
+ const result = await tool.execute({ text: 'hello world' }, EXEC_CONTEXT);
163
+
164
+ expect(result.success).toBe(true);
165
+ expect(result.output).toBeDefined();
166
+ expect(result.output!.recommendedAction).toBe(GuardrailAction.ALLOW);
167
+ expect(result.output!.triggeredBy).toBeNull();
168
+ });
169
+
170
+ it('returns error for empty text', async () => {
171
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
172
+ const tool = new ClassifyContentTool(orchestrator);
173
+
174
+ const result = await tool.execute({ text: '' }, EXEC_CONTEXT);
175
+
176
+ expect(result.success).toBe(false);
177
+ expect(result.error).toContain('required');
178
+ });
179
+
180
+ it('returns error for whitespace-only text', async () => {
181
+ const orchestrator = new ClassifierOrchestrator([], DEFAULT_THRESHOLDS);
182
+ const tool = new ClassifyContentTool(orchestrator);
183
+
184
+ const result = await tool.execute({ text: ' ' }, EXEC_CONTEXT);
185
+
186
+ expect(result.success).toBe(false);
187
+ expect(result.error).toContain('required');
188
+ });
189
+
190
+ it('handles orchestrator errors gracefully', async () => {
191
+ // Create a classifier that always throws.
192
+ const brokenClassifier: IContentClassifier = {
193
+ id: 'broken',
194
+ displayName: 'Broken',
195
+ description: 'Always fails',
196
+ modelId: 'broken',
197
+ isLoaded: true,
198
+ classify: async () => { throw new Error('model crash'); },
199
+ };
200
+
201
+ // Even though the classifier throws, the orchestrator catches it via
202
+ // allSettled, so the tool should still succeed with ALLOW.
203
+ const orchestrator = new ClassifierOrchestrator([brokenClassifier], DEFAULT_THRESHOLDS);
204
+ vi.spyOn(console, 'warn').mockImplementation(() => {});
205
+
206
+ const tool = new ClassifyContentTool(orchestrator);
207
+ const result = await tool.execute({ text: 'test' }, EXEC_CONTEXT);
208
+
209
+ expect(result.success).toBe(true);
210
+ expect(result.output!.recommendedAction).toBe(GuardrailAction.ALLOW);
211
+
212
+ vi.restoreAllMocks();
213
+ });
214
+
215
+ it('includes totalLatencyMs in output', async () => {
216
+ const classifier = createMockClassifier('safe', BENIGN);
217
+ const orchestrator = new ClassifierOrchestrator([classifier], DEFAULT_THRESHOLDS);
218
+ const tool = new ClassifyContentTool(orchestrator);
219
+
220
+ const result = await tool.execute({ text: 'test' }, EXEC_CONTEXT);
221
+
222
+ expect(result.success).toBe(true);
223
+ expect(result.output!.totalLatencyMs).toBeGreaterThanOrEqual(0);
224
+ });
225
+ });
226
+ });