@artemiskit/sdk 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ /**
2
+ * @artemiskit/sdk
3
+ * Tests for ArtemisKit class
4
+ */
5
+
6
+ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
7
+
8
+ // Mock modules before importing ArtemisKit
9
// Stand-in for @artemiskit/core: every export listed here is replaced with a
// deterministic fake. Everything is declared inside the factory because
// vi.mock calls are hoisted above the file's imports and cannot capture
// module-level variables.
vi.mock('@artemiskit/core', () => ({
  // Adapter factory: resolves to a client whose generate() always returns the
  // same canned completion.
  createAdapter: vi.fn().mockResolvedValue({
    provider: 'mock',
    generate: vi.fn().mockResolvedValue({
      id: 'test-id',
      model: 'mock-model',
      text: 'Hello, world!',
      tokens: { prompt: 10, completion: 5, total: 15 },
      latencyMs: 100,
      finishReason: 'stop',
    }),
    capabilities: vi.fn().mockResolvedValue({
      streaming: false,
      functionCalling: false,
      toolUse: false,
      maxContext: 4096,
    }),
    close: vi.fn(),
  }),
  // Scenario loader: ignores its arguments and resolves to a fixed two-case
  // scenario.
  parseScenarioFile: vi.fn().mockResolvedValue({
    name: 'Test Scenario',
    version: '1.0',
    model: 'mock-model',
    cases: [
      {
        id: 'case-1',
        name: 'Test Case 1',
        prompt: 'Say hello',
        expected: { type: 'contains', values: ['Hello'], mode: 'any' },
        tags: ['greeting'],
        metadata: {},
        retries: 0,
      },
      {
        id: 'case-2',
        name: 'Test Case 2',
        prompt: 'Say goodbye',
        expected: { type: 'contains', values: ['Goodbye'], mode: 'any' },
        tags: ['farewell'],
        metadata: {},
        retries: 0,
      },
    ],
    tags: [],
  }),
  // Runner: marks every case as passed, reports each one through
  // options.onCaseComplete (when provided), and returns a manifest whose
  // aggregate metrics are derived from the generated case results.
  // biome-ignore lint/suspicious/noExplicitAny: Mock helper
  runScenario: vi.fn().mockImplementation(async (options: any) => {
    const cases = options.scenario.cases;

    // Accumulated while the results are built so the manifest metrics always
    // agree with the per-case data, whatever the scenario size.
    const latencies: number[] = [];
    const tokenTotals = { prompt: 0, completion: 0, total: 0 };

    // biome-ignore lint/suspicious/noExplicitAny: Mock helper
    const results = cases.map((c: any, i: number) => {
      const result = {
        id: c.id,
        name: c.name,
        ok: true,
        score: 1,
        matcherType: c.expected.type,
        reason: 'Passed',
        latencyMs: 100 + i * 10,
        tokens: { prompt: 10, completion: 5, total: 15 },
        prompt: c.prompt,
        response: 'Hello, world!',
        expected: c.expected,
        tags: c.tags,
      };
      latencies.push(result.latencyMs);
      tokenTotals.prompt += result.tokens.prompt;
      tokenTotals.completion += result.tokens.completion;
      tokenTotals.total += result.tokens.total;
      options.onCaseComplete?.(result, i, cases.length);
      return result;
    });

    // Latency aggregates over the sorted samples; an empty run reports 0.
    // For the two-case fixture above these come out as median 105, p95 110 and
    // token totals 30 / 20 / 10.
    const sorted = [...latencies].sort((a, b) => a - b);
    const sampleAt = (index: number): number => sorted[index] ?? 0;
    const middle = Math.floor(sorted.length / 2);
    const medianLatency =
      sorted.length % 2 === 1 ? sampleAt(middle) : (sampleAt(middle - 1) + sampleAt(middle)) / 2;
    // Nearest-rank 95th percentile.
    const p95Latency = sampleAt(Math.max(0, Math.ceil(sorted.length * 0.95) - 1));

    return {
      manifest: {
        version: '1.0',
        run_id: 'test-run-id',
        project: options.project || 'default',
        start_time: new Date().toISOString(),
        end_time: new Date().toISOString(),
        duration_ms: 200,
        config: {
          scenario: options.scenario.name,
          provider: 'mock',
          model: 'mock-model',
        },
        metrics: {
          // Every generated case passes, so the pass counters mirror the case count.
          success_rate: 1,
          total_cases: cases.length,
          passed_cases: cases.length,
          failed_cases: 0,
          median_latency_ms: medianLatency,
          p95_latency_ms: p95Latency,
          total_tokens: tokenTotals.total,
          total_prompt_tokens: tokenTotals.prompt,
          total_completion_tokens: tokenTotals.completion,
        },
        git: {
          commit: 'abc123',
          branch: 'main',
          dirty: false,
        },
        provenance: {
          run_by: 'test',
        },
        cases: results,
        environment: {
          node_version: process.version,
          platform: process.platform,
          arch: process.arch,
        },
      },
      cases: results,
      success: true,
    };
  }),
  // Git lookup: resolves to the same fixed commit/branch used in the manifest.
  getGitInfo: vi.fn().mockResolvedValue({
    commit: 'abc123',
    branch: 'main',
    dirty: false,
  }),
  // Cost helpers: bare spies with no configured return value.
  estimateCost: vi.fn(),
  formatCost: vi.fn(),
  getModelPricing: vi.fn(),
}));
129
+
130
// Stand-in for @artemiskit/redteam. Each mutation fake exposes name,
// description and severity, plus a mutate() that tags the prompt with a short
// prefix. The generator, detector and severity mapper return fixed results.
vi.mock('@artemiskit/redteam', () => {
  class TypoMutation {
    name = 'typo';
    description = 'Typo mutation';
    severity = 'low' as const;

    mutate(prompt: string) {
      return `typo: ${prompt}`;
    }
  }

  class RoleSpoofMutation {
    name = 'role-spoof';
    description = 'Role spoof mutation';
    severity = 'medium' as const;

    mutate(prompt: string) {
      return `role: ${prompt}`;
    }
  }

  class InstructionFlipMutation {
    name = 'instruction-flip';
    description = 'Instruction flip mutation';
    severity = 'medium' as const;

    mutate(prompt: string) {
      return `flip: ${prompt}`;
    }
  }

  class CotInjectionMutation {
    name = 'cot-injection';
    description = 'COT injection mutation';
    severity = 'high' as const;

    mutate(prompt: string) {
      return `cot: ${prompt}`;
    }
  }

  class EncodingMutation {
    name = 'encoding';
    description = 'Encoding mutation';
    severity = 'medium' as const;

    mutate(prompt: string) {
      return `enc: ${prompt}`;
    }
  }

  class MultiTurnMutation {
    name = 'multi-turn';
    description = 'Multi-turn mutation';
    severity = 'high' as const;

    mutate(prompt: string) {
      return `multi: ${prompt}`;
    }
  }

  class RedTeamGenerator {
    // biome-ignore lint/suspicious/noExplicitAny: Mock helper
    mutations: any[];

    // biome-ignore lint/suspicious/noExplicitAny: Mock helper
    constructor(mutations?: any[]) {
      // A missing list falls back to an empty one.
      this.mutations = mutations ?? [];
    }

    // Produces `count` variants; each is labelled with its index and always
    // reports the 'typo' mutation at 'low' severity, regardless of the
    // mutations held by the instance.
    generate(prompt: string, count: number) {
      const toVariant = (_unused: unknown, index: number) => ({
        original: prompt,
        mutated: `mutated-${index}: ${prompt}`,
        mutations: ['typo'],
        severity: 'low' as const,
      });
      return Array.from({ length: count }, toVariant);
    }

    // Summarises the held mutations as { name, description, severity } records.
    listMutations() {
      // biome-ignore lint/suspicious/noExplicitAny: Mock helper
      return this.mutations.map(({ name, description, severity }: any) => ({
        name,
        description,
        severity,
      }));
    }
  }

  class UnsafeResponseDetector {
    // Always reports the response as safe.
    detect() {
      return { unsafe: false, blocked: false, reasons: [], severity: 'none' };
    }
  }

  class SeverityMapper {
    // Always maps to 'none'.
    mapMutationToSeverity() {
      return 'none';
    }
  }

  return {
    TypoMutation,
    RoleSpoofMutation,
    InstructionFlipMutation,
    CotInjectionMutation,
    EncodingMutation,
    MultiTurnMutation,
    RedTeamGenerator,
    UnsafeResponseDetector,
    SeverityMapper,
  };
});
216
+
217
// Pin nanoid to a constant so generated identifiers are predictable.
vi.mock('nanoid', () => {
  const fixedId = () => 'test-id-12345';
  return { nanoid: fixedId };
});
220
+
221
+ // Import after mocks are set up
222
+ import { ArtemisKit } from '../artemiskit';
223
+
224
// Behavioural tests for the ArtemisKit facade, run against the module mocks
// declared at the top of this file.
describe('ArtemisKit', () => {
  // Scenario path handed to run(); presumably resolved through the mocked
  // parseScenarioFile, which yields the fixed two-case scenario.
  const scenarioPath = './test.yaml';

  let kit: ArtemisKit;

  // Fresh instance per test so listeners registered in one test never leak
  // into the next.
  beforeEach(() => {
    kit = new ArtemisKit({
      project: 'test-project',
      provider: 'openai',
      model: 'gpt-4',
    });
  });

  // Reset recorded calls on every mock between tests.
  afterEach(() => {
    vi.clearAllMocks();
  });

  describe('constructor', () => {
    it('should create instance with default config', () => {
      expect(new ArtemisKit()).toBeInstanceOf(ArtemisKit);
    });

    it('should create instance with custom config', () => {
      const configured = new ArtemisKit({
        project: 'my-project',
        provider: 'azure-openai',
        model: 'gpt-4-turbo',
        timeout: 30000,
        retries: 3,
        concurrency: 5,
      });

      expect(configured).toBeInstanceOf(ArtemisKit);
    });
  });

  describe('event handling', () => {
    it('should register and emit caseComplete events', async () => {
      const onComplete = vi.fn();
      kit.onCaseComplete(onComplete);

      await kit.run({ scenario: scenarioPath });

      // One event per case in the mocked scenario; the first carries index 0.
      const firstCaseEvent = expect.objectContaining({
        result: expect.objectContaining({ id: 'case-1' }),
        index: 0,
        total: 2,
      });
      expect(onComplete).toHaveBeenCalledTimes(2);
      expect(onComplete).toHaveBeenCalledWith(firstCaseEvent);
    });

    it('should register and emit progress events', async () => {
      const onProgress = vi.fn();
      kit.onProgress(onProgress);

      await kit.run({ scenario: scenarioPath });

      const anyKnownPhase = expect.objectContaining({
        phase: expect.stringMatching(/setup|running|teardown/),
      });
      expect(onProgress).toHaveBeenCalled();
      expect(onProgress).toHaveBeenCalledWith(anyKnownPhase);
    });

    it('should support once() for one-time handlers', async () => {
      const oneShot = vi.fn();
      kit.once('progress', oneShot);

      await kit.run({ scenario: scenarioPath });

      // A single invocation is expected even though a run emits several
      // progress events.
      expect(oneShot).toHaveBeenCalledTimes(1);
    });

    it('should support off() to remove handlers', async () => {
      const removed = vi.fn();
      kit.on('progress', removed);
      kit.off('progress', removed);

      await kit.run({ scenario: scenarioPath });

      expect(removed).not.toHaveBeenCalled();
    });

    it('should chain event registration methods', () => {
      const noop = () => {};

      const chained = kit.onCaseStart(noop).onCaseComplete(noop).onProgress(noop);

      // Each registration method must hand back the same instance.
      expect(chained).toBe(kit);
    });
  });

  describe('run()', () => {
    it('should run a scenario from file path', async () => {
      const outcome = await kit.run({ scenario: scenarioPath });
      const { metrics } = outcome.manifest;

      expect(outcome.success).toBe(true);
      expect(metrics.total_cases).toBe(2);
      expect(metrics.passed_cases).toBe(2);
    });

    it('should run a scenario from inline object', async () => {
      const inlineScenario = {
        name: 'Inline Scenario',
        version: '1.0',
        model: 'gpt-4',
        cases: [
          {
            id: 'inline-1',
            name: 'Inline Test',
            prompt: 'Hello',
            expected: {
              type: 'contains' as const,
              values: ['Hello'],
              mode: 'any' as const,
            },
            tags: [],
            metadata: {},
            retries: 0,
          },
        ],
        tags: [],
      };

      const outcome = await kit.run({ scenario: inlineScenario });

      expect(outcome.success).toBe(true);
    });

    it('should pass tags filter to runner', async () => {
      const outcome = await kit.run({
        scenario: scenarioPath,
        tags: ['greeting'],
      });

      expect(outcome.success).toBe(true);
    });

    it('should use custom concurrency', async () => {
      const outcome = await kit.run({
        scenario: scenarioPath,
        concurrency: 5,
      });

      expect(outcome.success).toBe(true);
    });

    it('should use custom timeout', async () => {
      const outcome = await kit.run({
        scenario: scenarioPath,
        timeout: 60000,
      });

      expect(outcome.success).toBe(true);
    });
  });

  describe('getAvailableMutations()', () => {
    it('should return list of available mutations', () => {
      const available = kit.getAvailableMutations();

      expect(Array.isArray(available)).toBe(true);
      for (const mutationName of ['typo', 'role-spoof', 'encoding']) {
        expect(available).toContain(mutationName);
      }
    });
  });
});
395
+
396
// Checks that run() accepts a caller-supplied client object.
describe('ArtemisKit with custom client', () => {
  it('should use provided client instead of creating one', async () => {
    // Canned payloads returned by the hand-rolled client below.
    const cannedCompletion = {
      id: 'custom-id',
      model: 'custom-model',
      text: 'Custom response',
      tokens: { prompt: 5, completion: 3, total: 8 },
      latencyMs: 50,
      finishReason: 'stop',
    };
    const cannedCapabilities = {
      streaming: false,
      functionCalling: false,
      toolUse: false,
      maxContext: 4096,
    };
    const stubClient = {
      provider: 'custom',
      generate: vi.fn().mockResolvedValue(cannedCompletion),
      capabilities: vi.fn().mockResolvedValue(cannedCapabilities),
    };

    const sdk = new ArtemisKit();
    const outcome = await sdk.run({
      scenario: './test.yaml',
      // @ts-expect-error - Testing with mock client
      client: stubClient,
    });

    expect(outcome.success).toBe(true);
  });
});