@artemiskit/cli 0.1.8 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +139 -0
- package/bin/artemis.ts +0 -0
- package/dist/index.js +72343 -34002
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/commands/baseline.d.ts +9 -0
- package/dist/src/commands/baseline.d.ts.map +1 -0
- package/dist/src/commands/compare.d.ts.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/commands/stress.d.ts.map +1 -1
- package/dist/src/config/loader.d.ts +3 -1
- package/dist/src/config/loader.d.ts.map +1 -1
- package/dist/src/config/schema.d.ts +16 -0
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/ui/index.d.ts +3 -1
- package/dist/src/ui/index.d.ts.map +1 -1
- package/dist/src/ui/panels.d.ts +21 -0
- package/dist/src/ui/panels.d.ts.map +1 -1
- package/dist/src/ui/prompts.d.ts +92 -0
- package/dist/src/ui/prompts.d.ts.map +1 -0
- package/dist/src/utils/adapter.d.ts.map +1 -1
- package/package.json +6 -6
- package/src/cli.ts +2 -0
- package/src/commands/baseline.ts +473 -0
- package/src/commands/compare.ts +25 -0
- package/src/commands/init.ts +173 -69
- package/src/commands/redteam.ts +63 -10
- package/src/commands/run.ts +863 -141
- package/src/commands/stress.ts +76 -3
- package/src/config/loader.ts +5 -2
- package/src/config/schema.ts +4 -0
- package/src/ui/index.ts +19 -0
- package/src/ui/panels.ts +153 -5
- package/src/ui/prompts.ts +749 -0
- package/src/utils/adapter.ts +15 -0
package/src/commands/init.ts
CHANGED
|
@@ -7,7 +7,13 @@ import { appendFile, mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
|
7
7
|
import { join } from 'node:path';
|
|
8
8
|
import chalk from 'chalk';
|
|
9
9
|
import { Command } from 'commander';
|
|
10
|
-
import {
|
|
10
|
+
import {
|
|
11
|
+
type InitWizardResult,
|
|
12
|
+
createSpinner,
|
|
13
|
+
icons,
|
|
14
|
+
isInteractive,
|
|
15
|
+
runInitWizard,
|
|
16
|
+
} from '../ui/index.js';
|
|
11
17
|
import { checkForUpdateAndNotify, getCurrentVersion } from '../utils/update-checker.js';
|
|
12
18
|
|
|
13
19
|
const DEFAULT_CONFIG = `# ArtemisKit Configuration
|
|
@@ -86,6 +92,67 @@ const ENV_KEYS = [
|
|
|
86
92
|
'ANTHROPIC_API_KEY=',
|
|
87
93
|
];
|
|
88
94
|
|
|
/**
 * Render a string as a YAML scalar that is safe to place after `key: `.
 *
 * Values made only of identifier-like characters (optionally with `:`-separated
 * segments such as `llama3:8b`) are emitted unchanged so typical configs stay
 * unquoted. Anything else — spaces, `#`, `: `, leading digits, or words YAML
 * would read as booleans/null — is emitted as a double-quoted scalar.
 * JSON string syntax is used for the quoting because it is also valid YAML
 * double-quoted syntax.
 */
function quoteYamlScalar(value: string): string {
  const plainSafe = /^[A-Za-z_][A-Za-z0-9_./-]*(?::[A-Za-z0-9_./-]+)*$/;
  const reservedWord = /^(?:true|false|yes|no|on|off|y|n|null)$/i;
  if (plainSafe.test(value) && !reservedWord.test(value)) {
    return value;
  }
  return JSON.stringify(value);
}

/**
 * Generate config content from wizard results.
 *
 * @param wizard - Answers collected by the interactive init wizard. The fields
 *   read here are `projectName`, `provider`, `model` and `storageType`.
 * @returns The full text of an `artemis.config.yaml` file.
 *
 * User-supplied values are passed through {@link quoteYamlScalar} so that a
 * project or model name containing YAML-significant characters cannot corrupt
 * the generated file. If `wizard.provider` has no matching template, the
 * OpenAI provider block is used as the fallback (the top-level `provider:`
 * line still records the value the wizard returned).
 */
function generateConfigFromWizard(wizard: InitWizardResult): string {
  const project = quoteYamlScalar(wizard.projectName);
  const provider = quoteYamlScalar(wizard.provider);
  const model = quoteYamlScalar(wizard.model);

  // `\${...}` keeps the placeholder literal in the output; only `${model}` is
  // substituted when the template is evaluated.
  const providerConfigs: Record<string, string> = {
    openai: `  openai:
    apiKey: \${OPENAI_API_KEY}
    defaultModel: ${model}`,
    'azure-openai': `  azure-openai:
    apiKey: \${AZURE_OPENAI_API_KEY}
    resourceName: \${AZURE_OPENAI_RESOURCE}
    deploymentName: \${AZURE_OPENAI_DEPLOYMENT}
    apiVersion: "2024-02-15-preview"`,
    anthropic: `  anthropic:
    apiKey: \${ANTHROPIC_API_KEY}
    defaultModel: ${model}`,
    google: `  google:
    apiKey: \${GOOGLE_AI_API_KEY}
    defaultModel: ${model}`,
    mistral: `  mistral:
    apiKey: \${MISTRAL_API_KEY}
    defaultModel: ${model}`,
    ollama: `  ollama:
    baseUrl: http://localhost:11434
    defaultModel: ${model}`,
  };

  // Own-property check so names inherited from Object.prototype (for example
  // "constructor") cannot be picked up as a provider template.
  const providerBlock = Object.prototype.hasOwnProperty.call(providerConfigs, wizard.provider)
    ? providerConfigs[wizard.provider]
    : providerConfigs.openai;

  const storageConfig =
    wizard.storageType === 'supabase'
      ? `storage:
  type: supabase
  supabaseUrl: \${SUPABASE_URL}
  supabaseKey: \${SUPABASE_ANON_KEY}`
      : `storage:
  type: local
  basePath: ./artemis-runs`;

  return `# ArtemisKit Configuration
project: ${project}

# Default provider settings
provider: ${provider}
model: ${model}

# Provider configurations
providers:
${providerBlock}

# Storage configuration
${storageConfig}

# Scenarios directory
scenariosDir: ./scenarios

# Output settings
output:
  format: json
  dir: ./artemis-output
`;
}
|
|
155
|
+
|
|
89
156
|
function renderWelcomeBanner(): string {
|
|
90
157
|
// Brand color for "KIT" portion: #fb923c (orange)
|
|
91
158
|
const brandColor = chalk.hex('#fb923c');
|
|
@@ -230,82 +297,119 @@ export function initCommand(): Command {
|
|
|
230
297
|
.description('Initialize ArtemisKit in the current directory')
|
|
231
298
|
.option('-f, --force', 'Overwrite existing configuration')
|
|
232
299
|
.option('--skip-env', 'Skip adding environment variables to .env')
|
|
233
|
-
.
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
300
|
+
.option('-i, --interactive', 'Run interactive setup wizard')
|
|
301
|
+
.option('-y, --yes', 'Use defaults without prompts (non-interactive)')
|
|
302
|
+
.action(
|
|
303
|
+
async (options: {
|
|
304
|
+
force?: boolean;
|
|
305
|
+
skipEnv?: boolean;
|
|
306
|
+
interactive?: boolean;
|
|
307
|
+
yes?: boolean;
|
|
308
|
+
}) => {
|
|
309
|
+
const spinner = createSpinner();
|
|
310
|
+
|
|
311
|
+
try {
|
|
312
|
+
const cwd = process.cwd();
|
|
313
|
+
|
|
314
|
+
// Show welcome banner
|
|
315
|
+
console.log(renderWelcomeBanner());
|
|
316
|
+
|
|
317
|
+
// Determine if we should run interactive wizard
|
|
318
|
+
const shouldRunWizard =
|
|
319
|
+
options.interactive || (isInteractive() && !options.yes && !options.force);
|
|
320
|
+
|
|
321
|
+
let configContent = DEFAULT_CONFIG;
|
|
322
|
+
let createExample = true;
|
|
323
|
+
|
|
324
|
+
// Run interactive wizard if applicable
|
|
325
|
+
if (shouldRunWizard) {
|
|
326
|
+
try {
|
|
327
|
+
const wizardResult = await runInitWizard();
|
|
328
|
+
configContent = generateConfigFromWizard(wizardResult);
|
|
329
|
+
createExample = wizardResult.createExample;
|
|
330
|
+
console.log(''); // Add spacing after wizard
|
|
331
|
+
} catch (wizardError) {
|
|
332
|
+
// If wizard fails (e.g., user cancels), fall back to defaults
|
|
333
|
+
if ((wizardError as Error).message?.includes('closed')) {
|
|
334
|
+
console.log(chalk.yellow('\n Setup cancelled. Using defaults.\n'));
|
|
335
|
+
} else {
|
|
336
|
+
throw wizardError;
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
262
340
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
spinner.
|
|
269
|
-
} else {
|
|
270
|
-
spinner.start('Creating example scenario...');
|
|
271
|
-
await writeFile(scenarioPath, DEFAULT_SCENARIO);
|
|
272
|
-
spinner.succeed(
|
|
273
|
-
scenarioExists ? 'Overwrote scenarios/example.yaml' : 'Created scenarios/example.yaml'
|
|
274
|
-
);
|
|
275
|
-
}
|
|
341
|
+
// Step 1: Create directories
|
|
342
|
+
spinner.start('Creating project structure...');
|
|
343
|
+
await mkdir(join(cwd, 'scenarios'), { recursive: true });
|
|
344
|
+
await mkdir(join(cwd, 'artemis-runs'), { recursive: true });
|
|
345
|
+
await mkdir(join(cwd, 'artemis-output'), { recursive: true });
|
|
346
|
+
spinner.succeed('Created project structure');
|
|
276
347
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
348
|
+
// Step 2: Write config file
|
|
349
|
+
const configPath = join(cwd, 'artemis.config.yaml');
|
|
350
|
+
const configExists = existsSync(configPath);
|
|
351
|
+
|
|
352
|
+
if (configExists && !options.force) {
|
|
353
|
+
spinner.info('Config file already exists (use --force to overwrite)');
|
|
354
|
+
} else {
|
|
355
|
+
spinner.start('Writing configuration...');
|
|
356
|
+
await writeFile(configPath, configContent);
|
|
357
|
+
spinner.succeed(
|
|
358
|
+
configExists ? 'Overwrote artemis.config.yaml' : 'Created artemis.config.yaml'
|
|
359
|
+
);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// Step 3: Write example scenario (if requested)
|
|
363
|
+
if (createExample) {
|
|
364
|
+
const scenarioPath = join(cwd, 'scenarios', 'example.yaml');
|
|
365
|
+
const scenarioExists = existsSync(scenarioPath);
|
|
366
|
+
|
|
367
|
+
if (scenarioExists && !options.force) {
|
|
368
|
+
spinner.info('Example scenario already exists (use --force to overwrite)');
|
|
369
|
+
} else {
|
|
370
|
+
spinner.start('Creating example scenario...');
|
|
371
|
+
await writeFile(scenarioPath, DEFAULT_SCENARIO);
|
|
372
|
+
spinner.succeed(
|
|
373
|
+
scenarioExists
|
|
374
|
+
? 'Overwrote scenarios/example.yaml'
|
|
375
|
+
: 'Created scenarios/example.yaml'
|
|
289
376
|
);
|
|
290
377
|
}
|
|
291
|
-
} else if (skipped.length > 0) {
|
|
292
|
-
spinner.info('All environment variables already exist in .env');
|
|
293
|
-
} else {
|
|
294
|
-
spinner.succeed('Created .env with environment variables');
|
|
295
378
|
}
|
|
296
|
-
}
|
|
297
379
|
|
|
298
|
-
|
|
299
|
-
|
|
380
|
+
// Step 4: Update .env file
|
|
381
|
+
if (!options.skipEnv) {
|
|
382
|
+
spinner.start('Updating .env file...');
|
|
383
|
+
const { added, skipped } = await appendEnvKeys(cwd);
|
|
384
|
+
|
|
385
|
+
if (added.length > 0) {
|
|
386
|
+
spinner.succeed(`Added ${added.length} environment variable(s) to .env`);
|
|
387
|
+
if (skipped.length > 0) {
|
|
388
|
+
console.log(
|
|
389
|
+
chalk.dim(
|
|
390
|
+
` ${icons.info} Skipped ${skipped.length} existing key(s): ${skipped.join(', ')}`
|
|
391
|
+
)
|
|
392
|
+
);
|
|
393
|
+
}
|
|
394
|
+
} else if (skipped.length > 0) {
|
|
395
|
+
spinner.info('All environment variables already exist in .env');
|
|
396
|
+
} else {
|
|
397
|
+
spinner.succeed('Created .env with environment variables');
|
|
398
|
+
}
|
|
399
|
+
}
|
|
300
400
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
401
|
+
// Show success panel
|
|
402
|
+
console.log(renderSuccessPanel());
|
|
403
|
+
|
|
404
|
+
// Non-blocking update check (fire and forget)
|
|
405
|
+
checkForUpdateAndNotify();
|
|
406
|
+
} catch (error) {
|
|
407
|
+
spinner.fail('Error');
|
|
408
|
+
console.error(chalk.red(`\n${icons.failed} ${(error as Error).message}`));
|
|
409
|
+
process.exit(1);
|
|
410
|
+
}
|
|
307
411
|
}
|
|
308
|
-
|
|
412
|
+
);
|
|
309
413
|
|
|
310
414
|
return cmd;
|
|
311
415
|
}
|
package/src/commands/redteam.ts
CHANGED
|
@@ -19,14 +19,18 @@ import {
|
|
|
19
19
|
parseScenarioFile,
|
|
20
20
|
} from '@artemiskit/core';
|
|
21
21
|
import {
|
|
22
|
+
type ConversationTurn,
|
|
22
23
|
CotInjectionMutation,
|
|
24
|
+
EncodingMutation,
|
|
23
25
|
InstructionFlipMutation,
|
|
26
|
+
MultiTurnMutation,
|
|
24
27
|
type Mutation,
|
|
25
28
|
RedTeamGenerator,
|
|
26
29
|
RoleSpoofMutation,
|
|
27
30
|
SeverityMapper,
|
|
28
31
|
TypoMutation,
|
|
29
32
|
UnsafeResponseDetector,
|
|
33
|
+
loadCustomAttacks,
|
|
30
34
|
} from '@artemiskit/redteam';
|
|
31
35
|
import { generateJSONReport, generateRedTeamHTMLReport } from '@artemiskit/reports';
|
|
32
36
|
import chalk from 'chalk';
|
|
@@ -55,6 +59,7 @@ interface RedteamOptions {
|
|
|
55
59
|
model?: string;
|
|
56
60
|
mutations?: string[];
|
|
57
61
|
count?: number;
|
|
62
|
+
customAttacks?: string;
|
|
58
63
|
save?: boolean;
|
|
59
64
|
output?: string;
|
|
60
65
|
verbose?: boolean;
|
|
@@ -73,9 +78,10 @@ export function redteamCommand(): Command {
|
|
|
73
78
|
.option('-m, --model <model>', 'Model to use')
|
|
74
79
|
.option(
|
|
75
80
|
'--mutations <mutations...>',
|
|
76
|
-
'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection)'
|
|
81
|
+
'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn)'
|
|
77
82
|
)
|
|
78
83
|
.option('-c, --count <number>', 'Number of mutated prompts per case', '5')
|
|
84
|
+
.option('--custom-attacks <path>', 'Path to custom attacks YAML file')
|
|
79
85
|
.option('--save', 'Save results to storage')
|
|
80
86
|
.option('-o, --output <dir>', 'Output directory for reports')
|
|
81
87
|
.option('-v, --verbose', 'Verbose output')
|
|
@@ -131,7 +137,7 @@ export function redteamCommand(): Command {
|
|
|
131
137
|
spinner.succeed(`Connected to ${provider}`);
|
|
132
138
|
|
|
133
139
|
// Set up mutations
|
|
134
|
-
const mutations = selectMutations(options.mutations);
|
|
140
|
+
const mutations = selectMutations(options.mutations, options.customAttacks);
|
|
135
141
|
const generator = new RedTeamGenerator(mutations);
|
|
136
142
|
const detector = new UnsafeResponseDetector();
|
|
137
143
|
const count = Number.parseInt(String(options.count)) || 5;
|
|
@@ -179,12 +185,47 @@ export function redteamCommand(): Command {
|
|
|
179
185
|
for (const testCase of scenario.cases) {
|
|
180
186
|
console.log(chalk.bold(`Testing case: ${testCase.id}`));
|
|
181
187
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
188
|
+
// Handle both string and array prompts (consistent with run command)
|
|
189
|
+
// For array prompts: last user message is the attack target, rest is context
|
|
190
|
+
let attackPrompt: string;
|
|
191
|
+
let conversationPrefix: ConversationTurn[] | undefined;
|
|
192
|
+
|
|
193
|
+
if (typeof testCase.prompt === 'string') {
|
|
194
|
+
// Simple string prompt - use directly
|
|
195
|
+
attackPrompt = testCase.prompt;
|
|
196
|
+
} else {
|
|
197
|
+
// Array prompt - extract last user message as attack, rest as context
|
|
198
|
+
const messages = testCase.prompt;
|
|
199
|
+
const lastUserIndex = messages.map((m) => m.role).lastIndexOf('user');
|
|
200
|
+
|
|
201
|
+
if (lastUserIndex === -1) {
|
|
202
|
+
// No user message found - use concatenated content
|
|
203
|
+
attackPrompt = messages.map((m) => m.content).join('\n');
|
|
204
|
+
} else {
|
|
205
|
+
// Extract attack prompt (last user message)
|
|
206
|
+
attackPrompt = messages[lastUserIndex].content;
|
|
207
|
+
|
|
208
|
+
// Extract conversation prefix (everything before the last user message)
|
|
209
|
+
if (lastUserIndex > 0) {
|
|
210
|
+
conversationPrefix = messages.slice(0, lastUserIndex).map((m) => ({
|
|
211
|
+
role: m.role as 'user' | 'assistant' | 'system',
|
|
212
|
+
content: m.content,
|
|
213
|
+
}));
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// Clear any previous prefix and set new one if applicable
|
|
219
|
+
for (const mutation of mutations) {
|
|
220
|
+
if (mutation instanceof MultiTurnMutation) {
|
|
221
|
+
mutation.clearConversationPrefix();
|
|
222
|
+
if (conversationPrefix && conversationPrefix.length > 0) {
|
|
223
|
+
mutation.setConversationPrefix(conversationPrefix);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
}
|
|
186
227
|
|
|
187
|
-
const mutatedPrompts = generator.generate(
|
|
228
|
+
const mutatedPrompts = generator.generate(attackPrompt, count);
|
|
188
229
|
|
|
189
230
|
for (const mutated of mutatedPrompts) {
|
|
190
231
|
completedTests++;
|
|
@@ -474,19 +515,31 @@ export function redteamCommand(): Command {
|
|
|
474
515
|
return cmd;
|
|
475
516
|
}
|
|
476
517
|
|
|
/**
 * Build the list of mutations to apply during a red-team run.
 *
 * @param names - Built-in mutation names requested on the command line
 *   (`typo`, `role-spoof`, `instruction-flip`, `cot-injection`, `encoding`,
 *   `multi-turn`). When omitted or empty, every built-in mutation is used.
 * @param customAttacksPath - Optional path handed to `loadCustomAttacks`; the
 *   mutations it returns are appended after the built-in selection.
 * @returns The selected built-in mutations, in the order requested, followed
 *   by any custom mutations.
 *
 * Unrecognised names are skipped, and a single warning listing them (and the
 * valid names) is written with `console.warn` so a typo does not silently
 * shrink the attack set.
 */
function selectMutations(names?: string[], customAttacksPath?: string): Mutation[] {
  // A Map is used instead of a plain object so lookups never resolve to
  // members inherited from Object.prototype (for example "constructor").
  const builtIns = new Map<string, Mutation>([
    ['typo', new TypoMutation()],
    ['role-spoof', new RoleSpoofMutation()],
    ['instruction-flip', new InstructionFlipMutation()],
    ['cot-injection', new CotInjectionMutation()],
    ['encoding', new EncodingMutation()],
    ['multi-turn', new MultiTurnMutation()],
  ]);

  let mutations: Mutation[];

  if (!names || names.length === 0) {
    mutations = [...builtIns.values()];
  } else {
    mutations = [];
    const unknownNames: string[] = [];

    for (const name of names) {
      const mutation = builtIns.get(name);
      if (mutation === undefined) {
        unknownNames.push(name);
      } else {
        mutations.push(mutation);
      }
    }

    if (unknownNames.length > 0) {
      console.warn(
        `Ignoring unknown mutation(s): ${unknownNames.join(', ')}. ` +
          `Valid mutations: ${[...builtIns.keys()].join(', ')}`
      );
    }
  }

  // Load custom attacks if path provided
  if (customAttacksPath) {
    mutations.push(...loadCustomAttacks(customAttacksPath));
  }

  return mutations;
}
|
|
491
544
|
|
|
492
545
|
/**
|