@artemiskit/cli 0.1.8 → 0.2.0

This diff compares publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -19,14 +19,18 @@ import {
19
19
  parseScenarioFile,
20
20
  } from '@artemiskit/core';
21
21
  import {
22
+ type ConversationTurn,
22
23
  CotInjectionMutation,
24
+ EncodingMutation,
23
25
  InstructionFlipMutation,
26
+ MultiTurnMutation,
24
27
  type Mutation,
25
28
  RedTeamGenerator,
26
29
  RoleSpoofMutation,
27
30
  SeverityMapper,
28
31
  TypoMutation,
29
32
  UnsafeResponseDetector,
33
+ loadCustomAttacks,
30
34
  } from '@artemiskit/redteam';
31
35
  import { generateJSONReport, generateRedTeamHTMLReport } from '@artemiskit/reports';
32
36
  import chalk from 'chalk';
@@ -55,6 +59,7 @@ interface RedteamOptions {
55
59
  model?: string;
56
60
  mutations?: string[];
57
61
  count?: number;
62
+ customAttacks?: string;
58
63
  save?: boolean;
59
64
  output?: string;
60
65
  verbose?: boolean;
@@ -73,9 +78,10 @@ export function redteamCommand(): Command {
73
78
  .option('-m, --model <model>', 'Model to use')
74
79
  .option(
75
80
  '--mutations <mutations...>',
76
- 'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection)'
81
+ 'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn)'
77
82
  )
78
83
  .option('-c, --count <number>', 'Number of mutated prompts per case', '5')
84
+ .option('--custom-attacks <path>', 'Path to custom attacks YAML file')
79
85
  .option('--save', 'Save results to storage')
80
86
  .option('-o, --output <dir>', 'Output directory for reports')
81
87
  .option('-v, --verbose', 'Verbose output')
@@ -131,7 +137,7 @@ export function redteamCommand(): Command {
131
137
  spinner.succeed(`Connected to ${provider}`);
132
138
 
133
139
  // Set up mutations
134
- const mutations = selectMutations(options.mutations);
140
+ const mutations = selectMutations(options.mutations, options.customAttacks);
135
141
  const generator = new RedTeamGenerator(mutations);
136
142
  const detector = new UnsafeResponseDetector();
137
143
  const count = Number.parseInt(String(options.count)) || 5;
@@ -179,12 +185,47 @@ export function redteamCommand(): Command {
179
185
  for (const testCase of scenario.cases) {
180
186
  console.log(chalk.bold(`Testing case: ${testCase.id}`));
181
187
 
182
- const originalPrompt =
183
- typeof testCase.prompt === 'string'
184
- ? testCase.prompt
185
- : testCase.prompt.map((m) => m.content).join('\n');
188
+ // Handle both string and array prompts (consistent with run command)
189
+ // For array prompts: last user message is the attack target, rest is context
190
+ let attackPrompt: string;
191
+ let conversationPrefix: ConversationTurn[] | undefined;
192
+
193
+ if (typeof testCase.prompt === 'string') {
194
+ // Simple string prompt - use directly
195
+ attackPrompt = testCase.prompt;
196
+ } else {
197
+ // Array prompt - extract last user message as attack, rest as context
198
+ const messages = testCase.prompt;
199
+ const lastUserIndex = messages.map((m) => m.role).lastIndexOf('user');
200
+
201
+ if (lastUserIndex === -1) {
202
+ // No user message found - use concatenated content
203
+ attackPrompt = messages.map((m) => m.content).join('\n');
204
+ } else {
205
+ // Extract attack prompt (last user message)
206
+ attackPrompt = messages[lastUserIndex].content;
207
+
208
+ // Extract conversation prefix (everything before the last user message)
209
+ if (lastUserIndex > 0) {
210
+ conversationPrefix = messages.slice(0, lastUserIndex).map((m) => ({
211
+ role: m.role as 'user' | 'assistant' | 'system',
212
+ content: m.content,
213
+ }));
214
+ }
215
+ }
216
+ }
217
+
218
+ // Clear any previous prefix and set new one if applicable
219
+ for (const mutation of mutations) {
220
+ if (mutation instanceof MultiTurnMutation) {
221
+ mutation.clearConversationPrefix();
222
+ if (conversationPrefix && conversationPrefix.length > 0) {
223
+ mutation.setConversationPrefix(conversationPrefix);
224
+ }
225
+ }
226
+ }
186
227
 
187
- const mutatedPrompts = generator.generate(originalPrompt, count);
228
+ const mutatedPrompts = generator.generate(attackPrompt, count);
188
229
 
189
230
  for (const mutated of mutatedPrompts) {
190
231
  completedTests++;
@@ -474,19 +515,31 @@ export function redteamCommand(): Command {
474
515
  return cmd;
475
516
  }
476
517
 
477
- function selectMutations(names?: string[]): Mutation[] {
518
+ function selectMutations(names?: string[], customAttacksPath?: string): Mutation[] {
478
519
  const allMutations: Record<string, Mutation> = {
479
520
  typo: new TypoMutation(),
480
521
  'role-spoof': new RoleSpoofMutation(),
481
522
  'instruction-flip': new InstructionFlipMutation(),
482
523
  'cot-injection': new CotInjectionMutation(),
524
+ encoding: new EncodingMutation(),
525
+ 'multi-turn': new MultiTurnMutation(),
483
526
  };
484
527
 
528
+ let mutations: Mutation[];
529
+
485
530
  if (!names || names.length === 0) {
486
- return Object.values(allMutations);
531
+ mutations = Object.values(allMutations);
532
+ } else {
533
+ mutations = names.filter((name) => name in allMutations).map((name) => allMutations[name]);
534
+ }
535
+
536
+ // Load custom attacks if path provided
537
+ if (customAttacksPath) {
538
+ const customMutations = loadCustomAttacks(customAttacksPath);
539
+ mutations.push(...customMutations);
487
540
  }
488
541
 
489
- return names.filter((name) => name in allMutations).map((name) => allMutations[name]);
542
+ return mutations;
490
543
  }
491
544
 
492
545
  /**