@artemiskit/cli 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +1 -0
- package/dist/index.js +19129 -20009
- package/dist/src/commands/compare.d.ts.map +1 -1
- package/dist/src/commands/history.d.ts.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/report.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/commands/stress.d.ts.map +1 -1
- package/dist/src/ui/colors.d.ts +44 -0
- package/dist/src/ui/colors.d.ts.map +1 -0
- package/dist/src/ui/errors.d.ts +39 -0
- package/dist/src/ui/errors.d.ts.map +1 -0
- package/dist/src/ui/index.d.ts +16 -0
- package/dist/src/ui/index.d.ts.map +1 -0
- package/dist/src/ui/live-status.d.ts +82 -0
- package/dist/src/ui/live-status.d.ts.map +1 -0
- package/dist/src/ui/panels.d.ts +49 -0
- package/dist/src/ui/panels.d.ts.map +1 -0
- package/dist/src/ui/progress.d.ts +60 -0
- package/dist/src/ui/progress.d.ts.map +1 -0
- package/dist/src/ui/utils.d.ts +42 -0
- package/dist/src/ui/utils.d.ts.map +1 -0
- package/package.json +6 -6
- package/src/__tests__/helpers/index.ts +6 -0
- package/src/__tests__/helpers/mock-adapter.ts +90 -0
- package/src/__tests__/helpers/test-utils.ts +205 -0
- package/src/__tests__/integration/compare-command.test.ts +236 -0
- package/src/__tests__/integration/config.test.ts +125 -0
- package/src/__tests__/integration/history-command.test.ts +251 -0
- package/src/__tests__/integration/init-command.test.ts +177 -0
- package/src/__tests__/integration/report-command.test.ts +245 -0
- package/src/__tests__/integration/ui.test.ts +230 -0
- package/src/commands/compare.ts +158 -49
- package/src/commands/history.ts +131 -30
- package/src/commands/init.ts +181 -21
- package/src/commands/redteam.ts +118 -75
- package/src/commands/report.ts +29 -14
- package/src/commands/run.ts +86 -66
- package/src/commands/stress.ts +61 -63
- package/src/ui/colors.ts +62 -0
- package/src/ui/errors.ts +248 -0
- package/src/ui/index.ts +42 -0
- package/src/ui/live-status.ts +259 -0
- package/src/ui/panels.ts +216 -0
- package/src/ui/progress.ts +139 -0
- package/src/ui/utils.ts +88 -0
package/src/commands/init.ts
CHANGED
|
@@ -2,23 +2,25 @@
|
|
|
2
2
|
* Init command - Initialize ArtemisKit in a project
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import { existsSync } from 'node:fs';
|
|
6
|
+
import { mkdir, readFile, writeFile, appendFile } from 'node:fs/promises';
|
|
6
7
|
import { join } from 'node:path';
|
|
7
8
|
import chalk from 'chalk';
|
|
8
9
|
import { Command } from 'commander';
|
|
10
|
+
import { createSpinner, icons } from '../ui/index.js';
|
|
9
11
|
|
|
10
12
|
const DEFAULT_CONFIG = `# ArtemisKit Configuration
|
|
11
13
|
project: my-project
|
|
12
14
|
|
|
13
15
|
# Default provider settings
|
|
14
16
|
provider: openai
|
|
15
|
-
model: gpt-
|
|
17
|
+
model: gpt-4o-mini
|
|
16
18
|
|
|
17
19
|
# Provider configurations
|
|
18
20
|
providers:
|
|
19
21
|
openai:
|
|
20
22
|
apiKey: \${OPENAI_API_KEY}
|
|
21
|
-
defaultModel: gpt-
|
|
23
|
+
defaultModel: gpt-4o-mini
|
|
22
24
|
|
|
23
25
|
azure-openai:
|
|
24
26
|
apiKey: \${AZURE_OPENAI_API_KEY}
|
|
@@ -26,6 +28,10 @@ providers:
|
|
|
26
28
|
deploymentName: \${AZURE_OPENAI_DEPLOYMENT}
|
|
27
29
|
apiVersion: "2024-02-15-preview"
|
|
28
30
|
|
|
31
|
+
anthropic:
|
|
32
|
+
apiKey: \${ANTHROPIC_API_KEY}
|
|
33
|
+
defaultModel: claude-sonnet-4-20250514
|
|
34
|
+
|
|
29
35
|
# Storage configuration
|
|
30
36
|
storage:
|
|
31
37
|
type: local
|
|
@@ -44,7 +50,7 @@ const DEFAULT_SCENARIO = `name: Example Scenario
|
|
|
44
50
|
description: Basic example scenario for testing
|
|
45
51
|
version: "1.0"
|
|
46
52
|
provider: openai
|
|
47
|
-
model: gpt-
|
|
53
|
+
model: gpt-4o-mini
|
|
48
54
|
temperature: 0
|
|
49
55
|
|
|
50
56
|
cases:
|
|
@@ -69,40 +75,194 @@ cases:
|
|
|
69
75
|
- basic
|
|
70
76
|
`;
|
|
71
77
|
|
|
78
|
+
const ENV_KEYS = [
|
|
79
|
+
'# ArtemisKit Environment Variables',
|
|
80
|
+
'OPENAI_API_KEY=',
|
|
81
|
+
'AZURE_OPENAI_API_KEY=',
|
|
82
|
+
'AZURE_OPENAI_RESOURCE=',
|
|
83
|
+
'AZURE_OPENAI_DEPLOYMENT=',
|
|
84
|
+
'AZURE_OPENAI_API_VERSION=',
|
|
85
|
+
'ANTHROPIC_API_KEY=',
|
|
86
|
+
];
|
|
87
|
+
|
|
88
|
+
function renderWelcomeBanner(): string {
|
|
89
|
+
const lines = [
|
|
90
|
+
'',
|
|
91
|
+
chalk.cyan(' ╔═══════════════════════════════════════════════════════╗'),
|
|
92
|
+
chalk.cyan(' ║ ║'),
|
|
93
|
+
chalk.cyan(' ║') +
|
|
94
|
+
chalk.bold.white(' 🎯 Welcome to ArtemisKit ') +
|
|
95
|
+
chalk.cyan('║'),
|
|
96
|
+
chalk.cyan(' ║') +
|
|
97
|
+
chalk.gray(' LLM Testing & Evaluation Toolkit ') +
|
|
98
|
+
chalk.cyan('║'),
|
|
99
|
+
chalk.cyan(' ║ ║'),
|
|
100
|
+
chalk.cyan(' ╚═══════════════════════════════════════════════════════╝'),
|
|
101
|
+
'',
|
|
102
|
+
];
|
|
103
|
+
return lines.join('\n');
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
function renderSuccessPanel(): string {
|
|
107
|
+
const lines = [
|
|
108
|
+
'',
|
|
109
|
+
chalk.green(' ╭─────────────────────────────────────────────────────────╮'),
|
|
110
|
+
chalk.green(' │') +
|
|
111
|
+
chalk.bold.green(' ✓ ArtemisKit initialized successfully! ') +
|
|
112
|
+
chalk.green('│'),
|
|
113
|
+
chalk.green(' ├─────────────────────────────────────────────────────────┤'),
|
|
114
|
+
chalk.green(' │ │'),
|
|
115
|
+
chalk.green(' │') +
|
|
116
|
+
chalk.white(' Next steps: ') +
|
|
117
|
+
chalk.green('│'),
|
|
118
|
+
chalk.green(' │ │'),
|
|
119
|
+
chalk.green(' │') +
|
|
120
|
+
chalk.white(' 1. Set your API key: ') +
|
|
121
|
+
chalk.green('│'),
|
|
122
|
+
chalk.green(' │') +
|
|
123
|
+
chalk.cyan(' export OPENAI_API_KEY="sk-..." ') +
|
|
124
|
+
chalk.green('│'),
|
|
125
|
+
chalk.green(' │ │'),
|
|
126
|
+
chalk.green(' │') +
|
|
127
|
+
chalk.white(' 2. Run your first test: ') +
|
|
128
|
+
chalk.green('│'),
|
|
129
|
+
chalk.green(' │') +
|
|
130
|
+
chalk.cyan(' artemiskit run scenarios/example.yaml ') +
|
|
131
|
+
chalk.green('│'),
|
|
132
|
+
chalk.green(' │ │'),
|
|
133
|
+
chalk.green(' │') +
|
|
134
|
+
chalk.white(' 3. View the docs: ') +
|
|
135
|
+
chalk.green('│'),
|
|
136
|
+
chalk.green(' │') +
|
|
137
|
+
chalk.cyan(' https://artemiskit.vercel.app/docs ') +
|
|
138
|
+
chalk.green('│'),
|
|
139
|
+
chalk.green(' │ │'),
|
|
140
|
+
chalk.green(' ╰─────────────────────────────────────────────────────────╯'),
|
|
141
|
+
'',
|
|
142
|
+
];
|
|
143
|
+
return lines.join('\n');
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
async function appendEnvKeys(cwd: string): Promise<{ added: string[]; skipped: string[] }> {
|
|
147
|
+
const envPath = join(cwd, '.env');
|
|
148
|
+
const added: string[] = [];
|
|
149
|
+
const skipped: string[] = [];
|
|
150
|
+
|
|
151
|
+
let existingContent = '';
|
|
152
|
+
if (existsSync(envPath)) {
|
|
153
|
+
existingContent = await readFile(envPath, 'utf-8');
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
const linesToAdd: string[] = [];
|
|
157
|
+
|
|
158
|
+
for (const key of ENV_KEYS) {
|
|
159
|
+
// Skip comments
|
|
160
|
+
if (key.startsWith('#')) {
|
|
161
|
+
// Only add comment if we're adding new keys and it's not already there
|
|
162
|
+
if (!existingContent.includes(key)) {
|
|
163
|
+
linesToAdd.push(key);
|
|
164
|
+
}
|
|
165
|
+
continue;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
const keyName = key.split('=')[0];
|
|
169
|
+
// Check if key already exists (with or without value)
|
|
170
|
+
const keyPattern = new RegExp(`^${keyName}=`, 'm');
|
|
171
|
+
if (keyPattern.test(existingContent)) {
|
|
172
|
+
skipped.push(keyName);
|
|
173
|
+
} else {
|
|
174
|
+
linesToAdd.push(key);
|
|
175
|
+
added.push(keyName);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
if (linesToAdd.length > 0) {
|
|
180
|
+
// Add newline before our content if file exists and doesn't end with newline
|
|
181
|
+
const prefix =
|
|
182
|
+
existingContent && !existingContent.endsWith('\n') ? '\n\n' : existingContent ? '\n' : '';
|
|
183
|
+
await appendFile(envPath, prefix + linesToAdd.join('\n') + '\n');
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return { added, skipped };
|
|
187
|
+
}
|
|
188
|
+
|
|
72
189
|
export function initCommand(): Command {
|
|
73
190
|
const cmd = new Command('init');
|
|
74
191
|
|
|
75
192
|
cmd
|
|
76
193
|
.description('Initialize ArtemisKit in the current directory')
|
|
77
194
|
.option('-f, --force', 'Overwrite existing configuration')
|
|
78
|
-
.
|
|
195
|
+
.option('--skip-env', 'Skip adding environment variables to .env')
|
|
196
|
+
.action(async (options: { force?: boolean; skipEnv?: boolean }) => {
|
|
197
|
+
const spinner = createSpinner();
|
|
198
|
+
|
|
79
199
|
try {
|
|
80
200
|
const cwd = process.cwd();
|
|
81
201
|
|
|
82
|
-
//
|
|
202
|
+
// Show welcome banner
|
|
203
|
+
console.log(renderWelcomeBanner());
|
|
204
|
+
|
|
205
|
+
// Step 1: Create directories
|
|
206
|
+
spinner.start('Creating project structure...');
|
|
83
207
|
await mkdir(join(cwd, 'scenarios'), { recursive: true });
|
|
84
208
|
await mkdir(join(cwd, 'artemis-runs'), { recursive: true });
|
|
85
209
|
await mkdir(join(cwd, 'artemis-output'), { recursive: true });
|
|
210
|
+
spinner.succeed('Created project structure');
|
|
86
211
|
|
|
87
|
-
// Write config file
|
|
212
|
+
// Step 2: Write config file
|
|
88
213
|
const configPath = join(cwd, 'artemis.config.yaml');
|
|
89
|
-
|
|
90
|
-
console.log(chalk.green('✓'), 'Created artemis.config.yaml');
|
|
214
|
+
const configExists = existsSync(configPath);
|
|
91
215
|
|
|
92
|
-
|
|
216
|
+
if (configExists && !options.force) {
|
|
217
|
+
spinner.info('Config file already exists (use --force to overwrite)');
|
|
218
|
+
} else {
|
|
219
|
+
spinner.start('Writing configuration...');
|
|
220
|
+
await writeFile(configPath, DEFAULT_CONFIG);
|
|
221
|
+
spinner.succeed(
|
|
222
|
+
configExists ? 'Overwrote artemis.config.yaml' : 'Created artemis.config.yaml'
|
|
223
|
+
);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// Step 3: Write example scenario
|
|
93
227
|
const scenarioPath = join(cwd, 'scenarios', 'example.yaml');
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
228
|
+
const scenarioExists = existsSync(scenarioPath);
|
|
229
|
+
|
|
230
|
+
if (scenarioExists && !options.force) {
|
|
231
|
+
spinner.info('Example scenario already exists (use --force to overwrite)');
|
|
232
|
+
} else {
|
|
233
|
+
spinner.start('Creating example scenario...');
|
|
234
|
+
await writeFile(scenarioPath, DEFAULT_SCENARIO);
|
|
235
|
+
spinner.succeed(
|
|
236
|
+
scenarioExists ? 'Overwrote scenarios/example.yaml' : 'Created scenarios/example.yaml'
|
|
237
|
+
);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Step 4: Update .env file
|
|
241
|
+
if (!options.skipEnv) {
|
|
242
|
+
spinner.start('Updating .env file...');
|
|
243
|
+
const { added, skipped } = await appendEnvKeys(cwd);
|
|
244
|
+
|
|
245
|
+
if (added.length > 0) {
|
|
246
|
+
spinner.succeed(`Added ${added.length} environment variable(s) to .env`);
|
|
247
|
+
if (skipped.length > 0) {
|
|
248
|
+
console.log(
|
|
249
|
+
chalk.dim(
|
|
250
|
+
` ${icons.info} Skipped ${skipped.length} existing key(s): ${skipped.join(', ')}`
|
|
251
|
+
)
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
} else if (skipped.length > 0) {
|
|
255
|
+
spinner.info('All environment variables already exist in .env');
|
|
256
|
+
} else {
|
|
257
|
+
spinner.succeed('Created .env with environment variables');
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// Show success panel
|
|
262
|
+
console.log(renderSuccessPanel());
|
|
104
263
|
} catch (error) {
|
|
105
|
-
|
|
264
|
+
spinner.fail('Error');
|
|
265
|
+
console.error(chalk.red(`\n${icons.failed} ${(error as Error).message}`));
|
|
106
266
|
process.exit(1);
|
|
107
267
|
}
|
|
108
268
|
});
|
package/src/commands/redteam.ts
CHANGED
|
@@ -30,17 +30,25 @@ import {
|
|
|
30
30
|
} from '@artemiskit/redteam';
|
|
31
31
|
import { generateJSONReport, generateRedTeamHTMLReport } from '@artemiskit/reports';
|
|
32
32
|
import chalk from 'chalk';
|
|
33
|
-
import Table from 'cli-table3';
|
|
34
33
|
import { Command } from 'commander';
|
|
35
34
|
import { nanoid } from 'nanoid';
|
|
36
|
-
import
|
|
37
|
-
import {
|
|
35
|
+
import { loadConfig } from '../config/loader.js';
|
|
36
|
+
import {
|
|
37
|
+
createSpinner,
|
|
38
|
+
renderRedteamSummaryPanel,
|
|
39
|
+
renderError,
|
|
40
|
+
renderInfoBox,
|
|
41
|
+
renderProgressBar,
|
|
42
|
+
getProviderErrorContext,
|
|
43
|
+
isTTY,
|
|
44
|
+
icons,
|
|
45
|
+
} from '../ui/index.js';
|
|
38
46
|
import {
|
|
39
47
|
buildAdapterConfig,
|
|
40
48
|
resolveModelWithSource,
|
|
41
49
|
resolveProviderWithSource,
|
|
42
|
-
} from '../utils/adapter';
|
|
43
|
-
import { createStorage } from '../utils/storage';
|
|
50
|
+
} from '../utils/adapter.js';
|
|
51
|
+
import { createStorage } from '../utils/storage.js';
|
|
44
52
|
|
|
45
53
|
interface RedteamOptions {
|
|
46
54
|
provider?: string;
|
|
@@ -78,7 +86,8 @@ export function redteamCommand(): Command {
|
|
|
78
86
|
'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
|
|
79
87
|
)
|
|
80
88
|
.action(async (scenarioPath: string, options: RedteamOptions) => {
|
|
81
|
-
const spinner =
|
|
89
|
+
const spinner = createSpinner('Loading configuration...');
|
|
90
|
+
spinner.start();
|
|
82
91
|
const startTime = new Date();
|
|
83
92
|
|
|
84
93
|
try {
|
|
@@ -125,10 +134,22 @@ export function redteamCommand(): Command {
|
|
|
125
134
|
const mutations = selectMutations(options.mutations);
|
|
126
135
|
const generator = new RedTeamGenerator(mutations);
|
|
127
136
|
const detector = new UnsafeResponseDetector();
|
|
137
|
+
const count = Number.parseInt(String(options.count)) || 5;
|
|
128
138
|
|
|
139
|
+
// Display configuration using info box
|
|
140
|
+
console.log();
|
|
141
|
+
const configLines = [
|
|
142
|
+
`Mutations: ${mutations.map((m) => m.name).join(', ')}`,
|
|
143
|
+
`Prompts per case: ${count}`,
|
|
144
|
+
`Total cases: ${scenario.cases.length}`,
|
|
145
|
+
];
|
|
146
|
+
if (options.redact) {
|
|
147
|
+
configLines.push(
|
|
148
|
+
`Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ''}`
|
|
149
|
+
);
|
|
150
|
+
}
|
|
151
|
+
console.log(renderInfoBox('Red Team Configuration', configLines));
|
|
129
152
|
console.log();
|
|
130
|
-
console.log(chalk.bold('Red-Team Testing'));
|
|
131
|
-
console.log(chalk.dim(`Mutations: ${mutations.map((m) => m.name).join(', ')}`));
|
|
132
153
|
|
|
133
154
|
// Set up redaction if enabled
|
|
134
155
|
let redactionConfig: RedactionConfig | undefined;
|
|
@@ -143,20 +164,17 @@ export function redteamCommand(): Command {
|
|
|
143
164
|
replacement: '[REDACTED]',
|
|
144
165
|
};
|
|
145
166
|
redactor = new Redactor(redactionConfig);
|
|
146
|
-
console.log(
|
|
147
|
-
chalk.dim(
|
|
148
|
-
`Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
|
|
149
|
-
)
|
|
150
|
-
);
|
|
151
167
|
}
|
|
152
|
-
console.log();
|
|
153
168
|
|
|
154
|
-
const count = Number.parseInt(String(options.count)) || 5;
|
|
155
169
|
const results: RedTeamCaseResult[] = [];
|
|
156
170
|
let promptsRedacted = 0;
|
|
157
171
|
let responsesRedacted = 0;
|
|
158
172
|
let totalRedactions = 0;
|
|
159
173
|
|
|
174
|
+
// Calculate total tests for progress
|
|
175
|
+
const totalTests = scenario.cases.length * count;
|
|
176
|
+
let completedTests = 0;
|
|
177
|
+
|
|
160
178
|
// Run mutated tests for each case
|
|
161
179
|
for (const testCase of scenario.cases) {
|
|
162
180
|
console.log(chalk.bold(`Testing case: ${testCase.id}`));
|
|
@@ -169,6 +187,18 @@ export function redteamCommand(): Command {
|
|
|
169
187
|
const mutatedPrompts = generator.generate(originalPrompt, count);
|
|
170
188
|
|
|
171
189
|
for (const mutated of mutatedPrompts) {
|
|
190
|
+
completedTests++;
|
|
191
|
+
|
|
192
|
+
// Show progress bar in TTY mode
|
|
193
|
+
if (isTTY) {
|
|
194
|
+
const progressBar = renderProgressBar(completedTests, totalTests, {
|
|
195
|
+
width: 15,
|
|
196
|
+
showPercentage: true,
|
|
197
|
+
showCount: false,
|
|
198
|
+
});
|
|
199
|
+
process.stdout.write(`\r ${progressBar} `);
|
|
200
|
+
}
|
|
201
|
+
|
|
172
202
|
const requestStart = Date.now();
|
|
173
203
|
try {
|
|
174
204
|
const result = await client.generate({
|
|
@@ -180,14 +210,21 @@ export function redteamCommand(): Command {
|
|
|
180
210
|
const detection = detector.detect(result.text);
|
|
181
211
|
|
|
182
212
|
const resultStatus: RedTeamStatus = detection.unsafe ? 'unsafe' : 'safe';
|
|
183
|
-
|
|
213
|
+
|
|
214
|
+
// Clear progress line
|
|
215
|
+
if (isTTY) {
|
|
216
|
+
process.stdout.write('\r' + ' '.repeat(60) + '\r');
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Display status with appropriate icon
|
|
220
|
+
const statusIcon = detection.unsafe ? icons.failed : icons.passed;
|
|
221
|
+
const statusLabel = detection.unsafe
|
|
184
222
|
? chalk.red(`UNSAFE (${detection.severity})`)
|
|
185
223
|
: chalk.green('SAFE');
|
|
186
|
-
|
|
187
|
-
console.log(` ${statusDisplay} [${mutated.mutations.join(', ')}]`);
|
|
224
|
+
console.log(` ${statusIcon} ${statusLabel} [${mutated.mutations.join(', ')}]`);
|
|
188
225
|
|
|
189
226
|
if (detection.unsafe && options.verbose) {
|
|
190
|
-
console.log(chalk.dim(`
|
|
227
|
+
console.log(chalk.dim(` Reasons: ${detection.reasons.join(', ')}`));
|
|
191
228
|
}
|
|
192
229
|
|
|
193
230
|
// Apply redaction if enabled
|
|
@@ -228,6 +265,11 @@ export function redteamCommand(): Command {
|
|
|
228
265
|
const errorMessage = (error as Error).message;
|
|
229
266
|
const isContentFiltered = isProviderContentFilter(errorMessage);
|
|
230
267
|
|
|
268
|
+
// Clear progress line
|
|
269
|
+
if (isTTY) {
|
|
270
|
+
process.stdout.write('\r' + ' '.repeat(60) + '\r');
|
|
271
|
+
}
|
|
272
|
+
|
|
231
273
|
// Apply redaction to prompt even for errors/blocked
|
|
232
274
|
let errorPrompt = mutated.mutated;
|
|
233
275
|
let errorCaseRedaction: CaseRedactionInfo | undefined;
|
|
@@ -249,8 +291,11 @@ export function redteamCommand(): Command {
|
|
|
249
291
|
|
|
250
292
|
if (isContentFiltered) {
|
|
251
293
|
console.log(
|
|
252
|
-
` ${chalk.cyan('BLOCKED')} [${mutated.mutations.join(', ')}]
|
|
294
|
+
` ${chalk.cyan('⊘')} ${chalk.cyan('BLOCKED')} [${mutated.mutations.join(', ')}]`
|
|
253
295
|
);
|
|
296
|
+
if (options.verbose) {
|
|
297
|
+
console.log(chalk.dim(' Provider content filter triggered'));
|
|
298
|
+
}
|
|
254
299
|
results.push({
|
|
255
300
|
caseId: testCase.id,
|
|
256
301
|
mutation: mutated.mutations.join('+'),
|
|
@@ -264,8 +309,11 @@ export function redteamCommand(): Command {
|
|
|
264
309
|
});
|
|
265
310
|
} else {
|
|
266
311
|
console.log(
|
|
267
|
-
` ${chalk.yellow('ERROR')} [${mutated.mutations.join(', ')}]
|
|
312
|
+
` ${icons.warning} ${chalk.yellow('ERROR')} [${mutated.mutations.join(', ')}]`
|
|
268
313
|
);
|
|
314
|
+
if (options.verbose) {
|
|
315
|
+
console.log(chalk.dim(` ${errorMessage}`));
|
|
316
|
+
}
|
|
269
317
|
results.push({
|
|
270
318
|
caseId: testCase.id,
|
|
271
319
|
mutation: mutated.mutations.join('+'),
|
|
@@ -335,8 +383,48 @@ export function redteamCommand(): Command {
|
|
|
335
383
|
redaction: redactionInfo,
|
|
336
384
|
};
|
|
337
385
|
|
|
338
|
-
// Display summary
|
|
339
|
-
|
|
386
|
+
// Display summary using enhanced panel
|
|
387
|
+
const summaryData = {
|
|
388
|
+
totalCases: metrics.total_tests,
|
|
389
|
+
safeResponses: metrics.safe_responses,
|
|
390
|
+
unsafeResponses: metrics.unsafe_responses,
|
|
391
|
+
blockedResponses: metrics.blocked_responses,
|
|
392
|
+
errorResponses: metrics.error_responses,
|
|
393
|
+
defenseRate: metrics.defense_rate * 100,
|
|
394
|
+
severityBreakdown: metrics.by_severity,
|
|
395
|
+
};
|
|
396
|
+
console.log(renderRedteamSummaryPanel(summaryData));
|
|
397
|
+
|
|
398
|
+
// Show run ID and defense rate detail
|
|
399
|
+
console.log();
|
|
400
|
+
const testableResults = metrics.total_tests - metrics.error_responses;
|
|
401
|
+
console.log(
|
|
402
|
+
chalk.dim(
|
|
403
|
+
`Run ID: ${runId} | Defense Rate: ${(metrics.defense_rate * 100).toFixed(1)}% (${metrics.defended}/${testableResults})`
|
|
404
|
+
)
|
|
405
|
+
);
|
|
406
|
+
|
|
407
|
+
// Show severity breakdown if there were unsafe responses
|
|
408
|
+
if (metrics.unsafe_responses > 0) {
|
|
409
|
+
console.log();
|
|
410
|
+
console.log(chalk.bold('Severity Breakdown:'));
|
|
411
|
+
for (const severity of ['critical', 'high', 'medium', 'low'] as const) {
|
|
412
|
+
if (metrics.by_severity[severity]) {
|
|
413
|
+
const info = SeverityMapper.getInfo(severity);
|
|
414
|
+
console.log(` ${info.label}: ${metrics.by_severity[severity]}`);
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
console.log();
|
|
419
|
+
console.log(
|
|
420
|
+
chalk.red(
|
|
421
|
+
`${icons.warning} ${metrics.unsafe_responses} potentially unsafe responses detected`
|
|
422
|
+
)
|
|
423
|
+
);
|
|
424
|
+
} else if (testableResults > 0) {
|
|
425
|
+
console.log();
|
|
426
|
+
console.log(chalk.green(`${icons.passed} No unsafe responses detected`));
|
|
427
|
+
}
|
|
340
428
|
|
|
341
429
|
// Save results if requested
|
|
342
430
|
if (options.save) {
|
|
@@ -372,7 +460,13 @@ export function redteamCommand(): Command {
|
|
|
372
460
|
}
|
|
373
461
|
} catch (error) {
|
|
374
462
|
spinner.fail('Error');
|
|
375
|
-
|
|
463
|
+
|
|
464
|
+
// Display enhanced error message
|
|
465
|
+
const provider = options.provider || 'unknown';
|
|
466
|
+
const errorContext = getProviderErrorContext(provider, error as Error);
|
|
467
|
+
console.log();
|
|
468
|
+
console.log(renderError(errorContext));
|
|
469
|
+
|
|
376
470
|
process.exit(1);
|
|
377
471
|
}
|
|
378
472
|
});
|
|
@@ -458,54 +552,3 @@ function calculateMetrics(results: RedTeamCaseResult[]): RedTeamMetrics {
|
|
|
458
552
|
by_severity: bySeverity,
|
|
459
553
|
};
|
|
460
554
|
}
|
|
461
|
-
|
|
462
|
-
function displaySummary(metrics: RedTeamMetrics, runId: string): void {
|
|
463
|
-
const table = new Table({
|
|
464
|
-
head: [chalk.bold('Metric'), chalk.bold('Value')],
|
|
465
|
-
style: { head: [], border: [] },
|
|
466
|
-
});
|
|
467
|
-
|
|
468
|
-
table.push(
|
|
469
|
-
['Run ID', runId],
|
|
470
|
-
['Total Tests', metrics.total_tests.toString()],
|
|
471
|
-
['Defended', chalk.green(metrics.defended.toString())],
|
|
472
|
-
[` ${chalk.dim('Model handled safely')}`, chalk.green(metrics.safe_responses.toString())],
|
|
473
|
-
[` ${chalk.dim('Provider blocked')}`, chalk.cyan(metrics.blocked_responses.toString())],
|
|
474
|
-
[
|
|
475
|
-
'Unsafe Responses',
|
|
476
|
-
metrics.unsafe_responses > 0 ? chalk.red(metrics.unsafe_responses.toString()) : '0',
|
|
477
|
-
]
|
|
478
|
-
);
|
|
479
|
-
|
|
480
|
-
for (const severity of ['critical', 'high', 'medium', 'low'] as const) {
|
|
481
|
-
if (metrics.by_severity[severity]) {
|
|
482
|
-
const info = SeverityMapper.getInfo(severity);
|
|
483
|
-
table.push([` ${info.label}`, metrics.by_severity[severity].toString()]);
|
|
484
|
-
}
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
if (metrics.error_responses > 0) {
|
|
488
|
-
table.push(['Errors', chalk.yellow(metrics.error_responses.toString())]);
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
console.log(chalk.bold('Summary'));
|
|
492
|
-
console.log(table.toString());
|
|
493
|
-
|
|
494
|
-
// Calculate defense rate (excluding errors from denominator)
|
|
495
|
-
const testableResults = metrics.total_tests - metrics.error_responses;
|
|
496
|
-
if (testableResults > 0) {
|
|
497
|
-
const defenseRate = (metrics.defense_rate * 100).toFixed(1);
|
|
498
|
-
console.log();
|
|
499
|
-
console.log(
|
|
500
|
-
chalk.dim(`Defense Rate: ${defenseRate}% (${metrics.defended}/${testableResults})`)
|
|
501
|
-
);
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
if (metrics.unsafe_responses > 0) {
|
|
505
|
-
console.log();
|
|
506
|
-
console.log(chalk.red(`⚠ ${metrics.unsafe_responses} potentially unsafe responses detected`));
|
|
507
|
-
} else if (testableResults > 0) {
|
|
508
|
-
console.log();
|
|
509
|
-
console.log(chalk.green('✓ No unsafe responses detected'));
|
|
510
|
-
}
|
|
511
|
-
}
|
package/src/commands/report.ts
CHANGED
|
@@ -11,11 +11,10 @@ import {
|
|
|
11
11
|
generateRedTeamHTMLReport,
|
|
12
12
|
generateStressHTMLReport,
|
|
13
13
|
} from '@artemiskit/reports';
|
|
14
|
-
import chalk from 'chalk';
|
|
15
14
|
import { Command } from 'commander';
|
|
16
|
-
import
|
|
17
|
-
import {
|
|
18
|
-
import { createStorage } from '../utils/storage';
|
|
15
|
+
import { loadConfig } from '../config/loader.js';
|
|
16
|
+
import { createSpinner, renderError, renderInfoBox, icons } from '../ui/index.js';
|
|
17
|
+
import { createStorage } from '../utils/storage.js';
|
|
19
18
|
|
|
20
19
|
interface ReportOptions {
|
|
21
20
|
format?: 'html' | 'json' | 'both';
|
|
@@ -74,7 +73,8 @@ export function reportCommand(): Command {
|
|
|
74
73
|
.option('-o, --output <dir>', 'Output directory', './artemis-output')
|
|
75
74
|
.option('--config <path>', 'Path to config file')
|
|
76
75
|
.action(async (runId: string, options: ReportOptions) => {
|
|
77
|
-
const spinner =
|
|
76
|
+
const spinner = createSpinner('Loading run...');
|
|
77
|
+
spinner.start();
|
|
78
78
|
|
|
79
79
|
try {
|
|
80
80
|
const config = await loadConfig(options.config);
|
|
@@ -96,7 +96,7 @@ export function reportCommand(): Command {
|
|
|
96
96
|
const htmlPath = join(outputDir, `${runId}.html`);
|
|
97
97
|
await writeFile(htmlPath, html);
|
|
98
98
|
generatedFiles.push(htmlPath);
|
|
99
|
-
spinner.succeed(`Generated HTML report
|
|
99
|
+
spinner.succeed(`Generated HTML report`);
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
if (format === 'json' || format === 'both') {
|
|
@@ -105,19 +105,34 @@ export function reportCommand(): Command {
|
|
|
105
105
|
const jsonPath = join(outputDir, `${runId}.json`);
|
|
106
106
|
await writeFile(jsonPath, json);
|
|
107
107
|
generatedFiles.push(jsonPath);
|
|
108
|
-
spinner.succeed(`Generated JSON report
|
|
108
|
+
spinner.succeed(`Generated JSON report`);
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
// Show success panel
|
|
111
112
|
console.log();
|
|
112
|
-
console.log(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
113
|
+
console.log(
|
|
114
|
+
renderInfoBox('Report Generated', [
|
|
115
|
+
`Run ID: ${runId}`,
|
|
116
|
+
`Type: ${manifestType}`,
|
|
117
|
+
'',
|
|
118
|
+
'Files:',
|
|
119
|
+
...generatedFiles.map((f) => `${icons.passed} ${f}`),
|
|
120
|
+
])
|
|
121
|
+
);
|
|
118
122
|
} catch (error) {
|
|
119
123
|
spinner.fail('Error');
|
|
120
|
-
console.
|
|
124
|
+
console.log();
|
|
125
|
+
console.log(
|
|
126
|
+
renderError({
|
|
127
|
+
title: 'Failed to Generate Report',
|
|
128
|
+
reason: (error as Error).message,
|
|
129
|
+
suggestions: [
|
|
130
|
+
'Check that the run ID exists',
|
|
131
|
+
'Run "artemiskit history" to see available runs',
|
|
132
|
+
'Verify storage configuration in artemis.config.yaml',
|
|
133
|
+
],
|
|
134
|
+
})
|
|
135
|
+
);
|
|
121
136
|
process.exit(1);
|
|
122
137
|
}
|
|
123
138
|
});
|