@artemiskit/cli 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
- package/bin/artemis.ts +13 -0
- package/dist/bin/artemis.d.ts +6 -0
- package/dist/bin/artemis.d.ts.map +1 -0
- package/dist/index.js +51297 -0
- package/dist/src/adapters.d.ts +6 -0
- package/dist/src/adapters.d.ts.map +1 -0
- package/dist/src/cli.d.ts +6 -0
- package/dist/src/cli.d.ts.map +1 -0
- package/dist/src/commands/compare.d.ts +6 -0
- package/dist/src/commands/compare.d.ts.map +1 -0
- package/dist/src/commands/history.d.ts +6 -0
- package/dist/src/commands/history.d.ts.map +1 -0
- package/dist/src/commands/index.d.ts +8 -0
- package/dist/src/commands/index.d.ts.map +1 -0
- package/dist/src/commands/init.d.ts +6 -0
- package/dist/src/commands/init.d.ts.map +1 -0
- package/dist/src/commands/redteam.d.ts +6 -0
- package/dist/src/commands/redteam.d.ts.map +1 -0
- package/dist/src/commands/report.d.ts +6 -0
- package/dist/src/commands/report.d.ts.map +1 -0
- package/dist/src/commands/run.d.ts +6 -0
- package/dist/src/commands/run.d.ts.map +1 -0
- package/dist/src/commands/stress.d.ts +6 -0
- package/dist/src/commands/stress.d.ts.map +1 -0
- package/dist/src/config/index.d.ts +6 -0
- package/dist/src/config/index.d.ts.map +1 -0
- package/dist/src/config/loader.d.ts +13 -0
- package/dist/src/config/loader.d.ts.map +1 -0
- package/dist/src/config/schema.d.ts +215 -0
- package/dist/src/config/schema.d.ts.map +1 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/utils/adapter.d.ts +71 -0
- package/dist/src/utils/adapter.d.ts.map +1 -0
- package/dist/src/utils/storage.d.ts +22 -0
- package/dist/src/utils/storage.d.ts.map +1 -0
- package/package.json +65 -0
- package/src/adapters.ts +33 -0
- package/src/cli.ts +34 -0
- package/src/commands/compare.ts +104 -0
- package/src/commands/history.ts +80 -0
- package/src/commands/index.ts +8 -0
- package/src/commands/init.ts +111 -0
- package/src/commands/redteam.ts +511 -0
- package/src/commands/report.ts +126 -0
- package/src/commands/run.ts +233 -0
- package/src/commands/stress.ts +501 -0
- package/src/config/index.ts +6 -0
- package/src/config/loader.ts +112 -0
- package/src/config/schema.ts +56 -0
- package/src/index.ts +6 -0
- package/src/utils/adapter.ts +542 -0
- package/src/utils/storage.ts +67 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run command - Execute test scenarios
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
type RedactionConfig,
|
|
7
|
+
createAdapter,
|
|
8
|
+
parseScenarioFile,
|
|
9
|
+
runScenario,
|
|
10
|
+
} from '@artemiskit/core';
|
|
11
|
+
import chalk from 'chalk';
|
|
12
|
+
import Table from 'cli-table3';
|
|
13
|
+
import { Command } from 'commander';
|
|
14
|
+
import ora from 'ora';
|
|
15
|
+
import { loadConfig } from '../config/loader';
|
|
16
|
+
import {
|
|
17
|
+
buildAdapterConfig,
|
|
18
|
+
resolveModelWithSource,
|
|
19
|
+
resolveProviderWithSource,
|
|
20
|
+
} from '../utils/adapter';
|
|
21
|
+
import { createStorage } from '../utils/storage';
|
|
22
|
+
|
|
23
|
+
/**
 * Options collected by Commander for the `run` command.
 *
 * Field names mirror the long flag names declared in `runCommand()`
 * (camel-cased by Commander, e.g. `--redact-patterns` -> `redactPatterns`).
 */
interface RunOptions {
  /** Value of `-p, --provider`; takes part in provider resolution. */
  provider?: string;
  /** Value of `-m, --model`; takes part in model resolution. */
  model?: string;
  /** Value of `-o, --output`. */
  output?: string;
  /** Set by `-v, --verbose`; enables progress messages, failure reasons and stack traces. */
  verbose?: boolean;
  /** Values of the variadic `-t, --tags` option, forwarded to the runner as a filter. */
  tags?: string[];
  /** Whether the resulting manifest should be persisted (`--save`, defaults to true). */
  save?: boolean;
  /**
   * Raw value of `-c, --concurrency`.
   * The option is registered without a custom parser and with the string
   * default `'1'`, so the value arrives as text and is converted by the handler.
   */
  concurrency?: string;
  /** Raw value of `--timeout` (milliseconds, as text); converted by the handler. */
  timeout?: string;
  /** Raw value of `--retries` (as text); converted by the handler. */
  retries?: string;
  /** Value of `--config`: explicit path handed to `loadConfig`. */
  config?: string;
  /** Set by `--redact`; turns on redaction of prompts and responses. */
  redact?: boolean;
  /** Values of the variadic `--redact-patterns` option; only read when `redact` is set. */
  redactPatterns?: string[];
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build the `run` sub-command.
 *
 * The command handler:
 *  1. validates the numeric flags,
 *  2. loads the optional config file and the scenario file,
 *  3. resolves provider/model (the call order is CLI, scenario, config),
 *  4. creates the adapter and executes the scenario through `runScenario`,
 *  5. prints a summary table and, unless `--no-save` is given, stores the manifest.
 *
 * The process exits with status 1 when the run reports `success === false`
 * or when any step throws.
 *
 * @returns The configured Commander `Command` instance.
 */
export function runCommand(): Command {
  const cmd = new Command('run');

  cmd
    .description('Run test scenarios against an LLM')
    .argument('<scenario>', 'Path to scenario YAML file')
    .option('-p, --provider <provider>', 'Provider to use (openai, azure-openai, vercel-ai)')
    .option('-m, --model <model>', 'Model to use')
    // NOTE(review): --output is accepted but never read by this handler.
    .option('-o, --output <dir>', 'Output directory for results')
    .option('-v, --verbose', 'Verbose output')
    .option('-t, --tags <tags...>', 'Filter test cases by tags')
    .option('--save', 'Save results to storage', true)
    // Without a negatable partner the default-true --save flag could never be switched off.
    .option('--no-save', 'Do not save results to storage')
    .option('-c, --concurrency <number>', 'Number of concurrent test cases', '1')
    .option('--timeout <ms>', 'Timeout per test case in milliseconds')
    .option('--retries <number>', 'Number of retries per test case')
    .option('--config <path>', 'Path to config file')
    .option('--redact', 'Enable PII/sensitive data redaction in results')
    .option(
      '--redact-patterns <patterns...>',
      'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
    )
    .action(async (scenarioPath: string, options: RunOptions) => {
      const spinner = ora('Loading configuration...').start();

      try {
        // Fail fast on malformed numeric flags, before any connection is opened.
        // A concurrency of 0 (or an absent value) keeps falling back to 1.
        const concurrency = parseIntegerOption('--concurrency', options.concurrency) || 1;
        const timeout = parseIntegerOption('--timeout', options.timeout);
        const retries = parseIntegerOption('--retries', options.retries);

        // Load config file if present
        const config = await loadConfig(options.config);
        if (config) {
          spinner.succeed(`Loaded config from ${(config as { _path?: string })._path}`);
        } else {
          spinner.info('No config file found, using defaults');
        }

        // Parse scenario
        spinner.start('Loading scenario...');
        const scenario = await parseScenarioFile(scenarioPath);
        spinner.succeed(`Loaded scenario: ${scenario.name}`);

        // Resolve provider and model with precedence and source tracking:
        // CLI > Scenario > Config > Default
        const { provider, source: providerSource } = resolveProviderWithSource(
          options.provider,
          scenario.provider,
          config?.provider
        );
        const { model, source: modelSource } = resolveModelWithSource(
          options.model,
          scenario.model,
          config?.model
        );

        // Build adapter config with full precedence chain and source tracking
        spinner.start(`Connecting to ${provider}...`);
        const { adapterConfig, resolvedConfig } = buildAdapterConfig({
          provider,
          model,
          providerSource,
          modelSource,
          scenarioConfig: scenario.providerConfig,
          fileConfig: config,
        });
        const client = await createAdapter(adapterConfig);
        spinner.succeed(`Connected to ${provider}`);

        console.log();
        console.log(chalk.bold(`Running scenario: ${scenario.name}`));
        console.log();

        // Build redaction config from CLI options
        let redaction: RedactionConfig | undefined;
        if (options.redact) {
          redaction = {
            enabled: true,
            patterns: options.redactPatterns,
            redactPrompts: true,
            redactResponses: true,
            redactMetadata: false,
            replacement: '[REDACTED]',
          };
          console.log(
            chalk.dim(
              `Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
            )
          );
          console.log();
        }

        // Run scenario using core runner
        const result = await runScenario({
          scenario,
          client,
          // `||` is intentional: an empty project name falls through to the next source.
          project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
          resolvedConfig,
          tags: options.tags,
          concurrency,
          timeout,
          retries,
          redaction,
          onCaseComplete: (caseResult) => {
            const statusIcon = caseResult.ok ? chalk.green('✓') : chalk.red('✗');
            const scoreStr = `(${(caseResult.score * 100).toFixed(0)}%)`;
            console.log(`${statusIcon} ${caseResult.id} ${chalk.dim(scoreStr)}`);

            if (!caseResult.ok && options.verbose) {
              console.log(chalk.dim(`   Reason: ${caseResult.reason}`));
            }
          },
          onProgress: (message) => {
            if (options.verbose) {
              console.log(chalk.dim(message));
            }
          },
        });

        // Display summary
        console.log();
        displaySummary(result.manifest.metrics, result.manifest.run_id, result.manifest.redaction);

        // Save results
        if (options.save) {
          spinner.start('Saving results...');
          const storage = createStorage({ fileConfig: config });
          const path = await storage.save(result.manifest);
          spinner.succeed(`Results saved: ${path}`);
        }

        // Exit with error if any tests failed
        if (!result.success) {
          process.exit(1);
        }
      } catch (error) {
        // Anything can be thrown; normalise so `.message` / `.stack` are always available.
        const failure = error instanceof Error ? error : new Error(String(error));
        spinner.fail('Error');
        console.error(chalk.red('Error:'), failure.message);
        if (options.verbose) {
          console.error(failure.stack);
        }
        process.exit(1);
      }
    });

  return cmd;
}

/**
 * Convert the raw value of a numeric CLI flag into an integer.
 *
 * @param flag - Flag name used in the error message (e.g. `--timeout`).
 * @param raw - Value as delivered by the option parser; `undefined` when the flag is absent.
 * @returns The parsed integer, or `undefined` when the flag was not supplied.
 * @throws Error when the value is not a non-negative integer.
 */
function parseIntegerOption(flag: string, raw: string | number | undefined): number | undefined {
  if (raw === undefined) {
    return undefined;
  }
  const parsed = Number.parseInt(String(raw), 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    throw new Error(`Invalid value for ${flag}: expected a non-negative integer, got "${raw}"`);
  }
  return parsed;
}
|
|
180
|
+
|
|
181
|
+
/**
 * Print a two-column "Metric / Value" table summarising a finished run.
 *
 * @param metrics - Aggregate numbers taken from the run manifest.
 * @param runId - Identifier of the run, shown in the first row.
 * @param redaction - Optional redaction report; two extra rows are added when it is enabled.
 */
function displaySummary(
  metrics: {
    success_rate: number;
    total_cases: number;
    passed_cases: number;
    failed_cases: number;
    median_latency_ms: number;
    total_tokens: number;
  },
  runId: string,
  redaction?: {
    enabled: boolean;
    summary: {
      promptsRedacted: number;
      responsesRedacted: number;
      totalRedactions: number;
    };
  }
): void {
  // Colour thresholds: >= 90% green, >= 70% yellow, anything else red.
  let paintRate = chalk.red;
  if (metrics.success_rate >= 0.9) {
    paintRate = chalk.green;
  } else if (metrics.success_rate >= 0.7) {
    paintRate = chalk.yellow;
  }

  const failedCell =
    metrics.failed_cases > 0 ? chalk.red(metrics.failed_cases.toString()) : '0';

  const rows: string[][] = [
    ['Run ID', runId],
    ['Success Rate', paintRate(`${(metrics.success_rate * 100).toFixed(1)}%`)],
    ['Passed', chalk.green(metrics.passed_cases.toString())],
    ['Failed', failedCell],
    ['Median Latency', `${metrics.median_latency_ms}ms`],
    ['Total Tokens', metrics.total_tokens.toLocaleString()],
  ];

  // Add redaction info if enabled
  if (redaction?.enabled) {
    const { totalRedactions, promptsRedacted, responsesRedacted } = redaction.summary;
    rows.push(
      ['Redaction', chalk.yellow('Enabled')],
      [
        'Redactions Made',
        `${totalRedactions} (${promptsRedacted} prompts, ${responsesRedacted} responses)`,
      ]
    );
  }

  const summaryTable = new Table({
    head: [chalk.bold('Metric'), chalk.bold('Value')],
    style: { head: [], border: [] },
  });
  summaryTable.push(...rows);

  console.log(summaryTable.toString());
}
|