@artemiskit/cli 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CHANGELOG.md +62 -0
  2. package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
  3. package/bin/artemis.ts +13 -0
  4. package/dist/bin/artemis.d.ts +6 -0
  5. package/dist/bin/artemis.d.ts.map +1 -0
  6. package/dist/index.js +51297 -0
  7. package/dist/src/adapters.d.ts +6 -0
  8. package/dist/src/adapters.d.ts.map +1 -0
  9. package/dist/src/cli.d.ts +6 -0
  10. package/dist/src/cli.d.ts.map +1 -0
  11. package/dist/src/commands/compare.d.ts +6 -0
  12. package/dist/src/commands/compare.d.ts.map +1 -0
  13. package/dist/src/commands/history.d.ts +6 -0
  14. package/dist/src/commands/history.d.ts.map +1 -0
  15. package/dist/src/commands/index.d.ts +8 -0
  16. package/dist/src/commands/index.d.ts.map +1 -0
  17. package/dist/src/commands/init.d.ts +6 -0
  18. package/dist/src/commands/init.d.ts.map +1 -0
  19. package/dist/src/commands/redteam.d.ts +6 -0
  20. package/dist/src/commands/redteam.d.ts.map +1 -0
  21. package/dist/src/commands/report.d.ts +6 -0
  22. package/dist/src/commands/report.d.ts.map +1 -0
  23. package/dist/src/commands/run.d.ts +6 -0
  24. package/dist/src/commands/run.d.ts.map +1 -0
  25. package/dist/src/commands/stress.d.ts +6 -0
  26. package/dist/src/commands/stress.d.ts.map +1 -0
  27. package/dist/src/config/index.d.ts +6 -0
  28. package/dist/src/config/index.d.ts.map +1 -0
  29. package/dist/src/config/loader.d.ts +13 -0
  30. package/dist/src/config/loader.d.ts.map +1 -0
  31. package/dist/src/config/schema.d.ts +215 -0
  32. package/dist/src/config/schema.d.ts.map +1 -0
  33. package/dist/src/index.d.ts +6 -0
  34. package/dist/src/index.d.ts.map +1 -0
  35. package/dist/src/utils/adapter.d.ts +71 -0
  36. package/dist/src/utils/adapter.d.ts.map +1 -0
  37. package/dist/src/utils/storage.d.ts +22 -0
  38. package/dist/src/utils/storage.d.ts.map +1 -0
  39. package/package.json +65 -0
  40. package/src/adapters.ts +33 -0
  41. package/src/cli.ts +34 -0
  42. package/src/commands/compare.ts +104 -0
  43. package/src/commands/history.ts +80 -0
  44. package/src/commands/index.ts +8 -0
  45. package/src/commands/init.ts +111 -0
  46. package/src/commands/redteam.ts +511 -0
  47. package/src/commands/report.ts +126 -0
  48. package/src/commands/run.ts +233 -0
  49. package/src/commands/stress.ts +501 -0
  50. package/src/config/index.ts +6 -0
  51. package/src/config/loader.ts +112 -0
  52. package/src/config/schema.ts +56 -0
  53. package/src/index.ts +6 -0
  54. package/src/utils/adapter.ts +542 -0
  55. package/src/utils/storage.ts +67 -0
  56. package/tsconfig.json +13 -0
@@ -0,0 +1,233 @@
1
+ /**
2
+ * Run command - Execute test scenarios
3
+ */
4
+
5
+ import {
6
+ type RedactionConfig,
7
+ createAdapter,
8
+ parseScenarioFile,
9
+ runScenario,
10
+ } from '@artemiskit/core';
11
+ import chalk from 'chalk';
12
+ import Table from 'cli-table3';
13
+ import { Command } from 'commander';
14
+ import ora from 'ora';
15
+ import { loadConfig } from '../config/loader';
16
+ import {
17
+ buildAdapterConfig,
18
+ resolveModelWithSource,
19
+ resolveProviderWithSource,
20
+ } from '../utils/adapter';
21
+ import { createStorage } from '../utils/storage';
22
+
23
/**
 * Options accepted by the `run` command's action handler.
 *
 * Each member corresponds to one flag registered in `runCommand()`. Every
 * member is optional because a flag may be absent from the command line.
 */
interface RunOptions {
  // --- Target selection -------------------------------------------------

  /** `-p, --provider`: provider to use. */
  provider?: string;
  /** `-m, --model`: model to use. */
  model?: string;
  /** `--config`: path to a config file. */
  config?: string;

  // --- Execution control ------------------------------------------------

  /** `-t, --tags`: filter test cases by tags. */
  tags?: string[];
  /**
   * `-c, --concurrency`: number of concurrent test cases.
   * Declared as a number, but the action handler re-parses it through
   * `String(...)`/`parseInt`, and the registered default is the string `'1'`.
   */
  concurrency?: number;
  /** `--timeout`: timeout per test case in milliseconds (re-parsed by the handler). */
  timeout?: number;
  /** `--retries`: number of retries per test case (re-parsed by the handler). */
  retries?: number;

  // --- Output -----------------------------------------------------------

  /** `-o, --output`: output directory for results. */
  output?: string;
  /** `-v, --verbose`: verbose output. */
  verbose?: boolean;
  /** `--save`: save results to storage. */
  save?: boolean;

  // --- Redaction --------------------------------------------------------

  /** `--redact`: enable PII/sensitive data redaction in results. */
  redact?: boolean;
  /** `--redact-patterns`: custom redaction patterns (regex or built-in names). */
  redactPatterns?: string[];
}
37
+
38
/**
 * Parse an optional integer-valued CLI option.
 *
 * No custom argument parser is registered on the numeric options, so the
 * value is stringified and parsed here. A falsy value (flag not supplied)
 * yields `undefined`, matching the previous behaviour.
 *
 * @param raw - The raw option value as delivered to the action handler.
 * @param flag - The flag name, used only in the error message.
 * @returns The parsed base-10 integer, or `undefined` when no value was given.
 * @throws Error when a value was given but does not start with an integer.
 */
function parseOptionalInt(raw: unknown, flag: string): number | undefined {
  if (!raw) {
    return undefined;
  }
  // Explicit radix: option values are decimal numbers.
  const parsed = Number.parseInt(String(raw), 10);
  if (Number.isNaN(parsed)) {
    // Fail loudly instead of forwarding NaN to the scenario runner.
    throw new Error(`Invalid value for ${flag}: expected an integer, got "${String(raw)}"`);
  }
  return parsed;
}

/**
 * Build the `run` sub-command, which executes a test scenario against an LLM.
 *
 * The action handler:
 *  1. loads the optional config file,
 *  2. parses the scenario file,
 *  3. resolves provider and model (CLI > Scenario > Config > Default),
 *  4. creates the adapter and runs the scenario,
 *  5. prints a summary table and, unless `--no-save` is given, saves the manifest,
 *  6. exits the process with code 1 when the run was not successful or an
 *     error was thrown.
 *
 * @returns The configured commander `Command` instance.
 */
export function runCommand(): Command {
  const cmd = new Command('run');

  cmd
    .description('Run test scenarios against an LLM')
    .argument('<scenario>', 'Path to scenario YAML file')
    .option('-p, --provider <provider>', 'Provider to use (openai, azure-openai, vercel-ai)')
    .option('-m, --model <model>', 'Model to use')
    .option('-o, --output <dir>', 'Output directory for results')
    .option('-v, --verbose', 'Verbose output')
    .option('-t, --tags <tags...>', 'Filter test cases by tags')
    .option('--save', 'Save results to storage', true)
    // Without a negated flag the default of `true` could never be turned off,
    // which made the `if (options.save)` guard below unconditional.
    .option('--no-save', 'Do not save results to storage')
    .option('-c, --concurrency <number>', 'Number of concurrent test cases', '1')
    .option('--timeout <ms>', 'Timeout per test case in milliseconds')
    .option('--retries <number>', 'Number of retries per test case')
    .option('--config <path>', 'Path to config file')
    .option('--redact', 'Enable PII/sensitive data redaction in results')
    .option(
      '--redact-patterns <patterns...>',
      'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
    )
    .action(async (scenarioPath: string, options: RunOptions) => {
      const spinner = ora('Loading configuration...').start();

      try {
        // Load config file if present
        const config = await loadConfig(options.config);
        if (config) {
          spinner.succeed(`Loaded config from ${(config as { _path?: string })._path}`);
        } else {
          spinner.info('No config file found, using defaults');
        }

        // Parse scenario
        spinner.start('Loading scenario...');
        const scenario = await parseScenarioFile(scenarioPath);
        spinner.succeed(`Loaded scenario: ${scenario.name}`);

        // Resolve provider and model with precedence and source tracking:
        // CLI > Scenario > Config > Default
        const { provider, source: providerSource } = resolveProviderWithSource(
          options.provider,
          scenario.provider,
          config?.provider
        );
        const { model, source: modelSource } = resolveModelWithSource(
          options.model,
          scenario.model,
          config?.model
        );

        // Build adapter config with full precedence chain and source tracking
        spinner.start(`Connecting to ${provider}...`);
        const { adapterConfig, resolvedConfig } = buildAdapterConfig({
          provider,
          model,
          providerSource,
          modelSource,
          scenarioConfig: scenario.providerConfig,
          fileConfig: config,
        });
        const client = await createAdapter(adapterConfig);
        spinner.succeed(`Connected to ${provider}`);

        console.log();
        console.log(chalk.bold(`Running scenario: ${scenario.name}`));
        console.log();

        // Build redaction config from CLI options
        let redaction: RedactionConfig | undefined;
        if (options.redact) {
          redaction = {
            enabled: true,
            patterns: options.redactPatterns,
            redactPrompts: true,
            redactResponses: true,
            redactMetadata: false,
            replacement: '[REDACTED]',
          };
          console.log(
            chalk.dim(
              `Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
            )
          );
          console.log();
        }

        // Run scenario using core runner
        const result = await runScenario({
          scenario,
          client,
          // `||` is kept on purpose so an empty project name falls through.
          project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
          resolvedConfig,
          tags: options.tags,
          // Unparseable or zero concurrency falls back to 1, as before.
          concurrency: Number.parseInt(String(options.concurrency), 10) || 1,
          timeout: parseOptionalInt(options.timeout, '--timeout'),
          retries: parseOptionalInt(options.retries, '--retries'),
          redaction,
          onCaseComplete: (caseResult) => {
            const statusIcon = caseResult.ok ? chalk.green('✓') : chalk.red('✗');
            const scoreStr = `(${(caseResult.score * 100).toFixed(0)}%)`;
            console.log(`${statusIcon} ${caseResult.id} ${chalk.dim(scoreStr)}`);

            if (!caseResult.ok && options.verbose) {
              console.log(chalk.dim(` Reason: ${caseResult.reason}`));
            }
          },
          onProgress: (message) => {
            if (options.verbose) {
              console.log(chalk.dim(message));
            }
          },
        });

        // Display summary
        console.log();
        displaySummary(result.manifest.metrics, result.manifest.run_id, result.manifest.redaction);

        // Save results
        if (options.save) {
          spinner.start('Saving results...');
          const storage = createStorage({ fileConfig: config });
          const path = await storage.save(result.manifest);
          spinner.succeed(`Results saved: ${path}`);
        }

        // Exit with error if any tests failed
        if (!result.success) {
          process.exit(1);
        }
      } catch (error: unknown) {
        spinner.fail('Error');
        // Anything can be thrown; narrow instead of asserting `Error`.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        if (options.verbose && error instanceof Error) {
          console.error(error.stack);
        }
        process.exit(1);
      }
    });

  return cmd;
}
180
+
181
/**
 * Print a two-column "Metric / Value" table summarising a finished run.
 *
 * The success rate is coloured green at 0.9 and above, yellow at 0.7 and
 * above, and red otherwise. When `redaction.enabled` is truthy, two extra
 * rows report that redaction was on and how many redactions were made.
 *
 * @param metrics - Aggregate metrics taken from the run manifest.
 * @param runId - Identifier of the run, shown in the first row.
 * @param redaction - Optional redaction details from the run manifest.
 */
function displaySummary(
  metrics: {
    success_rate: number;
    total_cases: number;
    passed_cases: number;
    failed_cases: number;
    median_latency_ms: number;
    total_tokens: number;
  },
  runId: string,
  redaction?: {
    enabled: boolean;
    summary: {
      promptsRedacted: number;
      responsesRedacted: number;
      totalRedactions: number;
    };
  }
): void {
  const summaryTable = new Table({
    head: [chalk.bold('Metric'), chalk.bold('Value')],
    style: { head: [], border: [] },
  });

  // Pick the colour for the success-rate cell; red is the fallback.
  let paintRate = chalk.red;
  if (metrics.success_rate >= 0.9) {
    paintRate = chalk.green;
  } else if (metrics.success_rate >= 0.7) {
    paintRate = chalk.yellow;
  }

  const rateCell = paintRate(`${(metrics.success_rate * 100).toFixed(1)}%`);
  const passedCell = chalk.green(metrics.passed_cases.toString());

  // Only highlight the failure count when there is at least one failure.
  let failedCell = '0';
  if (metrics.failed_cases > 0) {
    failedCell = chalk.red(metrics.failed_cases.toString());
  }

  const rows: string[][] = [
    ['Run ID', runId],
    ['Success Rate', rateCell],
    ['Passed', passedCell],
    ['Failed', failedCell],
    ['Median Latency', `${metrics.median_latency_ms}ms`],
    ['Total Tokens', metrics.total_tokens.toLocaleString()],
  ];

  // Add redaction info if enabled
  if (redaction?.enabled) {
    const { totalRedactions, promptsRedacted, responsesRedacted } = redaction.summary;
    rows.push(
      ['Redaction', chalk.yellow('Enabled')],
      [
        'Redactions Made',
        `${totalRedactions} (${promptsRedacted} prompts, ${responsesRedacted} responses)`,
      ]
    );
  }

  summaryTable.push(...rows);
  console.log(summaryTable.toString());
}