@artemiskit/cli 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,23 +2,32 @@
2
2
  * Run command - Execute test scenarios
3
3
  */
4
4
 
5
+ import { basename } from 'node:path';
5
6
  import {
6
7
  type RedactionConfig,
8
+ type RunManifest,
7
9
  createAdapter,
8
10
  parseScenarioFile,
11
+ resolveScenarioPaths,
9
12
  runScenario,
10
13
  } from '@artemiskit/core';
11
14
  import chalk from 'chalk';
12
15
  import { Command } from 'commander';
13
16
  import { loadConfig } from '../config/loader.js';
17
+ import type { ArtemisConfig } from '../config/schema.js';
14
18
  import {
15
19
  createSpinner,
16
20
  formatDuration,
17
21
  getProviderErrorContext,
18
22
  icons,
23
+ isInteractive,
19
24
  isTTY,
20
25
  padText,
26
+ promptModel,
27
+ promptProvider,
28
+ promptScenarios,
21
29
  renderError,
30
+ renderFailureReason,
22
31
  renderProgressBar,
23
32
  renderSummaryPanel,
24
33
  } from '../ui/index.js';
@@ -42,21 +51,322 @@ interface RunOptions {
42
51
  config?: string;
43
52
  redact?: boolean;
44
53
  redactPatterns?: string[];
54
+ parallel?: number;
55
+ interactive?: boolean;
56
+ }
57
+
58
/**
 * Outcome of executing a single scenario file.
 */
interface ScenarioRunResult {
  /** Path of the scenario file that was executed. */
  scenarioPath: string;
  /** Name taken from the parsed scenario; the file's basename when the run threw. */
  scenarioName: string;
  /** Whether the run succeeded. */
  success: boolean;
  /** Manifest returned by the runner; an empty placeholder object when the run threw. */
  manifest: RunManifest;
  /** Error message recorded when the scenario could not be run. */
  error?: string;
}
65
+
66
/**
 * Execute one scenario without printing anything (used by the parallel runner).
 *
 * @param scenarioPath - Path of the scenario file to parse and run.
 * @param options - CLI options (provider/model overrides, tags, redaction, limits).
 * @param config - Loaded config file, or `null` when none was found.
 * @returns The scenario's path and name together with the runner's success flag and manifest.
 */
async function runSingleScenarioQuiet(
  scenarioPath: string,
  options: RunOptions,
  config: ArtemisConfig | null
): Promise<ScenarioRunResult> {
  const scenario = await parseScenarioFile(scenarioPath);

  // Precedence for both provider and model: CLI > Scenario > Config > Default.
  const providerChoice = resolveProviderWithSource(
    options.provider,
    scenario.provider,
    config?.provider
  );
  const modelChoice = resolveModelWithSource(options.model, scenario.model, config?.model);

  // The resolved values and where they came from feed the adapter configuration.
  const built = buildAdapterConfig({
    provider: providerChoice.provider,
    model: modelChoice.model,
    providerSource: providerChoice.source,
    modelSource: modelChoice.source,
    scenarioConfig: scenario.providerConfig,
    fileConfig: config,
  });
  const client = await createAdapter(built.adapterConfig);

  // Redaction is opt-in from the CLI; prompts and responses are redacted, metadata is not.
  const redaction: RedactionConfig | undefined = options.redact
    ? {
        enabled: true,
        patterns: options.redactPatterns,
        redactPrompts: true,
        redactResponses: true,
        redactMetadata: false,
        replacement: '[REDACTED]',
      }
    : undefined;

  // Numeric options may arrive as strings from the CLI, hence the String() + parseInt round trip.
  const project = config?.project || process.env.ARTEMIS_PROJECT || 'default';
  const concurrency = Number.parseInt(String(options.concurrency)) || 1;
  const timeout = options.timeout ? Number.parseInt(String(options.timeout)) : undefined;
  const retries = options.retries ? Number.parseInt(String(options.retries)) : undefined;

  // No progress callbacks are passed: quiet mode leaves reporting to the caller.
  const { success, manifest } = await runScenario({
    scenario,
    client,
    project,
    resolvedConfig: built.resolvedConfig,
    tags: options.tags,
    concurrency,
    timeout,
    retries,
    redaction,
  });

  return {
    scenarioPath,
    scenarioName: scenario.name,
    success,
    manifest,
  };
}
134
+
135
/**
 * Run one scenario with live console output (used for sequential execution).
 *
 * Prints one line per completed test case (with a progress bar on a TTY, a
 * `[done/total]` counter otherwise) and, with `--verbose`, failure reasons and
 * runner progress messages.
 *
 * @param scenarioPath - Path of the scenario file to parse and run.
 * @param options - CLI options (provider/model overrides, tags, redaction, limits, verbosity).
 * @param config - Loaded config file, or `null` when none was found.
 * @param spinner - Spinner used for the "Connecting to ..." status in single-scenario mode.
 * @param isMultiScenario - When true, a per-scenario header is printed and the
 *   spinner, "Running scenario" banner and redaction notice are suppressed.
 * @returns The scenario's path and name together with the runner's success flag and manifest.
 * @throws Re-throws any error from parsing, adapter setup or the runner. If adapter
 *   setup fails in single-scenario mode the spinner is failed first so it does not
 *   keep animating over the caller's error output.
 */
async function runSingleScenario(
  scenarioPath: string,
  options: RunOptions,
  config: ArtemisConfig | null,
  spinner: ReturnType<typeof createSpinner>,
  isMultiScenario: boolean
): Promise<ScenarioRunResult> {
  // Parse scenario
  const scenario = await parseScenarioFile(scenarioPath);

  if (isMultiScenario) {
    console.log();
    console.log(chalk.bold.cyan(`━━━ ${scenario.name} ━━━`));
    console.log(chalk.dim(`File: ${basename(scenarioPath)}`));
    console.log();
  }

  // Resolve provider and model with precedence and source tracking:
  // CLI > Scenario > Config > Default
  const { provider, source: providerSource } = resolveProviderWithSource(
    options.provider,
    scenario.provider,
    config?.provider
  );
  const { model, source: modelSource } = resolveModelWithSource(
    options.model,
    scenario.model,
    config?.model
  );

  // Build adapter config with full precedence chain and source tracking
  const connect = async () => {
    const built = buildAdapterConfig({
      provider,
      model,
      providerSource,
      modelSource,
      scenarioConfig: scenario.providerConfig,
      fileConfig: config,
    });
    return {
      resolvedConfig: built.resolvedConfig,
      client: await createAdapter(built.adapterConfig),
    };
  };

  if (!isMultiScenario) {
    spinner.start(`Connecting to ${provider}...`);
  }
  let connection: Awaited<ReturnType<typeof connect>>;
  try {
    connection = await connect();
  } catch (error) {
    // Without this the spinner started above would still be running while the
    // caller reports the failure.
    if (!isMultiScenario) {
      spinner.fail(`Failed to connect to ${provider}`);
    }
    throw error;
  }
  const { client, resolvedConfig } = connection;

  if (!isMultiScenario) {
    spinner.succeed(`Connected to ${provider}`);
    console.log();
    console.log(chalk.bold(`Running scenario: ${scenario.name}`));
    console.log();
  }

  // Build redaction config from CLI options
  let redaction: RedactionConfig | undefined;
  if (options.redact) {
    redaction = {
      enabled: true,
      patterns: options.redactPatterns,
      redactPrompts: true,
      redactResponses: true,
      redactMetadata: false,
      replacement: '[REDACTED]',
    };
    if (!isMultiScenario) {
      console.log(
        chalk.dim(
          `Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
        )
      );
      console.log();
    }
  }

  // Track progress
  const totalCases = scenario.cases.length;
  let completedCases = 0;

  // Calculate max widths for alignment. The leading 0 keeps the width finite
  // when the scenario has no cases (Math.max() of nothing is -Infinity).
  const maxIdLength = Math.max(0, ...scenario.cases.map((c) => c.id.length));
  const maxScoreLength = 6; // "(100%)"
  const maxDurationLength = 6; // "10.0s" or "999ms"

  // Run scenario using core runner
  const result = await runScenario({
    scenario,
    client,
    project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
    resolvedConfig,
    tags: options.tags,
    concurrency: Number.parseInt(String(options.concurrency)) || 1,
    timeout: options.timeout ? Number.parseInt(String(options.timeout)) : undefined,
    retries: options.retries ? Number.parseInt(String(options.retries)) : undefined,
    redaction,
    onCaseComplete: (caseResult) => {
      completedCases++;

      const statusIcon = caseResult.ok ? icons.passed : icons.failed;
      const scoreStr = `(${(caseResult.score * 100).toFixed(0)}%)`;
      const durationStr = caseResult.latencyMs ? formatDuration(caseResult.latencyMs) : '';

      // Pad columns for alignment
      const paddedId = padText(caseResult.id, maxIdLength);
      const paddedScore = padText(scoreStr, maxScoreLength, 'right');
      const paddedDuration = padText(durationStr, maxDurationLength, 'right');

      // Show result - with progress bar in TTY, simple format in CI/CD
      if (isTTY) {
        const progressBar = renderProgressBar(completedCases, totalCases, { width: 15 });
        console.log(
          `${statusIcon} ${paddedId} ${chalk.dim(paddedScore)} ${chalk.dim(paddedDuration)} ${progressBar}`
        );
      } else {
        // CI/CD friendly output - no progress bar, just count
        console.log(
          `${statusIcon} ${paddedId} ${chalk.dim(paddedScore)} ${chalk.dim(paddedDuration)} [${completedCases}/${totalCases}]`
        );
      }

      // Failure details are only shown on request and only when a reason exists.
      if (!caseResult.ok && options.verbose && caseResult.reason) {
        console.log(
          renderFailureReason(caseResult.reason, { matcherType: caseResult.matcherType })
        );
      }
    },
    onProgress: (message) => {
      if (options.verbose) {
        console.log(chalk.dim(message));
      }
    },
  });

  return {
    scenarioPath,
    scenarioName: scenario.name,
    success: result.success,
    manifest: result.manifest,
  };
}
274
+
275
/**
 * Run scenarios concurrently, with at most `parallelLimit` in flight at once.
 *
 * Each scenario is executed quietly; one progress line is printed per scenario
 * as it finishes. A scenario that throws is recorded as a failed result (named
 * after the file's basename) instead of aborting the batch.
 *
 * @param scenarioPaths - Scenario files to run; they are started in this order.
 * @param options - CLI options forwarded to each scenario run; `options.save`
 *   controls whether manifests are persisted.
 * @param config - Loaded config file, or `null` when none was found.
 * @param parallelLimit - Maximum number of scenarios in flight. Values that are
 *   not finite or are below 1 are treated as 1.
 * @param storage - Storage used to persist manifests when `options.save` is set.
 * @returns Exactly one result per input path, in the same order as `scenarioPaths`.
 */
async function runScenariosInParallel(
  scenarioPaths: string[],
  options: RunOptions,
  config: ArtemisConfig | null,
  parallelLimit: number,
  storage: ReturnType<typeof createStorage>
): Promise<ScenarioRunResult[]> {
  const totalCount = scenarioPaths.length;
  // One slot per input path so the output order does not depend on timing.
  const results = new Array<ScenarioRunResult>(totalCount);
  let completedCount = 0;

  // A NaN / zero / negative limit would never start a task; fall back to 1.
  const effectiveLimit =
    Number.isFinite(parallelLimit) && parallelLimit >= 1 ? Math.floor(parallelLimit) : 1;
  const workerCount = Math.min(totalCount, effectiveLimit);

  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Progress display function
  const updateProgress = (scenarioName: string, success: boolean): void => {
    completedCount++;
    const icon = success ? icons.passed : icons.failed;
    const status = success ? chalk.green('passed') : chalk.red('failed');

    if (isTTY) {
      const progressBar = renderProgressBar(completedCount, totalCount, { width: 20 });
      console.log(`${icon} ${scenarioName} ${status} ${progressBar}`);
    } else {
      console.log(`${icon} ${scenarioName} ${status} [${completedCount}/${totalCount}]`);
    }
  };

  // Process a single scenario: run, optionally save, then record and report once.
  const processScenario = async (index: number, path: string): Promise<void> => {
    let result: ScenarioRunResult;
    try {
      result = await runSingleScenarioQuiet(path, options, config);
    } catch (error) {
      result = {
        scenarioPath: path,
        scenarioName: basename(path),
        success: false,
        manifest: {} as RunManifest,
        error: describeError(error),
      };
    }

    // Save results if enabled. A save failure marks this scenario as failed but
    // keeps its manifest, and never produces a second result entry.
    if (options.save && result.manifest.run_id) {
      try {
        await storage.save(result.manifest);
      } catch (error) {
        result = {
          ...result,
          success: false,
          error: `Failed to save results: ${describeError(error)}`,
        };
      }
    }

    results[index] = result;
    updateProgress(result.scenarioName, result.success);
  };

  // All workers pull from the same iterator, so each path is taken exactly once
  // and a worker picks up the next path as soon as it finishes its current one.
  const pending = scenarioPaths.entries();
  const worker = async (): Promise<void> => {
    for (const [index, path] of pending) {
      await processScenario(index, path);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return results;
}
46
350
 
47
351
  export function runCommand(): Command {
48
352
  const cmd = new Command('run');
49
353
 
50
354
  cmd
51
- .description('Run test scenarios against an LLM')
52
- .argument('<scenario>', 'Path to scenario YAML file')
355
+ .description(
356
+ 'Run test scenarios against an LLM. Accepts a file path, directory, or glob pattern.'
357
+ )
358
+ .argument(
359
+ '[scenario]',
360
+ 'Path to scenario file, directory, or glob pattern (e.g., scenarios/**/*.yaml)'
361
+ )
53
362
  .option('-p, --provider <provider>', 'Provider to use (openai, azure-openai, vercel-ai)')
54
363
  .option('-m, --model <model>', 'Model to use')
55
364
  .option('-o, --output <dir>', 'Output directory for results')
56
365
  .option('-v, --verbose', 'Verbose output')
57
366
  .option('-t, --tags <tags...>', 'Filter test cases by tags')
58
367
  .option('--save', 'Save results to storage', true)
59
- .option('-c, --concurrency <number>', 'Number of concurrent test cases', '1')
368
+ .option('-c, --concurrency <number>', 'Number of concurrent test cases per scenario', '1')
369
+ .option('--parallel <number>', 'Number of scenarios to run in parallel (default: sequential)')
60
370
  .option('--timeout <ms>', 'Timeout per test case in milliseconds')
61
371
  .option('--retries <number>', 'Number of retries per test case')
62
372
  .option('--config <path>', 'Path to config file')
@@ -65,7 +375,8 @@ export function runCommand(): Command {
65
375
  '--redact-patterns <patterns...>',
66
376
  'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
67
377
  )
68
- .action(async (scenarioPath: string, options: RunOptions) => {
378
+ .option('-i, --interactive', 'Enable interactive mode for scenario/provider selection')
379
+ .action(async (scenarioPath: string | undefined, options: RunOptions) => {
69
380
  const spinner = createSpinner('Loading configuration...');
70
381
  spinner.start();
71
382
 
@@ -73,161 +384,255 @@ export function runCommand(): Command {
73
384
  // Load config file if present
74
385
  const config = await loadConfig(options.config);
75
386
  if (config) {
76
- spinner.succeed(`Loaded config from ${(config as { _path?: string })._path}`);
387
+ spinner.succeed(`Loaded config from ${config._path}`);
77
388
  } else {
78
389
  spinner.info('No config file found, using defaults');
79
390
  }
80
391
 
81
- // Parse scenario
82
- spinner.start('Loading scenario...');
83
- const scenario = await parseScenarioFile(scenarioPath);
84
- spinner.succeed(`Loaded scenario: ${scenario.name}`);
85
-
86
- // Resolve provider and model with precedence and source tracking:
87
- // CLI > Scenario > Config > Default
88
- const { provider, source: providerSource } = resolveProviderWithSource(
89
- options.provider,
90
- scenario.provider,
91
- config?.provider
92
- );
93
- const { model, source: modelSource } = resolveModelWithSource(
94
- options.model,
95
- scenario.model,
96
- config?.model
97
- );
392
+ // Determine if we should use interactive mode
393
+ const useInteractive = options.interactive || (!scenarioPath && isInteractive());
98
394
 
99
- // Build adapter config with full precedence chain and source tracking
100
- spinner.start(`Connecting to ${provider}...`);
101
- const { adapterConfig, resolvedConfig } = buildAdapterConfig({
102
- provider,
103
- model,
104
- providerSource,
105
- modelSource,
106
- scenarioConfig: scenario.providerConfig,
107
- fileConfig: config,
108
- });
109
- const client = await createAdapter(adapterConfig);
110
- spinner.succeed(`Connected to ${provider}`);
395
+ // Interactive provider/model selection if requested
396
+ if (useInteractive && !options.provider) {
397
+ spinner.stop();
398
+ console.log(chalk.cyan('\n Interactive mode enabled\n'));
111
399
 
112
- console.log();
113
- console.log(chalk.bold(`Running scenario: ${scenario.name}`));
114
- console.log();
400
+ const provider = await promptProvider('Select a provider:');
401
+ options.provider = provider;
115
402
 
116
- // Build redaction config from CLI options
117
- let redaction: RedactionConfig | undefined;
118
- if (options.redact) {
119
- redaction = {
120
- enabled: true,
121
- patterns: options.redactPatterns,
122
- redactPrompts: true,
123
- redactResponses: true,
124
- redactMetadata: false,
125
- replacement: '[REDACTED]',
126
- };
127
- console.log(
128
- chalk.dim(
129
- `Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
130
- )
131
- );
403
+ const model = await promptModel(provider, 'Select a model:');
404
+ options.model = model;
405
+
406
+ console.log(''); // spacing
407
+ spinner.start('Discovering scenarios...');
408
+ }
409
+
410
+ // If no scenario path provided, try to find scenarios or prompt
411
+ let resolvedScenarioPath = scenarioPath;
412
+ if (!resolvedScenarioPath) {
413
+ // Try default scenarios directory
414
+ const defaultPath = config?.scenariosDir || './scenarios';
415
+ spinner.start(`Looking for scenarios in ${defaultPath}...`);
416
+
417
+ try {
418
+ const defaultScenarios = await resolveScenarioPaths(defaultPath);
419
+ if (defaultScenarios.length > 0) {
420
+ spinner.stop();
421
+
422
+ if (useInteractive) {
423
+ // Let user select which scenarios to run
424
+ const scenarioChoices = await Promise.all(
425
+ defaultScenarios.map(async (path) => {
426
+ try {
427
+ const scenario = await parseScenarioFile(path);
428
+ return { path, name: scenario.name || basename(path) };
429
+ } catch {
430
+ return { path, name: basename(path) };
431
+ }
432
+ })
433
+ );
434
+
435
+ const selectedPaths = await promptScenarios(
436
+ scenarioChoices,
437
+ 'Select scenarios to run:'
438
+ );
439
+
440
+ if (selectedPaths.length === 0) {
441
+ console.log(chalk.yellow('\nNo scenarios selected. Exiting.'));
442
+ process.exit(0);
443
+ }
444
+
445
+ // Use the first selected scenario or create a temp pattern
446
+ resolvedScenarioPath =
447
+ selectedPaths.length === 1 ? selectedPaths[0] : `{${selectedPaths.join(',')}}`;
448
+
449
+ console.log(''); // spacing
450
+ spinner.start('Preparing scenarios...');
451
+ } else {
452
+ spinner.succeed(`Found ${defaultScenarios.length} scenarios in ${defaultPath}`);
453
+ resolvedScenarioPath = defaultPath;
454
+ }
455
+ } else {
456
+ spinner.fail(`No scenarios found in ${defaultPath}`);
457
+ console.log();
458
+ console.log(chalk.yellow('Please provide a scenario path:'));
459
+ console.log(chalk.dim(' artemiskit run <path-to-scenario.yaml>'));
460
+ console.log(chalk.dim(' artemiskit run scenarios/'));
461
+ console.log(chalk.dim(' artemiskit run "scenarios/**/*.yaml"'));
462
+ process.exit(1);
463
+ }
464
+ } catch {
465
+ spinner.fail('No scenario path provided');
466
+ console.log();
467
+ console.log(chalk.yellow('Usage: artemiskit run <scenario>'));
468
+ console.log(chalk.dim(' <scenario> can be a file, directory, or glob pattern'));
469
+ process.exit(1);
470
+ }
471
+ }
472
+
473
+ // Resolve scenario paths (handles files, directories, and globs)
474
+ spinner.start('Discovering scenarios...');
475
+ const scenarioPaths = await resolveScenarioPaths(resolvedScenarioPath);
476
+
477
+ if (scenarioPaths.length === 0) {
478
+ spinner.fail('No scenario files found');
132
479
  console.log();
480
+ console.log(chalk.yellow(`No .yaml or .yml files found matching: ${scenarioPath}`));
481
+ console.log(chalk.dim('Make sure the path exists and contains valid scenario files.'));
482
+ process.exit(1);
133
483
  }
134
484
 
135
- // Track progress
136
- const totalCases = scenario.cases.length;
137
- let completedCases = 0;
138
-
139
- // Calculate max widths for alignment
140
- const maxIdLength = Math.max(...scenario.cases.map((c) => c.id.length));
141
- const maxScoreLength = 6; // "(100%)"
142
- const maxDurationLength = 6; // "10.0s" or "999ms"
143
-
144
- // Run scenario using core runner
145
- const result = await runScenario({
146
- scenario,
147
- client,
148
- project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
149
- resolvedConfig,
150
- tags: options.tags,
151
- concurrency: Number.parseInt(String(options.concurrency)) || 1,
152
- timeout: options.timeout ? Number.parseInt(String(options.timeout)) : undefined,
153
- retries: options.retries ? Number.parseInt(String(options.retries)) : undefined,
154
- redaction,
155
- onCaseComplete: (caseResult) => {
156
- completedCases++;
157
-
158
- const statusIcon = caseResult.ok ? icons.passed : icons.failed;
159
- const scoreStr = `(${(caseResult.score * 100).toFixed(0)}%)`;
160
- const durationStr = caseResult.latencyMs ? formatDuration(caseResult.latencyMs) : '';
161
-
162
- // Pad columns for alignment
163
- const paddedId = padText(caseResult.id, maxIdLength);
164
- const paddedScore = padText(scoreStr, maxScoreLength, 'right');
165
- const paddedDuration = padText(durationStr, maxDurationLength, 'right');
166
-
167
- // Show result - with progress bar in TTY, simple format in CI/CD
168
- if (isTTY) {
169
- const progressBar = renderProgressBar(completedCases, totalCases, { width: 15 });
170
- console.log(
171
- `${statusIcon} ${paddedId} ${chalk.dim(paddedScore)} ${chalk.dim(paddedDuration)} ${progressBar}`
485
+ const isMultiScenario = scenarioPaths.length > 1;
486
+ const parallelLimit = options.parallel ? Number.parseInt(String(options.parallel)) : 0;
487
+ const runInParallel = parallelLimit > 0 && isMultiScenario;
488
+
489
+ if (isMultiScenario) {
490
+ const modeStr = runInParallel
491
+ ? chalk.cyan(`parallel (${parallelLimit} concurrent)`)
492
+ : chalk.dim('sequential');
493
+ spinner.succeed(`Found ${scenarioPaths.length} scenario files`);
494
+ console.log();
495
+ console.log(chalk.bold(`Running ${scenarioPaths.length} scenarios ${modeStr}...`));
496
+ console.log();
497
+ } else {
498
+ spinner.succeed('Loaded scenario file');
499
+ }
500
+
501
+ // Run all scenarios
502
+ const storage = createStorage({ fileConfig: config });
503
+ let results: ScenarioRunResult[];
504
+
505
+ if (runInParallel) {
506
+ // Parallel execution
507
+ results = await runScenariosInParallel(
508
+ scenarioPaths,
509
+ options,
510
+ config,
511
+ parallelLimit,
512
+ storage
513
+ );
514
+ } else {
515
+ // Sequential execution
516
+ results = [];
517
+ for (const path of scenarioPaths) {
518
+ try {
519
+ const result = await runSingleScenario(
520
+ path,
521
+ options,
522
+ config,
523
+ spinner,
524
+ isMultiScenario
172
525
  );
173
- } else {
174
- // CI/CD friendly output - no progress bar, just count
526
+ results.push(result);
527
+
528
+ // Display per-scenario summary
529
+ const summaryData = {
530
+ passed: result.manifest.metrics.passed_cases,
531
+ failed: result.manifest.metrics.failed_cases,
532
+ skipped: 0,
533
+ successRate: result.manifest.metrics.success_rate * 100,
534
+ duration: result.manifest.duration_ms,
535
+ title: isMultiScenario ? result.scenarioName.toUpperCase() : 'TEST RESULTS',
536
+ };
537
+ console.log();
538
+ console.log(renderSummaryPanel(summaryData));
539
+
540
+ // Show additional metrics
541
+ console.log();
175
542
  console.log(
176
- `${statusIcon} ${paddedId} ${chalk.dim(paddedScore)} ${chalk.dim(paddedDuration)} [${completedCases}/${totalCases}]`
543
+ chalk.dim(
544
+ `Run ID: ${result.manifest.run_id} | Median Latency: ${result.manifest.metrics.median_latency_ms}ms | Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}`
545
+ )
177
546
  );
178
- }
179
547
 
180
- if (!caseResult.ok && options.verbose) {
181
- console.log(chalk.dim(` Reason: ${caseResult.reason}`));
182
- }
183
- },
184
- onProgress: (message) => {
185
- if (options.verbose) {
186
- console.log(chalk.dim(message));
548
+ // Show redaction info if enabled
549
+ if (result.manifest.redaction?.enabled) {
550
+ const r = result.manifest.redaction;
551
+ console.log(
552
+ chalk.dim(
553
+ `Redactions: ${r.summary.totalRedactions} (${r.summary.promptsRedacted} prompts, ${r.summary.responsesRedacted} responses)`
554
+ )
555
+ );
556
+ }
557
+
558
+ // Save results
559
+ if (options.save) {
560
+ const savedPath = await storage.save(result.manifest);
561
+ console.log(chalk.dim(`Saved: ${savedPath}`));
562
+ }
563
+ } catch (error) {
564
+ // Record failed scenario
565
+ console.log();
566
+ console.log(chalk.red(`${icons.failed} Failed to run: ${basename(path)}`));
567
+ if (options.verbose) {
568
+ console.log(chalk.dim((error as Error).message));
569
+ }
570
+ results.push({
571
+ scenarioPath: path,
572
+ scenarioName: basename(path),
573
+ success: false,
574
+ manifest: {} as RunManifest,
575
+ });
187
576
  }
188
- },
189
- });
577
+ }
578
+ }
190
579
 
191
- // Display summary using enhanced panel
192
- console.log();
193
- const summaryData = {
194
- passed: result.manifest.metrics.passed_cases,
195
- failed: result.manifest.metrics.failed_cases,
196
- skipped: 0,
197
- successRate: result.manifest.metrics.success_rate * 100,
198
- duration: result.manifest.duration_ms,
199
- title: 'TEST RESULTS',
200
- };
201
- console.log(renderSummaryPanel(summaryData));
202
-
203
- // Show additional metrics
204
- console.log();
205
- console.log(
206
- chalk.dim(
207
- `Run ID: ${result.manifest.run_id} | Median Latency: ${result.manifest.metrics.median_latency_ms}ms | Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}`
208
- )
209
- );
580
+ // Display aggregate summary for multiple scenarios
581
+ if (isMultiScenario) {
582
+ console.log();
583
+ console.log(chalk.bold.cyan('━━━ AGGREGATE SUMMARY ━━━'));
584
+ console.log();
585
+
586
+ const totalScenarios = results.length;
587
+ const passedScenarios = results.filter((r) => r.success).length;
588
+ const failedScenarios = totalScenarios - passedScenarios;
589
+
590
+ const totalCases = results.reduce(
591
+ (sum, r) => sum + (r.manifest.metrics?.total_cases || 0),
592
+ 0
593
+ );
594
+ const passedCases = results.reduce(
595
+ (sum, r) => sum + (r.manifest.metrics?.passed_cases || 0),
596
+ 0
597
+ );
598
+ const failedCases = results.reduce(
599
+ (sum, r) => sum + (r.manifest.metrics?.failed_cases || 0),
600
+ 0
601
+ );
602
+ const totalDuration = results.reduce((sum, r) => sum + (r.manifest.duration_ms || 0), 0);
210
603
 
211
- // Show redaction info if enabled
212
- if (result.manifest.redaction?.enabled) {
213
- const r = result.manifest.redaction;
214
604
  console.log(
215
- chalk.dim(
216
- `Redactions: ${r.summary.totalRedactions} (${r.summary.promptsRedacted} prompts, ${r.summary.responsesRedacted} responses)`
217
- )
605
+ `Scenarios: ${chalk.green(`${passedScenarios} passed`)} ${failedScenarios > 0 ? chalk.red(`${failedScenarios} failed`) : ''} ${chalk.dim(`(${totalScenarios} total)`)}`
218
606
  );
219
- }
607
+ console.log(
608
+ `Test Cases: ${chalk.green(`${passedCases} passed`)} ${failedCases > 0 ? chalk.red(`${failedCases} failed`) : ''} ${chalk.dim(`(${totalCases} total)`)}`
609
+ );
610
+ console.log(`Duration: ${chalk.dim(formatDuration(totalDuration))}`);
220
611
 
221
- // Save results
222
- if (options.save) {
223
- spinner.start('Saving results...');
224
- const storage = createStorage({ fileConfig: config });
225
- const path = await storage.save(result.manifest);
226
- spinner.succeed(`Results saved: ${path}`);
612
+ if (runInParallel) {
613
+ console.log(
614
+ `Mode: ${chalk.cyan('parallel')} ${chalk.dim(`(${parallelLimit} concurrent)`)}`
615
+ );
616
+ }
617
+ console.log();
618
+
619
+ // List failed scenarios
620
+ const failedResults = results.filter((r) => !r.success);
621
+ if (failedResults.length > 0) {
622
+ console.log(chalk.red('Failed scenarios:'));
623
+ for (const result of failedResults) {
624
+ console.log(chalk.red(` ${icons.failed} ${result.scenarioName}`));
625
+ if (result.error && options.verbose) {
626
+ console.log(chalk.dim(` ${result.error}`));
627
+ }
628
+ }
629
+ console.log();
630
+ }
227
631
  }
228
632
 
229
- // Exit with error if any tests failed
230
- if (!result.success) {
633
+ // Exit with error if any scenarios failed
634
+ const hasFailures = results.some((r) => !r.success);
635
+ if (hasFailures) {
231
636
  process.exit(1);
232
637
  }
233
638
  } catch (error) {