npm - @artemiskit/cli - Versions diffs - 0.1.8 → 0.2.0 - Mend

@artemiskit/cli 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/CHANGELOG.md +83 -0
package/bin/artemis.ts +0 -0
package/dist/index.js +70637 -33387
package/dist/src/commands/compare.d.ts.map +1 -1
package/dist/src/commands/init.d.ts.map +1 -1
package/dist/src/commands/redteam.d.ts.map +1 -1
package/dist/src/commands/run.d.ts.map +1 -1
package/dist/src/commands/stress.d.ts.map +1 -1
package/dist/src/config/loader.d.ts +3 -1
package/dist/src/config/loader.d.ts.map +1 -1
package/dist/src/config/schema.d.ts +8 -0
package/dist/src/config/schema.d.ts.map +1 -1
package/dist/src/ui/index.d.ts +3 -1
package/dist/src/ui/index.d.ts.map +1 -1
package/dist/src/ui/panels.d.ts +21 -0
package/dist/src/ui/panels.d.ts.map +1 -1
package/dist/src/ui/prompts.d.ts +92 -0
package/dist/src/ui/prompts.d.ts.map +1 -0
package/dist/src/utils/adapter.d.ts.map +1 -1
package/package.json +6 -6
package/src/commands/compare.ts +25 -0
package/src/commands/init.ts +173 -69
package/src/commands/redteam.ts +63 -10
package/src/commands/run.ts +542 -137
package/src/commands/stress.ts +76 -3
package/src/config/loader.ts +5 -2
package/src/config/schema.ts +1 -0
package/src/ui/index.ts +19 -0
package/src/ui/panels.ts +153 -5
package/src/ui/prompts.ts +749 -0
package/src/utils/adapter.ts +8 -0

package/src/commands/run.ts CHANGED Viewed

@@ -2,23 +2,32 @@
  * Run command - Execute test scenarios
  */
+import { basename } from 'node:path';
 import {
   type RedactionConfig,
+  type RunManifest,
   createAdapter,
   parseScenarioFile,
+  resolveScenarioPaths,
   runScenario,
 } from '@artemiskit/core';
 import chalk from 'chalk';
 import { Command } from 'commander';
 import { loadConfig } from '../config/loader.js';
+import type { ArtemisConfig } from '../config/schema.js';
 import {
   createSpinner,
   formatDuration,
   getProviderErrorContext,
   icons,
+  isInteractive,
   isTTY,
   padText,
+  promptModel,
+  promptProvider,
+  promptScenarios,
   renderError,
+  renderFailureReason,
   renderProgressBar,
   renderSummaryPanel,
 } from '../ui/index.js';
@@ -42,21 +51,322 @@ interface RunOptions {
   config?: string;
   redact?: boolean;
   redactPatterns?: string[];
+  parallel?: number;
+  interactive?: boolean;
+}
+interface ScenarioRunResult { // Outcome of running one scenario file, as collected by the run command
+  scenarioPath: string; // Path of the scenario file that was run
+  scenarioName: string; // scenario.name for a completed run; basename(path) when the run threw
+  success: boolean; // result.success from runScenario; false when the run threw
+  manifest: RunManifest; // Manifest returned by runScenario; an empty `{} as RunManifest` placeholder when the run threw
+  error?: string; // Error message; in this file only the parallel runner's catch branch sets it
+}
+/**
+ * Execute one scenario file without writing anything to the console.
+ *
+ * This is the variant used for parallel execution: no onCaseComplete / onProgress
+ * callbacks are passed to runScenario. Errors raised while parsing the scenario,
+ * creating the adapter or running the scenario are not caught here and propagate
+ * to the caller.
+ *
+ * @param scenarioPath - Path of the scenario file to parse and run.
+ * @param options - CLI options (provider/model overrides, tags, limits, redaction).
+ * @param config - Loaded config file contents, or null when there is none.
+ * @returns The scenario path and name together with runScenario's success flag and manifest.
+ */
+async function runSingleScenarioQuiet(
+  scenarioPath: string,
+  options: RunOptions,
+  config: ArtemisConfig | null
+): Promise<ScenarioRunResult> {
+  const scenario = await parseScenarioFile(scenarioPath);
+  // Precedence for both provider and model: CLI > Scenario > Config > Default
+  const providerChoice = resolveProviderWithSource(
+    options.provider,
+    scenario.provider,
+    config?.provider
+  );
+  const modelChoice = resolveModelWithSource(options.model, scenario.model, config?.model);
+  // The adapter config carries the resolved values plus where each one came from
+  const { adapterConfig, resolvedConfig } = buildAdapterConfig({
+    provider: providerChoice.provider,
+    model: modelChoice.model,
+    providerSource: providerChoice.source,
+    modelSource: modelChoice.source,
+    scenarioConfig: scenario.providerConfig,
+    fileConfig: config,
+  });
+  const client = await createAdapter(adapterConfig);
+  // Redaction is only configured when requested on the command line
+  const redaction: RedactionConfig | undefined = options.redact
+    ? {
+        enabled: true,
+        patterns: options.redactPatterns,
+        redactPrompts: true,
+        redactResponses: true,
+        redactMetadata: false,
+        replacement: '[REDACTED]',
+      }
+    : undefined;
+  // Numeric options are normalised through String()/parseInt; concurrency falls back to 1
+  const project = config?.project || process.env.ARTEMIS_PROJECT || 'default';
+  const concurrency = Number.parseInt(String(options.concurrency)) || 1;
+  const timeout = options.timeout ? Number.parseInt(String(options.timeout)) : undefined;
+  const retries = options.retries ? Number.parseInt(String(options.retries)) : undefined;
+  // Quiet mode: the core runner is invoked without any reporting callbacks
+  const { success, manifest } = await runScenario({
+    scenario,
+    client,
+    project,
+    resolvedConfig,
+    tags: options.tags,
+    concurrency,
+    timeout,
+    retries,
+    redaction,
+  });
+  return { scenarioPath, scenarioName: scenario.name, success, manifest };
+}
+/**
+ * Run a single scenario and return the result (verbose mode for sequential execution)
+ *
+ * Prints one line per completed test case: status icon, case id, score and duration,
+ * followed by a progress bar when isTTY is truthy or a "[completed/total]" counter
+ * otherwise. With options.verbose, failure reasons and runner progress messages are
+ * printed as well.
+ *
+ * Errors raised while parsing the scenario, creating the adapter or running the
+ * scenario are not caught here and propagate to the caller.
+ *
+ * @param scenarioPath - Path of the scenario file to parse and run.
+ * @param options - CLI options (provider/model overrides, tags, limits, redaction, verbose).
+ * @param config - Loaded config file contents, or null when there is none.
+ * @param spinner - Spinner used for the "Connecting to ..." status; only used when
+ *   isMultiScenario is false.
+ * @param isMultiScenario - When true, a banner with the scenario name and file name is
+ *   printed and the spinner, the "Running scenario" header and the redaction notice
+ *   are all skipped.
+ * @returns The scenario path and name together with runScenario's success flag and manifest.
+ */
+async function runSingleScenario(
+  scenarioPath: string,
+  options: RunOptions,
+  config: ArtemisConfig | null,
+  spinner: ReturnType<typeof createSpinner>,
+  isMultiScenario: boolean
+): Promise<ScenarioRunResult> {
+  // Parse scenario
+  const scenario = await parseScenarioFile(scenarioPath);
+  if (isMultiScenario) {
+    console.log();
+    console.log(chalk.bold.cyan(`━━━ ${scenario.name} ━━━`));
+    console.log(chalk.dim(`File: ${basename(scenarioPath)}`));
+    console.log();
+  }
+  // Resolve provider and model with precedence and source tracking:
+  // CLI > Scenario > Config > Default
+  const { provider, source: providerSource } = resolveProviderWithSource(
+    options.provider,
+    scenario.provider,
+    config?.provider
+  );
+  const { model, source: modelSource } = resolveModelWithSource(
+    options.model,
+    scenario.model,
+    config?.model
+  );
+  // Build adapter config with full precedence chain and source tracking
+  // (the spinner is started first, so it is already running if building the config throws)
+  if (!isMultiScenario) {
+    spinner.start(`Connecting to ${provider}...`);
+  }
+  const { adapterConfig, resolvedConfig } = buildAdapterConfig({
+    provider,
+    model,
+    providerSource,
+    modelSource,
+    scenarioConfig: scenario.providerConfig,
+    fileConfig: config,
+  });
+  const client = await createAdapter(adapterConfig);
+  if (!isMultiScenario) {
+    spinner.succeed(`Connected to ${provider}`);
+    console.log();
+    console.log(chalk.bold(`Running scenario: ${scenario.name}`));
+    console.log();
+  }
+  // Build redaction config from CLI options
+  // (the notice below is only printed in single-scenario mode; the config is built either way)
+  let redaction: RedactionConfig | undefined;
+  if (options.redact) {
+    redaction = {
+      enabled: true,
+      patterns: options.redactPatterns,
+      redactPrompts: true,
+      redactResponses: true,
+      redactMetadata: false,
+      replacement: '[REDACTED]',
+    };
+    if (!isMultiScenario) {
+      console.log(
+        chalk.dim(
+          `Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
+        )
+      );
+      console.log();
+    }
+  }
+  // Track progress (completedCases is incremented by onCaseComplete below)
+  const totalCases = scenario.cases.length;
+  let completedCases = 0;
+  // Calculate max widths for alignment
+  // NOTE(review): Math.max over an empty cases array yields -Infinity; the value is only
+  // read inside onCaseComplete, which presumably never fires without cases - confirm.
+  const maxIdLength = Math.max(...scenario.cases.map((c) => c.id.length));
+  const maxScoreLength = 6; // "(100%)"
+  const maxDurationLength = 6; // "10.0s" or "999ms"
+  // Run scenario using core runner
+  const result = await runScenario({
+    scenario,
+    client,
+    project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
+    resolvedConfig,
+    tags: options.tags,
+    concurrency: Number.parseInt(String(options.concurrency)) || 1,
+    timeout: options.timeout ? Number.parseInt(String(options.timeout)) : undefined,
+    retries: options.retries ? Number.parseInt(String(options.retries)) : undefined,
+    redaction,
+    onCaseComplete: (caseResult) => {
+      completedCases++;
+      const statusIcon = caseResult.ok ? icons.passed : icons.failed;
+      const scoreStr = `(${(caseResult.score * 100).toFixed(0)}%)`;
+      const durationStr = caseResult.latencyMs ? formatDuration(caseResult.latencyMs) : ''; // a falsy latency (including 0) renders as an empty column
+      // Pad columns for alignment
+      const paddedId = padText(caseResult.id, maxIdLength);
+      const paddedScore = padText(scoreStr, maxScoreLength, 'right');
+      const paddedDuration = padText(durationStr, maxDurationLength, 'right');
+      // Show result - with progress bar in TTY, simple format in CI/CD
+      if (isTTY) {
+        const progressBar = renderProgressBar(completedCases, totalCases, { width: 15 });
+        console.log(
+          `${statusIcon} ${paddedId}  ${chalk.dim(paddedScore)}  ${chalk.dim(paddedDuration)}  ${progressBar}`
+        );
+      } else {
+        // CI/CD friendly output - no progress bar, just count
+        console.log(
+          `${statusIcon} ${paddedId}  ${chalk.dim(paddedScore)}  ${chalk.dim(paddedDuration)}  [${completedCases}/${totalCases}]`
+        );
+      }
+      if (!caseResult.ok && options.verbose && caseResult.reason) { // failure details only in verbose mode, and only when a reason is present
+        console.log(
+          renderFailureReason(caseResult.reason, { matcherType: caseResult.matcherType })
+        );
+      }
+    },
+    onProgress: (message) => {
+      if (options.verbose) {
+        console.log(chalk.dim(message));
+      }
+    },
+  });
+  return {
+    scenarioPath,
+    scenarioName: scenario.name,
+    success: result.success,
+    manifest: result.manifest,
+  };
+}
+/**
+ * Run scenarios in parallel with a concurrency limit
+ *
+ * Each scenario is executed through runSingleScenarioQuiet, so the only console output
+ * is one progress line per finished scenario (progress bar when isTTY is truthy,
+ * otherwise a "[completed/total]" counter).
+ *
+ * Every scenario contributes exactly one entry to the returned array and exactly one
+ * progress line. A run that throws is recorded as a failure named after the file. A
+ * manifest that cannot be saved marks that scenario's own entry as failed, keeping its
+ * manifest, instead of adding a second entry.
+ *
+ * @param scenarioPaths - Scenario files to run; they are started in this order.
+ * @param options - CLI options forwarded to every run; options.save enables saving.
+ * @param config - Loaded config forwarded to every run, or null when there is none.
+ * @param parallelLimit - Maximum number of scenarios in flight. Fractions are floored and
+ *   0, negative or NaN values are treated as 1 so the queue is always drained.
+ * @param storage - Storage used to save manifests that carry a run_id.
+ * @returns One result per scenario in completion order (not input order). Run and save
+ *   failures are captured as success: false plus an error message rather than thrown.
+ */
+async function runScenariosInParallel(
+  scenarioPaths: string[],
+  options: RunOptions,
+  config: ArtemisConfig | null,
+  parallelLimit: number,
+  storage: ReturnType<typeof createStorage>
+): Promise<ScenarioRunResult[]> {
+  const results: ScenarioRunResult[] = [];
+  let completedCount = 0;
+  const totalCount = scenarioPaths.length;
+  // Pending scenario paths; workers take them from the front
+  const queue = [...scenarioPaths];
+  // A limit of 0, a negative number or NaN would otherwise never start any work
+  const workerCount = Math.min(Math.max(1, Math.floor(parallelLimit) || 1), totalCount);
+  // Thrown values are not guaranteed to be Error instances
+  const toMessage = (error: unknown): string =>
+    error instanceof Error ? error.message : String(error);
+  // Progress display function
+  const updateProgress = (scenarioName: string, success: boolean) => {
+    completedCount++;
+    const icon = success ? icons.passed : icons.failed;
+    const status = success ? chalk.green('passed') : chalk.red('failed');
+    if (isTTY) {
+      const progressBar = renderProgressBar(completedCount, totalCount, { width: 20 });
+      console.log(`${icon} ${scenarioName}  ${status}  ${progressBar}`);
+    } else {
+      console.log(`${icon} ${scenarioName}  ${status}  [${completedCount}/${totalCount}]`);
+    }
+  };
+  // Process a single scenario: run it, save it if enabled, then record and report it once
+  const processScenario = async (path: string): Promise<void> => {
+    let result: ScenarioRunResult;
+    try {
+      result = await runSingleScenarioQuiet(path, options, config);
+    } catch (error) {
+      result = {
+        scenarioPath: path,
+        scenarioName: basename(path),
+        success: false,
+        manifest: {} as RunManifest,
+        error: toMessage(error),
+      };
+    }
+    // Save results if enabled. The placeholder manifest of a failed run has no run_id and
+    // is skipped. Saving happens before the result is recorded so that a storage error
+    // cannot produce a duplicate entry or a second progress line for the same scenario.
+    if (options.save && result.manifest.run_id) {
+      try {
+        await storage.save(result.manifest);
+      } catch (error) {
+        result = {
+          ...result,
+          success: false,
+          error: `Failed to save results: ${toMessage(error)}`,
+        };
+      }
+    }
+    results.push(result);
+    updateProgress(result.scenarioName, result.success);
+  };
+  // Each worker drains the shared queue, so at most workerCount scenarios are in flight
+  const runWorker = async (): Promise<void> => {
+    for (let path = queue.shift(); path !== undefined; path = queue.shift()) {
+      await processScenario(path);
+    }
+  };
+  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
+  return results;
 }
 export function runCommand(): Command {
   const cmd = new Command('run');
   cmd
-    .description('Run test scenarios against an LLM')
-    .argument('<scenario>', 'Path to scenario YAML file')
+    .description(
+      'Run test scenarios against an LLM. Accepts a file path, directory, or glob pattern.'
+    )
+    .argument(
+      '[scenario]',
+      'Path to scenario file, directory, or glob pattern (e.g., scenarios/**/*.yaml)'
+    )
     .option('-p, --provider <provider>', 'Provider to use (openai, azure-openai, vercel-ai)')
     .option('-m, --model <model>', 'Model to use')
     .option('-o, --output <dir>', 'Output directory for results')
     .option('-v, --verbose', 'Verbose output')
     .option('-t, --tags <tags...>', 'Filter test cases by tags')
     .option('--save', 'Save results to storage', true)
-    .option('-c, --concurrency <number>', 'Number of concurrent test cases', '1')
+    .option('-c, --concurrency <number>', 'Number of concurrent test cases per scenario', '1')
+    .option('--parallel <number>', 'Number of scenarios to run in parallel (default: sequential)')
     .option('--timeout <ms>', 'Timeout per test case in milliseconds')
     .option('--retries <number>', 'Number of retries per test case')
     .option('--config <path>', 'Path to config file')
@@ -65,7 +375,8 @@ export function runCommand(): Command {
       '--redact-patterns <patterns...>',
       'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
     )
-    .action(async (scenarioPath: string, options: RunOptions) => {
+    .option('-i, --interactive', 'Enable interactive mode for scenario/provider selection')
+    .action(async (scenarioPath: string | undefined, options: RunOptions) => {
       const spinner = createSpinner('Loading configuration...');
       spinner.start();
@@ -73,161 +384,255 @@ export function runCommand(): Command {
         // Load config file if present
         const config = await loadConfig(options.config);
         if (config) {
-          spinner.succeed(`Loaded config from ${(config as { _path?: string })._path}`);
+          spinner.succeed(`Loaded config from ${config._path}`);
         } else {
           spinner.info('No config file found, using defaults');
         }
-        // Parse scenario
-        spinner.start('Loading scenario...');
-        const scenario = await parseScenarioFile(scenarioPath);
-        spinner.succeed(`Loaded scenario: ${scenario.name}`);
-        // Resolve provider and model with precedence and source tracking:
-        // CLI > Scenario > Config > Default
-        const { provider, source: providerSource } = resolveProviderWithSource(
-          options.provider,
-          scenario.provider,
-          config?.provider
-        );
-        const { model, source: modelSource } = resolveModelWithSource(
-          options.model,
-          scenario.model,
-          config?.model
-        );
+        // Determine if we should use interactive mode
+        const useInteractive = options.interactive || (!scenarioPath && isInteractive());
-        // Build adapter config with full precedence chain and source tracking
-        spinner.start(`Connecting to ${provider}...`);
-        const { adapterConfig, resolvedConfig } = buildAdapterConfig({
-          provider,
-          model,
-          providerSource,
-          modelSource,
-          scenarioConfig: scenario.providerConfig,
-          fileConfig: config,
-        });
-        const client = await createAdapter(adapterConfig);
-        spinner.succeed(`Connected to ${provider}`);
+        // Interactive provider/model selection if requested
+        if (useInteractive && !options.provider) {
+          spinner.stop();
+          console.log(chalk.cyan('\n  Interactive mode enabled\n'));
-        console.log();
-        console.log(chalk.bold(`Running scenario: ${scenario.name}`));
-        console.log();
+          const provider = await promptProvider('Select a provider:');
+          options.provider = provider;
-        // Build redaction config from CLI options
-        let redaction: RedactionConfig | undefined;
-        if (options.redact) {
-          redaction = {
-            enabled: true,
-            patterns: options.redactPatterns,
-            redactPrompts: true,
-            redactResponses: true,
-            redactMetadata: false,
-            replacement: '[REDACTED]',
-          };
-          console.log(
-            chalk.dim(
-              `Redaction enabled${options.redactPatterns ? ` with patterns: ${options.redactPatterns.join(', ')}` : ' (default patterns)'}`
-            )
-          );
+          const model = await promptModel(provider, 'Select a model:');
+          options.model = model;
+          console.log(''); // spacing
+          spinner.start('Discovering scenarios...');
+        }
+        // If no scenario path provided, try to find scenarios or prompt
+        let resolvedScenarioPath = scenarioPath;
+        if (!resolvedScenarioPath) {
+          // Try default scenarios directory
+          const defaultPath = config?.scenariosDir || './scenarios';
+          spinner.start(`Looking for scenarios in ${defaultPath}...`);
+          try {
+            const defaultScenarios = await resolveScenarioPaths(defaultPath);
+            if (defaultScenarios.length > 0) {
+              spinner.stop();
+              if (useInteractive) {
+                // Let user select which scenarios to run
+                const scenarioChoices = await Promise.all(
+                  defaultScenarios.map(async (path) => {
+                    try {
+                      const scenario = await parseScenarioFile(path);
+                      return { path, name: scenario.name || basename(path) };
+                    } catch {
+                      return { path, name: basename(path) };
+                    }
+                  })
+                );
+                const selectedPaths = await promptScenarios(
+                  scenarioChoices,
+                  'Select scenarios to run:'
+                );
+                if (selectedPaths.length === 0) {
+                  console.log(chalk.yellow('\nNo scenarios selected. Exiting.'));
+                  process.exit(0);
+                }
+                // Use the first selected scenario or create a temp pattern
+                resolvedScenarioPath =
+                  selectedPaths.length === 1 ? selectedPaths[0] : `{${selectedPaths.join(',')}}`;
+                console.log(''); // spacing
+                spinner.start('Preparing scenarios...');
+              } else {
+                spinner.succeed(`Found ${defaultScenarios.length} scenarios in ${defaultPath}`);
+                resolvedScenarioPath = defaultPath;
+              }
+            } else {
+              spinner.fail(`No scenarios found in ${defaultPath}`);
+              console.log();
+              console.log(chalk.yellow('Please provide a scenario path:'));
+              console.log(chalk.dim('  artemiskit run <path-to-scenario.yaml>'));
+              console.log(chalk.dim('  artemiskit run scenarios/'));
+              console.log(chalk.dim('  artemiskit run "scenarios/**/*.yaml"'));
+              process.exit(1);
+            }
+          } catch {
+            spinner.fail('No scenario path provided');
+            console.log();
+            console.log(chalk.yellow('Usage: artemiskit run <scenario>'));
+            console.log(chalk.dim('  <scenario> can be a file, directory, or glob pattern'));
+            process.exit(1);
+          }
+        }
+        // Resolve scenario paths (handles files, directories, and globs)
+        spinner.start('Discovering scenarios...');
+        const scenarioPaths = await resolveScenarioPaths(resolvedScenarioPath);
+        if (scenarioPaths.length === 0) {
+          spinner.fail('No scenario files found');
           console.log();
+          console.log(chalk.yellow(`No .yaml or .yml files found matching: ${scenarioPath}`));
+          console.log(chalk.dim('Make sure the path exists and contains valid scenario files.'));
+          process.exit(1);
         }
-        // Track progress
-        const totalCases = scenario.cases.length;
-        let completedCases = 0;
-        // Calculate max widths for alignment
-        const maxIdLength = Math.max(...scenario.cases.map((c) => c.id.length));
-        const maxScoreLength = 6; // "(100%)"
-        const maxDurationLength = 6; // "10.0s" or "999ms"
-        // Run scenario using core runner
-        const result = await runScenario({
-          scenario,
-          client,
-          project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
-          resolvedConfig,
-          tags: options.tags,
-          concurrency: Number.parseInt(String(options.concurrency)) || 1,
-          timeout: options.timeout ? Number.parseInt(String(options.timeout)) : undefined,
-          retries: options.retries ? Number.parseInt(String(options.retries)) : undefined,
-          redaction,
-          onCaseComplete: (caseResult) => {
-            completedCases++;
-            const statusIcon = caseResult.ok ? icons.passed : icons.failed;
-            const scoreStr = `(${(caseResult.score * 100).toFixed(0)}%)`;
-            const durationStr = caseResult.latencyMs ? formatDuration(caseResult.latencyMs) : '';
-            // Pad columns for alignment
-            const paddedId = padText(caseResult.id, maxIdLength);
-            const paddedScore = padText(scoreStr, maxScoreLength, 'right');
-            const paddedDuration = padText(durationStr, maxDurationLength, 'right');
-            // Show result - with progress bar in TTY, simple format in CI/CD
-            if (isTTY) {
-              const progressBar = renderProgressBar(completedCases, totalCases, { width: 15 });
-              console.log(
-                `${statusIcon} ${paddedId}  ${chalk.dim(paddedScore)}  ${chalk.dim(paddedDuration)}  ${progressBar}`
+        const isMultiScenario = scenarioPaths.length > 1;
+        const parallelLimit = options.parallel ? Number.parseInt(String(options.parallel)) : 0;
+        const runInParallel = parallelLimit > 0 && isMultiScenario;
+        if (isMultiScenario) {
+          const modeStr = runInParallel
+            ? chalk.cyan(`parallel (${parallelLimit} concurrent)`)
+            : chalk.dim('sequential');
+          spinner.succeed(`Found ${scenarioPaths.length} scenario files`);
+          console.log();
+          console.log(chalk.bold(`Running ${scenarioPaths.length} scenarios ${modeStr}...`));
+          console.log();
+        } else {
+          spinner.succeed('Loaded scenario file');
+        }
+        // Run all scenarios
+        const storage = createStorage({ fileConfig: config });
+        let results: ScenarioRunResult[];
+        if (runInParallel) {
+          // Parallel execution
+          results = await runScenariosInParallel(
+            scenarioPaths,
+            options,
+            config,
+            parallelLimit,
+            storage
+          );
+        } else {
+          // Sequential execution
+          results = [];
+          for (const path of scenarioPaths) {
+            try {
+              const result = await runSingleScenario(
+                path,
+                options,
+                config,
+                spinner,
+                isMultiScenario
               );
-            } else {
-              // CI/CD friendly output - no progress bar, just count
+              results.push(result);
+              // Display per-scenario summary
+              const summaryData = {
+                passed: result.manifest.metrics.passed_cases,
+                failed: result.manifest.metrics.failed_cases,
+                skipped: 0,
+                successRate: result.manifest.metrics.success_rate * 100,
+                duration: result.manifest.duration_ms,
+                title: isMultiScenario ? result.scenarioName.toUpperCase() : 'TEST RESULTS',
+              };
+              console.log();
+              console.log(renderSummaryPanel(summaryData));
+              // Show additional metrics
+              console.log();
               console.log(
-                `${statusIcon} ${paddedId}  ${chalk.dim(paddedScore)}  ${chalk.dim(paddedDuration)}  [${completedCases}/${totalCases}]`
+                chalk.dim(
+                  `Run ID: ${result.manifest.run_id}  |  Median Latency: ${result.manifest.metrics.median_latency_ms}ms  |  Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}`
+                )
               );
-            }
-            if (!caseResult.ok && options.verbose) {
-              console.log(chalk.dim(`   Reason: ${caseResult.reason}`));
-            }
-          },
-          onProgress: (message) => {
-            if (options.verbose) {
-              console.log(chalk.dim(message));
+              // Show redaction info if enabled
+              if (result.manifest.redaction?.enabled) {
+                const r = result.manifest.redaction;
+                console.log(
+                  chalk.dim(
+                    `Redactions: ${r.summary.totalRedactions} (${r.summary.promptsRedacted} prompts, ${r.summary.responsesRedacted} responses)`
+                  )
+                );
+              }
+              // Save results
+              if (options.save) {
+                const savedPath = await storage.save(result.manifest);
+                console.log(chalk.dim(`Saved: ${savedPath}`));
+              }
+            } catch (error) {
+              // Record failed scenario
+              console.log();
+              console.log(chalk.red(`${icons.failed} Failed to run: ${basename(path)}`));
+              if (options.verbose) {
+                console.log(chalk.dim((error as Error).message));
+              }
+              results.push({
+                scenarioPath: path,
+                scenarioName: basename(path),
+                success: false,
+                manifest: {} as RunManifest,
+              });
             }
-          },
-        });
+          }
+        }
-        // Display summary using enhanced panel
-        console.log();
-        const summaryData = {
-          passed: result.manifest.metrics.passed_cases,
-          failed: result.manifest.metrics.failed_cases,
-          skipped: 0,
-          successRate: result.manifest.metrics.success_rate * 100,
-          duration: result.manifest.duration_ms,
-          title: 'TEST RESULTS',
-        };
-        console.log(renderSummaryPanel(summaryData));
-        // Show additional metrics
-        console.log();
-        console.log(
-          chalk.dim(
-            `Run ID: ${result.manifest.run_id}  |  Median Latency: ${result.manifest.metrics.median_latency_ms}ms  |  Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}`
-          )
-        );
+        // Display aggregate summary for multiple scenarios
+        if (isMultiScenario) {
+          console.log();
+          console.log(chalk.bold.cyan('━━━ AGGREGATE SUMMARY ━━━'));
+          console.log();
+          const totalScenarios = results.length;
+          const passedScenarios = results.filter((r) => r.success).length;
+          const failedScenarios = totalScenarios - passedScenarios;
+          const totalCases = results.reduce(
+            (sum, r) => sum + (r.manifest.metrics?.total_cases || 0),
+            0
+          );
+          const passedCases = results.reduce(
+            (sum, r) => sum + (r.manifest.metrics?.passed_cases || 0),
+            0
+          );
+          const failedCases = results.reduce(
+            (sum, r) => sum + (r.manifest.metrics?.failed_cases || 0),
+            0
+          );
+          const totalDuration = results.reduce((sum, r) => sum + (r.manifest.duration_ms || 0), 0);
-        // Show redaction info if enabled
-        if (result.manifest.redaction?.enabled) {
-          const r = result.manifest.redaction;
           console.log(
-            chalk.dim(
-              `Redactions: ${r.summary.totalRedactions} (${r.summary.promptsRedacted} prompts, ${r.summary.responsesRedacted} responses)`
-            )
+            `Scenarios:  ${chalk.green(`${passedScenarios} passed`)}  ${failedScenarios > 0 ? chalk.red(`${failedScenarios} failed`) : ''}  ${chalk.dim(`(${totalScenarios} total)`)}`
           );
-        }
+          console.log(
+            `Test Cases: ${chalk.green(`${passedCases} passed`)}  ${failedCases > 0 ? chalk.red(`${failedCases} failed`) : ''}  ${chalk.dim(`(${totalCases} total)`)}`
+          );
+          console.log(`Duration:   ${chalk.dim(formatDuration(totalDuration))}`);
-        // Save results
-        if (options.save) {
-          spinner.start('Saving results...');
-          const storage = createStorage({ fileConfig: config });
-          const path = await storage.save(result.manifest);
-          spinner.succeed(`Results saved: ${path}`);
+          if (runInParallel) {
+            console.log(
+              `Mode:       ${chalk.cyan('parallel')} ${chalk.dim(`(${parallelLimit} concurrent)`)}`
+            );
+          }
+          console.log();
+          // List failed scenarios
+          const failedResults = results.filter((r) => !r.success);
+          if (failedResults.length > 0) {
+            console.log(chalk.red('Failed scenarios:'));
+            for (const result of failedResults) {
+              console.log(chalk.red(`  ${icons.failed} ${result.scenarioName}`));
+              if (result.error && options.verbose) {
+                console.log(chalk.dim(`      ${result.error}`));
+              }
+            }
+            console.log();
+          }
         }
-        // Exit with error if any tests failed
-        if (!result.success) {
+        // Exit with error if any scenarios failed
+        const hasFailures = results.some((r) => !r.success);
+        if (hasFailures) {
           process.exit(1);
         }
       } catch (error) {