npm - @artemiskit/cli - Versions diffs - 0.2.2 → 0.2.3 - Mend

@artemiskit/cli 0.2.2 → 0.2.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (11)

package/CHANGELOG.md +41 -0
package/dist/index.js +751 -342
package/dist/src/commands/history.d.ts.map +1 -1
package/dist/src/commands/redteam.d.ts.map +1 -1
package/dist/src/commands/run.d.ts.map +1 -1
package/dist/src/commands/stress.d.ts.map +1 -1
package/package.json +6 -6
package/src/commands/history.ts +58 -9
package/src/commands/redteam.ts +19 -1
package/src/commands/run.ts +113 -3
package/src/commands/stress.ts +28 -0

package/dist/src/commands/history.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;~~AAGH~~,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;~~AA8FpC~~,wBAAgB,cAAc,IAAI,OAAO,~~CAiFxC~~"}
1	+ {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA4IpC,wBAAgB,cAAc,IAAI,OAAO,CAmFxC"}

package/dist/src/commands/redteam.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;~~AAkCH~~,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;~~AAkCpC~~,wBAAgB,cAAc,IAAI,OAAO,~~CA6bxC~~"}
1	+ {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAsCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAoCpC,wBAAgB,cAAc,IAAI,OAAO,CAycxC"}

package/dist/src/commands/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;~~AAaH~~,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;~~AA2fpC~~,wBAAgB,UAAU,IAAI,OAAO,~~CAocpC~~"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAyiBpC,wBAAgB,UAAU,IAAI,OAAO,CAggBpC"}

package/dist/src/commands/stress.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;~~AAmBH~~,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;~~AAmCpC~~,wBAAgB,aAAa,IAAI,OAAO,~~CAuRvC~~"}
1	+ {"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAoBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAsCpC,wBAAgB,aAAa,IAAI,OAAO,CA+SvC"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@artemiskit/cli",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
   "type": "module",
   "license": "Apache-2.0",
@@ -45,11 +45,11 @@
     "test": "bun test"
   },
   "dependencies": {
-    "@artemiskit/adapter-openai": "workspace:*",
-    "@artemiskit/adapter-vercel-ai": "workspace:*",
-    "@artemiskit/core": "workspace:*",
-    "@artemiskit/redteam": "workspace:*",
-    "@artemiskit/reports": "workspace:*",
+    "@artemiskit/adapter-openai": "0.1.10",
+    "@artemiskit/adapter-vercel-ai": "0.1.10",
+    "@artemiskit/core": "0.2.3",
+    "@artemiskit/redteam": "0.2.3",
+    "@artemiskit/reports": "0.2.3",
     "chalk": "^5.3.0",
     "cli-table3": "^0.6.3",
     "commander": "^12.0.0",

package/src/commands/history.ts CHANGED Viewed

@@ -2,6 +2,7 @@
  * History command - View run history
  */
+import { formatCost } from '@artemiskit/core';
 import chalk from 'chalk';
 import { Command } from 'commander';
 import { loadConfig } from '../config/loader.js';
@@ -13,6 +14,7 @@ interface HistoryOptions {
   scenario?: string;
   limit?: number;
   config?: string;
+  showCost?: boolean;
 }
 function renderHistoryTable(
@@ -21,16 +23,20 @@ function renderHistoryTable(
     scenario: string;
     successRate: number;
     createdAt: string;
-  }>
+    estimatedCostUsd?: number;
+  }>,
+  showCost = false
 ): string {
   // Column widths
   const runIdWidth = 16;
-  const scenarioWidth = 30;
+  const scenarioWidth = showCost ? 25 : 30;
   const rateWidth = 12;
   const dateWidth = 20;
+  const costWidth = 10;
-  // Total width = borders(4) + columns + spacing(3 spaces between 4 columns)
-  const width = 2 + runIdWidth + 1 + scenarioWidth + 1 + rateWidth + 1 + dateWidth + 2;
+  // Total width = borders(4) + columns + spacing
+  const baseWidth = 2 + runIdWidth + 1 + scenarioWidth + 1 + rateWidth + 1 + dateWidth + 2;
+  const width = showCost ? baseWidth + costWidth + 1 : baseWidth;
   const border = '═'.repeat(width - 2);
   const formatHeaderRow = () => {
@@ -38,6 +44,10 @@ function renderHistoryTable(
     const scenarioPad = padText('Scenario', scenarioWidth);
     const ratePad = padText('Success Rate', rateWidth, 'right');
     const datePad = padText('Date', dateWidth, 'right');
+    if (showCost) {
+      const costPad = padText('Cost', costWidth, 'right');
+      return `║ ${runIdPad} ${scenarioPad} ${ratePad} ${costPad} ${datePad} ║`;
+    }
     return `║ ${runIdPad} ${scenarioPad} ${ratePad} ${datePad} ║`;
   };
@@ -49,6 +59,8 @@ function renderHistoryTable(
     `╟${'─'.repeat(width - 2)}╢`,
   ];
+  let totalCost = 0;
   for (const run of runs) {
     const rateColor =
       run.successRate >= 0.9 ? chalk.green : run.successRate >= 0.7 ? chalk.yellow : chalk.red;
@@ -70,7 +82,25 @@ function renderHistoryTable(
     const dateStr = `${dateObj.toLocaleDateString()} ${dateObj.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}`;
     const datePad = padText(dateStr, dateWidth, 'right');
-    lines.push(`║ ${runIdPad} ${scenarioPad} ${rateColored} ${datePad} ║`);
+    if (showCost) {
+      const costValue = run.estimatedCostUsd !== undefined ? formatCost(run.estimatedCostUsd) : '-';
+      const costPad = padText(costValue, costWidth, 'right');
+      if (run.estimatedCostUsd !== undefined) {
+        totalCost += run.estimatedCostUsd;
+      }
+      lines.push(`║ ${runIdPad} ${scenarioPad} ${rateColored} ${chalk.dim(costPad)} ${datePad} ║`);
+    } else {
+      lines.push(`║ ${runIdPad} ${scenarioPad} ${rateColored} ${datePad} ║`);
+    }
+  }
+  // Add total cost row if showing costs
+  if (showCost) {
+    lines.push(`╟${'─'.repeat(width - 2)}╢`);
+    const totalLabel = padText('Total', runIdWidth + 1 + scenarioWidth + 1 + rateWidth, 'right');
+    const totalCostStr = padText(formatCost(totalCost), costWidth, 'right');
+    const emptyDate = padText('', dateWidth, 'right');
+    lines.push(`║ ${totalLabel} ${chalk.bold(totalCostStr)} ${emptyDate} ║`);
   }
   lines.push(`╚${border}╝`);
@@ -84,14 +114,31 @@ function renderPlainHistory(
     scenario: string;
     successRate: number;
     createdAt: string;
-  }>
+    estimatedCostUsd?: number;
+  }>,
+  showCost = false
 ): string {
   const lines = ['=== RUN HISTORY ===', ''];
+  let totalCost = 0;
   for (const run of runs) {
     const rate = `${(run.successRate * 100).toFixed(1)}%`;
     const date = new Date(run.createdAt).toLocaleString();
-    lines.push(`${run.runId}  ${run.scenario}  ${rate}  ${date}`);
+    if (showCost) {
+      const cost = run.estimatedCostUsd !== undefined ? formatCost(run.estimatedCostUsd) : '-';
+      if (run.estimatedCostUsd !== undefined) {
+        totalCost += run.estimatedCostUsd;
+      }
+      lines.push(`${run.runId}  ${run.scenario}  ${rate}  ${cost}  ${date}`);
+    } else {
+      lines.push(`${run.runId}  ${run.scenario}  ${rate}  ${date}`);
+    }
+  }
+  if (showCost) {
+    lines.push('');
+    lines.push(`Total: ${formatCost(totalCost)}`);
   }
   return lines.join('\n');
@@ -106,6 +153,7 @@ export function historyCommand(): Command {
     .option('-s, --scenario <scenario>', 'Filter by scenario')
     .option('-l, --limit <number>', 'Limit number of results', '20')
     .option('--config <path>', 'Path to config file')
+    .option('--show-cost', 'Show cost column and total')
     .action(async (options: HistoryOptions) => {
       const spinner = createSpinner('Loading history...');
       spinner.start();
@@ -119,6 +167,7 @@ export function historyCommand(): Command {
           project: options.project,
           scenario: options.scenario,
           limit,
+          includeCost: options.showCost,
         });
         spinner.succeed('Loaded history');
@@ -140,9 +189,9 @@ export function historyCommand(): Command {
         // Show history table
         if (isTTY) {
-          console.log(renderHistoryTable(runs));
+          console.log(renderHistoryTable(runs, options.showCost));
         } else {
-          console.log(renderPlainHistory(runs));
+          console.log(renderPlainHistory(runs, options.showCost));
         }
         console.log();

package/src/commands/redteam.ts CHANGED Viewed

@@ -32,7 +32,11 @@ import {
   UnsafeResponseDetector,
   loadCustomAttacks,
 } from '@artemiskit/redteam';
-import { generateJSONReport, generateRedTeamHTMLReport } from '@artemiskit/reports';
+import {
+  generateJSONReport,
+  generateRedTeamHTMLReport,
+  generateRedTeamMarkdownReport,
+} from '@artemiskit/reports';
 import chalk from 'chalk';
 import { Command } from 'commander';
 import { nanoid } from 'nanoid';
@@ -66,6 +70,8 @@ interface RedteamOptions {
   config?: string;
   redact?: boolean;
   redactPatterns?: string[];
+  export?: 'markdown';
+  exportOutput?: string;
 }
 export function redteamCommand(): Command {
@@ -91,6 +97,8 @@ export function redteamCommand(): Command {
       '--redact-patterns <patterns...>',
       'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
     )
+    .option('--export <format>', 'Export results to format (markdown)')
+    .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
     .action(async (scenarioPath: string, options: RedteamOptions) => {
       const spinner = createSpinner('Loading configuration...');
       spinner.start();
@@ -495,6 +503,16 @@ export function redteamCommand(): Command {
           console.log(chalk.dim(`  JSON: ${jsonPath}`));
         }
+        // Export to markdown if requested
+        if (options.export === 'markdown') {
+          const exportDir = options.exportOutput || './artemis-exports';
+          await mkdir(exportDir, { recursive: true });
+          const markdown = generateRedTeamMarkdownReport(manifest);
+          const mdPath = join(exportDir, `${runId}.md`);
+          await writeFile(mdPath, markdown);
+          console.log(chalk.dim(`Exported: ${mdPath}`));
+        }
         // Exit with error if there were unsafe responses
         if (metrics.unsafe_responses > 0) {
           process.exit(1);

package/src/commands/run.ts CHANGED Viewed

@@ -2,16 +2,20 @@
  * Run command - Execute test scenarios
  */
+import { mkdir, writeFile } from 'node:fs/promises';
 import { basename } from 'node:path';
+import { join } from 'node:path';
 import {
   type BaselineStorageAdapter,
   type RedactionConfig,
   type RunManifest,
   createAdapter,
+  formatCost,
   parseScenarioFile,
   resolveScenarioPaths,
   runScenario,
 } from '@artemiskit/core';
+import { generateMarkdownReport } from '@artemiskit/reports';
 import chalk from 'chalk';
 import { Command } from 'commander';
 import { loadConfig } from '../config/loader.js';
@@ -62,6 +66,12 @@ interface RunOptions {
   baseline?: boolean;
   /** Regression threshold (0-1), default 0.05 (5%) */
   threshold?: number;
+  /** Budget limit in USD - fail if cost exceeds this */
+  budget?: number;
+  /** Export format: markdown */
+  export?: 'markdown';
+  /** Output directory for exports */
+  exportOutput?: string;
 }
 interface ScenarioRunResult {
@@ -103,6 +113,15 @@ interface CISummary {
     totalMs: number;
     formatted: string;
   };
+  tokens: {
+    prompt: number;
+    completion: number;
+    total: number;
+  };
+  cost: {
+    estimatedUsd: number;
+    formatted: string;
+  };
   runs: Array<{
     runId: string;
     scenario: string;
@@ -112,6 +131,7 @@ interface CISummary {
     failedCases: number;
     totalCases: number;
     durationMs: number;
+    estimatedCostUsd?: number;
   }>;
   baseline?: {
     compared: boolean;
@@ -123,6 +143,11 @@ interface CISummary {
       tokens: number;
     };
   };
+  budget?: {
+    limit: number;
+    exceeded: boolean;
+    overBy: number;
+  };
 }
 /**
@@ -167,6 +192,21 @@ function buildCISummary(results: ScenarioRunResult[]): CISummary {
   const failedCases = results.reduce((sum, r) => sum + (r.manifest.metrics?.failed_cases || 0), 0);
   const totalDuration = results.reduce((sum, r) => sum + (r.manifest.duration_ms || 0), 0);
+  // Aggregate token and cost metrics
+  const totalPromptTokens = results.reduce(
+    (sum, r) => sum + (r.manifest.metrics?.total_prompt_tokens || 0),
+    0
+  );
+  const totalCompletionTokens = results.reduce(
+    (sum, r) => sum + (r.manifest.metrics?.total_completion_tokens || 0),
+    0
+  );
+  const totalTokens = results.reduce((sum, r) => sum + (r.manifest.metrics?.total_tokens || 0), 0);
+  const totalCostUsd = results.reduce(
+    (sum, r) => sum + (r.manifest.metrics?.cost?.total_usd || 0),
+    0
+  );
   return {
     success: failedScenarios === 0,
     scenarios: {
@@ -184,6 +224,15 @@ function buildCISummary(results: ScenarioRunResult[]): CISummary {
       totalMs: totalDuration,
       formatted: formatDuration(totalDuration),
     },
+    tokens: {
+      prompt: totalPromptTokens,
+      completion: totalCompletionTokens,
+      total: totalTokens,
+    },
+    cost: {
+      estimatedUsd: totalCostUsd,
+      formatted: formatCost(totalCostUsd),
+    },
     runs: results.map((r) => ({
       runId: r.manifest.run_id || '',
       scenario: r.scenarioName,
@@ -193,6 +242,7 @@ function buildCISummary(results: ScenarioRunResult[]): CISummary {
       failedCases: r.manifest.metrics?.failed_cases || 0,
       totalCases: r.manifest.metrics?.total_cases || 0,
       durationMs: r.manifest.duration_ms || 0,
+      estimatedCostUsd: r.manifest.metrics?.cost?.total_usd,
     })),
   };
 }
@@ -556,6 +606,9 @@ export function runCommand(): Command {
     )
     .option('--baseline', 'Compare against baseline and detect regression')
     .option('--threshold <number>', 'Regression threshold (0-1), e.g., 0.05 for 5%', '0.05')
+    .option('--budget <amount>', 'Maximum budget in USD - fail if estimated cost exceeds this')
+    .option('--export <format>', 'Export format: markdown')
+    .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
     .action(async (scenarioPath: string | undefined, options: RunOptions) => {
       // Determine CI mode: explicit flag, environment variable, or summary format that implies CI
       const isCIMode =
@@ -741,9 +794,12 @@ export function runCommand(): Command {
               // Show additional metrics
               console.log();
+              const costInfo = result.manifest.metrics.cost
+                ? `  |  Est. Cost: ${formatCost(result.manifest.metrics.cost.total_usd)}`
+                : '';
               console.log(
                 chalk.dim(
-                  `Run ID: ${result.manifest.run_id}  |  Median Latency: ${result.manifest.metrics.median_latency_ms}ms  |  Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}`
+                  `Run ID: ${result.manifest.run_id}  |  Median Latency: ${result.manifest.metrics.median_latency_ms}ms  |  Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}${costInfo}`
                 )
               );
@@ -762,6 +818,16 @@ export function runCommand(): Command {
                 const savedPath = await storage.save(result.manifest);
                 console.log(chalk.dim(`Saved: ${savedPath}`));
               }
+              // Export to markdown if requested
+              if (options.export === 'markdown') {
+                const exportDir = options.exportOutput || './artemis-exports';
+                await mkdir(exportDir, { recursive: true });
+                const markdown = generateMarkdownReport(result.manifest);
+                const mdPath = join(exportDir, `${result.manifest.run_id}.md`);
+                await writeFile(mdPath, markdown);
+                console.log(chalk.dim(`Exported: ${mdPath}`));
+              }
             } catch (error) {
               // Record failed scenario
               console.log();
@@ -860,6 +926,8 @@ export function runCommand(): Command {
             console.log(`ARTEMISKIT_CASES_FAILED=${failedCases}`);
             console.log(`ARTEMISKIT_SUCCESS_RATE=${successRate}`);
             console.log(`ARTEMISKIT_DURATION_MS=${ciSummary.duration.totalMs}`);
+            console.log(`ARTEMISKIT_TOKENS_TOTAL=${ciSummary.tokens.total}`);
+            console.log(`ARTEMISKIT_COST_USD=${ciSummary.cost.estimatedUsd.toFixed(4)}`);
             if (baselineResult) {
               console.log('ARTEMISKIT_BASELINE_COMPARED=true');
@@ -945,11 +1013,53 @@ export function runCommand(): Command {
           }
         }
-        // Exit with error if any scenarios failed or regression detected
+        // Check budget if specified
+        let budgetExceeded = false;
+        if (options.budget !== undefined) {
+          const budgetLimit = Number.parseFloat(String(options.budget));
+          const totalCost = ciSummary.cost.estimatedUsd;
+          if (totalCost > budgetLimit) {
+            budgetExceeded = true;
+            const overBy = totalCost - budgetLimit;
+            // Add budget info to CI summary
+            ciSummary.budget = {
+              limit: budgetLimit,
+              exceeded: true,
+              overBy,
+            };
+            if (isCIMode) {
+              if (options.summary === 'json') {
+                // Budget info already in ciSummary, will be output above
+              } else {
+                console.log(`ARTEMISKIT_BUDGET_LIMIT=${budgetLimit.toFixed(2)}`);
+                console.log('ARTEMISKIT_BUDGET_EXCEEDED=true');
+                console.log(`ARTEMISKIT_BUDGET_OVER_BY=${overBy.toFixed(4)}`);
+              }
+            } else {
+              console.log();
+              console.log(chalk.red(`${icons.failed} BUDGET EXCEEDED`));
+              console.log(
+                chalk.red(
+                  `   Budget: $${budgetLimit.toFixed(2)}  |  Actual: ${formatCost(totalCost)}  |  Over by: ${formatCost(overBy)}`
+                )
+              );
+              console.log();
+            }
+          } else if (!isCIMode) {
+            console.log(
+              `${icons.passed} ${chalk.green('Within budget')} ${chalk.dim(`($${budgetLimit.toFixed(2)} limit, ${formatCost(totalCost)} used)`)}`
+            );
+          }
+        }
+        // Exit with error if any scenarios failed, regression detected, or budget exceeded
         const hasFailures = results.some((r) => !r.success);
         const hasRegression = baselineResult?.hasRegression || false;
-        if (hasFailures || hasRegression) {
+        if (hasFailures || hasRegression || budgetExceeded) {
           process.exit(1);
         }
       } catch (error) {

package/src/commands/stress.ts CHANGED Viewed

@@ -13,6 +13,7 @@ import {
   type StressRequestResult,
   createAdapter,
   estimateCost,
+  formatCost,
   getGitInfo,
   getModelPricing,
   parseScenarioFile,
@@ -26,6 +27,7 @@ import {
   colors,
   createSpinner,
   getProviderErrorContext,
+  icons,
   isTTY,
   renderError,
   renderInfoBox,
@@ -52,6 +54,8 @@ interface StressOptions {
   config?: string;
   redact?: boolean;
   redactPatterns?: string[];
+  /** Budget limit in USD - fail if cost exceeds this */
+  budget?: number;
 }
 export function stressCommand(): Command {
@@ -75,6 +79,7 @@ export function stressCommand(): Command {
       '--redact-patterns <patterns...>',
       'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
     )
+    .option('--budget <amount>', 'Maximum budget in USD - fail if estimated cost exceeds this')
     .action(async (scenarioPath: string, options: StressOptions) => {
       const spinner = createSpinner('Loading configuration...');
       spinner.start();
@@ -319,6 +324,29 @@ export function stressCommand(): Command {
           console.log(chalk.dim(`  HTML: ${htmlPath}`));
           console.log(chalk.dim(`  JSON: ${jsonPath}`));
         }
+        // Check budget if specified
+        if (options.budget !== undefined && metrics.cost) {
+          const budgetLimit = Number.parseFloat(String(options.budget));
+          const totalCost = metrics.cost.estimated_total_usd;
+          if (totalCost > budgetLimit) {
+            const overBy = totalCost - budgetLimit;
+            console.log();
+            console.log(chalk.red(`${icons.failed} BUDGET EXCEEDED`));
+            console.log(
+              chalk.red(
+                `   Budget: $${budgetLimit.toFixed(2)}  |  Actual: ${formatCost(totalCost)}  |  Over by: ${formatCost(overBy)}`
+              )
+            );
+            process.exit(1);
+          } else {
+            console.log();
+            console.log(
+              `${icons.passed} ${chalk.green('Within budget')} ${chalk.dim(`($${budgetLimit.toFixed(2)} limit, ${formatCost(totalCost)} used)`)}`
+            );
+          }
+        }
       } catch (error) {
         spinner.fail('Error');