@artemiskit/cli 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA8FpC,wBAAgB,cAAc,IAAI,OAAO,CAiFxC"}
1
+ {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA4IpC,wBAAgB,cAAc,IAAI,OAAO,CAmFxC"}
@@ -1 +1 @@
1
- {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAkCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAkCpC,wBAAgB,cAAc,IAAI,OAAO,CA6bxC"}
1
+ {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAsCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAoCpC,wBAAgB,cAAc,IAAI,OAAO,CAycxC"}
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAaH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA2fpC,wBAAgB,UAAU,IAAI,OAAO,CAocpC"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAyiBpC,wBAAgB,UAAU,IAAI,OAAO,CAggBpC"}
@@ -1 +1 @@
1
- {"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAmBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAmCpC,wBAAgB,aAAa,IAAI,OAAO,CAuRvC"}
1
+ {"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAoBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAsCpC,wBAAgB,aAAa,IAAI,OAAO,CA+SvC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/cli",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -45,11 +45,11 @@
45
45
  "test": "bun test"
46
46
  },
47
47
  "dependencies": {
48
- "@artemiskit/adapter-openai": "workspace:*",
49
- "@artemiskit/adapter-vercel-ai": "workspace:*",
50
- "@artemiskit/core": "workspace:*",
51
- "@artemiskit/redteam": "workspace:*",
52
- "@artemiskit/reports": "workspace:*",
48
+ "@artemiskit/adapter-openai": "0.1.10",
49
+ "@artemiskit/adapter-vercel-ai": "0.1.10",
50
+ "@artemiskit/core": "0.2.3",
51
+ "@artemiskit/redteam": "0.2.3",
52
+ "@artemiskit/reports": "0.2.3",
53
53
  "chalk": "^5.3.0",
54
54
  "cli-table3": "^0.6.3",
55
55
  "commander": "^12.0.0",
@@ -2,6 +2,7 @@
2
2
  * History command - View run history
3
3
  */
4
4
 
5
+ import { formatCost } from '@artemiskit/core';
5
6
  import chalk from 'chalk';
6
7
  import { Command } from 'commander';
7
8
  import { loadConfig } from '../config/loader.js';
@@ -13,6 +14,7 @@ interface HistoryOptions {
13
14
  scenario?: string;
14
15
  limit?: number;
15
16
  config?: string;
17
+ showCost?: boolean;
16
18
  }
17
19
 
18
20
  function renderHistoryTable(
@@ -21,16 +23,20 @@ function renderHistoryTable(
21
23
  scenario: string;
22
24
  successRate: number;
23
25
  createdAt: string;
24
- }>
26
+ estimatedCostUsd?: number;
27
+ }>,
28
+ showCost = false
25
29
  ): string {
26
30
  // Column widths
27
31
  const runIdWidth = 16;
28
- const scenarioWidth = 30;
32
+ const scenarioWidth = showCost ? 25 : 30;
29
33
  const rateWidth = 12;
30
34
  const dateWidth = 20;
35
+ const costWidth = 10;
31
36
 
32
- // Total width = borders(4) + columns + spacing(3 spaces between 4 columns)
33
- const width = 2 + runIdWidth + 1 + scenarioWidth + 1 + rateWidth + 1 + dateWidth + 2;
37
+ // Total width = borders(4) + column widths + one space between adjacent columns; the optional cost column adds costWidth + 1
38
+ const baseWidth = 2 + runIdWidth + 1 + scenarioWidth + 1 + rateWidth + 1 + dateWidth + 2;
39
+ const width = showCost ? baseWidth + costWidth + 1 : baseWidth;
34
40
  const border = '═'.repeat(width - 2);
35
41
 
36
42
  const formatHeaderRow = () => {
@@ -38,6 +44,10 @@ function renderHistoryTable(
38
44
  const scenarioPad = padText('Scenario', scenarioWidth);
39
45
  const ratePad = padText('Success Rate', rateWidth, 'right');
40
46
  const datePad = padText('Date', dateWidth, 'right');
47
+ if (showCost) {
48
+ const costPad = padText('Cost', costWidth, 'right');
49
+ return `║ ${runIdPad} ${scenarioPad} ${ratePad} ${costPad} ${datePad} ║`;
50
+ }
41
51
  return `║ ${runIdPad} ${scenarioPad} ${ratePad} ${datePad} ║`;
42
52
  };
43
53
 
@@ -49,6 +59,8 @@ function renderHistoryTable(
49
59
  `╟${'─'.repeat(width - 2)}╢`,
50
60
  ];
51
61
 
62
+ let totalCost = 0;
63
+
52
64
  for (const run of runs) {
53
65
  const rateColor =
54
66
  run.successRate >= 0.9 ? chalk.green : run.successRate >= 0.7 ? chalk.yellow : chalk.red;
@@ -70,7 +82,25 @@ function renderHistoryTable(
70
82
  const dateStr = `${dateObj.toLocaleDateString()} ${dateObj.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}`;
71
83
  const datePad = padText(dateStr, dateWidth, 'right');
72
84
 
73
- lines.push(`║ ${runIdPad} ${scenarioPad} ${rateColored} ${datePad} ║`);
85
+ if (showCost) {
86
+ const costValue = run.estimatedCostUsd !== undefined ? formatCost(run.estimatedCostUsd) : '-';
87
+ const costPad = padText(costValue, costWidth, 'right');
88
+ if (run.estimatedCostUsd !== undefined) {
89
+ totalCost += run.estimatedCostUsd;
90
+ }
91
+ lines.push(`║ ${runIdPad} ${scenarioPad} ${rateColored} ${chalk.dim(costPad)} ${datePad} ║`);
92
+ } else {
93
+ lines.push(`║ ${runIdPad} ${scenarioPad} ${rateColored} ${datePad} ║`);
94
+ }
95
+ }
96
+
97
+ // Add total cost row if showing costs
98
+ if (showCost) {
99
+ lines.push(`╟${'─'.repeat(width - 2)}╢`);
100
+ const totalLabel = padText('Total', runIdWidth + 1 + scenarioWidth + 1 + rateWidth, 'right');
101
+ const totalCostStr = padText(formatCost(totalCost), costWidth, 'right');
102
+ const emptyDate = padText('', dateWidth, 'right');
103
+ lines.push(`║ ${totalLabel} ${chalk.bold(totalCostStr)} ${emptyDate} ║`);
74
104
  }
75
105
 
76
106
  lines.push(`╚${border}╝`);
@@ -84,14 +114,31 @@ function renderPlainHistory(
84
114
  scenario: string;
85
115
  successRate: number;
86
116
  createdAt: string;
87
- }>
117
+ estimatedCostUsd?: number;
118
+ }>,
119
+ showCost = false
88
120
  ): string {
89
121
  const lines = ['=== RUN HISTORY ===', ''];
90
122
 
123
+ let totalCost = 0;
124
+
91
125
  for (const run of runs) {
92
126
  const rate = `${(run.successRate * 100).toFixed(1)}%`;
93
127
  const date = new Date(run.createdAt).toLocaleString();
94
- lines.push(`${run.runId} ${run.scenario} ${rate} ${date}`);
128
+ if (showCost) {
129
+ const cost = run.estimatedCostUsd !== undefined ? formatCost(run.estimatedCostUsd) : '-';
130
+ if (run.estimatedCostUsd !== undefined) {
131
+ totalCost += run.estimatedCostUsd;
132
+ }
133
+ lines.push(`${run.runId} ${run.scenario} ${rate} ${cost} ${date}`);
134
+ } else {
135
+ lines.push(`${run.runId} ${run.scenario} ${rate} ${date}`);
136
+ }
137
+ }
138
+
139
+ if (showCost) {
140
+ lines.push('');
141
+ lines.push(`Total: ${formatCost(totalCost)}`);
95
142
  }
96
143
 
97
144
  return lines.join('\n');
@@ -106,6 +153,7 @@ export function historyCommand(): Command {
106
153
  .option('-s, --scenario <scenario>', 'Filter by scenario')
107
154
  .option('-l, --limit <number>', 'Limit number of results', '20')
108
155
  .option('--config <path>', 'Path to config file')
156
+ .option('--show-cost', 'Show cost column and total')
109
157
  .action(async (options: HistoryOptions) => {
110
158
  const spinner = createSpinner('Loading history...');
111
159
  spinner.start();
@@ -119,6 +167,7 @@ export function historyCommand(): Command {
119
167
  project: options.project,
120
168
  scenario: options.scenario,
121
169
  limit,
170
+ includeCost: options.showCost,
122
171
  });
123
172
 
124
173
  spinner.succeed('Loaded history');
@@ -140,9 +189,9 @@ export function historyCommand(): Command {
140
189
 
141
190
  // Show history table
142
191
  if (isTTY) {
143
- console.log(renderHistoryTable(runs));
192
+ console.log(renderHistoryTable(runs, options.showCost));
144
193
  } else {
145
- console.log(renderPlainHistory(runs));
194
+ console.log(renderPlainHistory(runs, options.showCost));
146
195
  }
147
196
 
148
197
  console.log();
@@ -32,7 +32,11 @@ import {
32
32
  UnsafeResponseDetector,
33
33
  loadCustomAttacks,
34
34
  } from '@artemiskit/redteam';
35
- import { generateJSONReport, generateRedTeamHTMLReport } from '@artemiskit/reports';
35
+ import {
36
+ generateJSONReport,
37
+ generateRedTeamHTMLReport,
38
+ generateRedTeamMarkdownReport,
39
+ } from '@artemiskit/reports';
36
40
  import chalk from 'chalk';
37
41
  import { Command } from 'commander';
38
42
  import { nanoid } from 'nanoid';
@@ -66,6 +70,8 @@ interface RedteamOptions {
66
70
  config?: string;
67
71
  redact?: boolean;
68
72
  redactPatterns?: string[];
73
+ export?: 'markdown';
74
+ exportOutput?: string;
69
75
  }
70
76
 
71
77
  export function redteamCommand(): Command {
@@ -91,6 +97,8 @@ export function redteamCommand(): Command {
91
97
  '--redact-patterns <patterns...>',
92
98
  'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
93
99
  )
100
+ .option('--export <format>', 'Export results to format (markdown)')
101
+ .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
94
102
  .action(async (scenarioPath: string, options: RedteamOptions) => {
95
103
  const spinner = createSpinner('Loading configuration...');
96
104
  spinner.start();
@@ -495,6 +503,16 @@ export function redteamCommand(): Command {
495
503
  console.log(chalk.dim(` JSON: ${jsonPath}`));
496
504
  }
497
505
 
506
+ // Export to markdown if requested
507
+ if (options.export === 'markdown') {
508
+ const exportDir = options.exportOutput || './artemis-exports';
509
+ await mkdir(exportDir, { recursive: true });
510
+ const markdown = generateRedTeamMarkdownReport(manifest);
511
+ const mdPath = join(exportDir, `${runId}.md`);
512
+ await writeFile(mdPath, markdown);
513
+ console.log(chalk.dim(`Exported: ${mdPath}`));
514
+ }
515
+
498
516
  // Exit with error if there were unsafe responses
499
517
  if (metrics.unsafe_responses > 0) {
500
518
  process.exit(1);
@@ -2,16 +2,20 @@
2
2
  * Run command - Execute test scenarios
3
3
  */
4
4
 
5
+ import { mkdir, writeFile } from 'node:fs/promises';
5
6
  import { basename } from 'node:path';
7
+ import { join } from 'node:path';
6
8
  import {
7
9
  type BaselineStorageAdapter,
8
10
  type RedactionConfig,
9
11
  type RunManifest,
10
12
  createAdapter,
13
+ formatCost,
11
14
  parseScenarioFile,
12
15
  resolveScenarioPaths,
13
16
  runScenario,
14
17
  } from '@artemiskit/core';
18
+ import { generateMarkdownReport } from '@artemiskit/reports';
15
19
  import chalk from 'chalk';
16
20
  import { Command } from 'commander';
17
21
  import { loadConfig } from '../config/loader.js';
@@ -62,6 +66,12 @@ interface RunOptions {
62
66
  baseline?: boolean;
63
67
  /** Regression threshold (0-1), default 0.05 (5%) */
64
68
  threshold?: number;
69
+ /** Budget limit in USD - fail if cost exceeds this */
70
+ budget?: number;
71
+ /** Export format: markdown */
72
+ export?: 'markdown';
73
+ /** Output directory for exports */
74
+ exportOutput?: string;
65
75
  }
66
76
 
67
77
  interface ScenarioRunResult {
@@ -103,6 +113,15 @@ interface CISummary {
103
113
  totalMs: number;
104
114
  formatted: string;
105
115
  };
116
+ tokens: {
117
+ prompt: number;
118
+ completion: number;
119
+ total: number;
120
+ };
121
+ cost: {
122
+ estimatedUsd: number;
123
+ formatted: string;
124
+ };
106
125
  runs: Array<{
107
126
  runId: string;
108
127
  scenario: string;
@@ -112,6 +131,7 @@ interface CISummary {
112
131
  failedCases: number;
113
132
  totalCases: number;
114
133
  durationMs: number;
134
+ estimatedCostUsd?: number;
115
135
  }>;
116
136
  baseline?: {
117
137
  compared: boolean;
@@ -123,6 +143,11 @@ interface CISummary {
123
143
  tokens: number;
124
144
  };
125
145
  };
146
+ budget?: {
147
+ limit: number;
148
+ exceeded: boolean;
149
+ overBy: number;
150
+ };
126
151
  }
127
152
 
128
153
  /**
@@ -167,6 +192,21 @@ function buildCISummary(results: ScenarioRunResult[]): CISummary {
167
192
  const failedCases = results.reduce((sum, r) => sum + (r.manifest.metrics?.failed_cases || 0), 0);
168
193
  const totalDuration = results.reduce((sum, r) => sum + (r.manifest.duration_ms || 0), 0);
169
194
 
195
+ // Aggregate token and cost metrics
196
+ const totalPromptTokens = results.reduce(
197
+ (sum, r) => sum + (r.manifest.metrics?.total_prompt_tokens || 0),
198
+ 0
199
+ );
200
+ const totalCompletionTokens = results.reduce(
201
+ (sum, r) => sum + (r.manifest.metrics?.total_completion_tokens || 0),
202
+ 0
203
+ );
204
+ const totalTokens = results.reduce((sum, r) => sum + (r.manifest.metrics?.total_tokens || 0), 0);
205
+ const totalCostUsd = results.reduce(
206
+ (sum, r) => sum + (r.manifest.metrics?.cost?.total_usd || 0),
207
+ 0
208
+ );
209
+
170
210
  return {
171
211
  success: failedScenarios === 0,
172
212
  scenarios: {
@@ -184,6 +224,15 @@ function buildCISummary(results: ScenarioRunResult[]): CISummary {
184
224
  totalMs: totalDuration,
185
225
  formatted: formatDuration(totalDuration),
186
226
  },
227
+ tokens: {
228
+ prompt: totalPromptTokens,
229
+ completion: totalCompletionTokens,
230
+ total: totalTokens,
231
+ },
232
+ cost: {
233
+ estimatedUsd: totalCostUsd,
234
+ formatted: formatCost(totalCostUsd),
235
+ },
187
236
  runs: results.map((r) => ({
188
237
  runId: r.manifest.run_id || '',
189
238
  scenario: r.scenarioName,
@@ -193,6 +242,7 @@ function buildCISummary(results: ScenarioRunResult[]): CISummary {
193
242
  failedCases: r.manifest.metrics?.failed_cases || 0,
194
243
  totalCases: r.manifest.metrics?.total_cases || 0,
195
244
  durationMs: r.manifest.duration_ms || 0,
245
+ estimatedCostUsd: r.manifest.metrics?.cost?.total_usd,
196
246
  })),
197
247
  };
198
248
  }
@@ -556,6 +606,9 @@ export function runCommand(): Command {
556
606
  )
557
607
  .option('--baseline', 'Compare against baseline and detect regression')
558
608
  .option('--threshold <number>', 'Regression threshold (0-1), e.g., 0.05 for 5%', '0.05')
609
+ .option('--budget <amount>', 'Maximum budget in USD - fail if estimated cost exceeds this')
610
+ .option('--export <format>', 'Export format: markdown')
611
+ .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
559
612
  .action(async (scenarioPath: string | undefined, options: RunOptions) => {
560
613
  // Determine CI mode: explicit flag, environment variable, or summary format that implies CI
561
614
  const isCIMode =
@@ -741,9 +794,12 @@ export function runCommand(): Command {
741
794
 
742
795
  // Show additional metrics
743
796
  console.log();
797
+ const costInfo = result.manifest.metrics.cost
798
+ ? ` | Est. Cost: ${formatCost(result.manifest.metrics.cost.total_usd)}`
799
+ : '';
744
800
  console.log(
745
801
  chalk.dim(
746
- `Run ID: ${result.manifest.run_id} | Median Latency: ${result.manifest.metrics.median_latency_ms}ms | Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}`
802
+ `Run ID: ${result.manifest.run_id} | Median Latency: ${result.manifest.metrics.median_latency_ms}ms | Tokens: ${result.manifest.metrics.total_tokens.toLocaleString()}${costInfo}`
747
803
  )
748
804
  );
749
805
 
@@ -762,6 +818,16 @@ export function runCommand(): Command {
762
818
  const savedPath = await storage.save(result.manifest);
763
819
  console.log(chalk.dim(`Saved: ${savedPath}`));
764
820
  }
821
+
822
+ // Export to markdown if requested
823
+ if (options.export === 'markdown') {
824
+ const exportDir = options.exportOutput || './artemis-exports';
825
+ await mkdir(exportDir, { recursive: true });
826
+ const markdown = generateMarkdownReport(result.manifest);
827
+ const mdPath = join(exportDir, `${result.manifest.run_id}.md`);
828
+ await writeFile(mdPath, markdown);
829
+ console.log(chalk.dim(`Exported: ${mdPath}`));
830
+ }
765
831
  } catch (error) {
766
832
  // Record failed scenario
767
833
  console.log();
@@ -860,6 +926,8 @@ export function runCommand(): Command {
860
926
  console.log(`ARTEMISKIT_CASES_FAILED=${failedCases}`);
861
927
  console.log(`ARTEMISKIT_SUCCESS_RATE=${successRate}`);
862
928
  console.log(`ARTEMISKIT_DURATION_MS=${ciSummary.duration.totalMs}`);
929
+ console.log(`ARTEMISKIT_TOKENS_TOTAL=${ciSummary.tokens.total}`);
930
+ console.log(`ARTEMISKIT_COST_USD=${ciSummary.cost.estimatedUsd.toFixed(4)}`);
863
931
 
864
932
  if (baselineResult) {
865
933
  console.log('ARTEMISKIT_BASELINE_COMPARED=true');
@@ -945,11 +1013,53 @@ export function runCommand(): Command {
945
1013
  }
946
1014
  }
947
1015
 
948
- // Exit with error if any scenarios failed or regression detected
1016
+ // Check budget if specified
1017
+ let budgetExceeded = false;
1018
+ if (options.budget !== undefined) {
1019
+ const budgetLimit = Number.parseFloat(String(options.budget));
1020
+ const totalCost = ciSummary.cost.estimatedUsd;
1021
+
1022
+ if (totalCost > budgetLimit) {
1023
+ budgetExceeded = true;
1024
+ const overBy = totalCost - budgetLimit;
1025
+
1026
+ // Add budget info to CI summary
1027
+ ciSummary.budget = {
1028
+ limit: budgetLimit,
1029
+ exceeded: true,
1030
+ overBy,
1031
+ };
1032
+
1033
+ if (isCIMode) {
1034
+ if (options.summary === 'json') {
1035
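+ // Budget info was attached to ciSummary just above; nothing extra is printed here. NOTE(review): confirm the JSON summary is emitted after this assignment, otherwise it will omit the budget fields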
1036
+ } else {
1037
+ console.log(`ARTEMISKIT_BUDGET_LIMIT=${budgetLimit.toFixed(2)}`);
1038
+ console.log('ARTEMISKIT_BUDGET_EXCEEDED=true');
1039
+ console.log(`ARTEMISKIT_BUDGET_OVER_BY=${overBy.toFixed(4)}`);
1040
+ }
1041
+ } else {
1042
+ console.log();
1043
+ console.log(chalk.red(`${icons.failed} BUDGET EXCEEDED`));
1044
+ console.log(
1045
+ chalk.red(
1046
+ ` Budget: $${budgetLimit.toFixed(2)} | Actual: ${formatCost(totalCost)} | Over by: ${formatCost(overBy)}`
1047
+ )
1048
+ );
1049
+ console.log();
1050
+ }
1051
+ } else if (!isCIMode) {
1052
+ console.log(
1053
+ `${icons.passed} ${chalk.green('Within budget')} ${chalk.dim(`($${budgetLimit.toFixed(2)} limit, ${formatCost(totalCost)} used)`)}`
1054
+ );
1055
+ }
1056
+ }
1057
+
1058
+ // Exit with error if any scenarios failed, regression detected, or budget exceeded
949
1059
  const hasFailures = results.some((r) => !r.success);
950
1060
  const hasRegression = baselineResult?.hasRegression || false;
951
1061
 
952
- if (hasFailures || hasRegression) {
1062
+ if (hasFailures || hasRegression || budgetExceeded) {
953
1063
  process.exit(1);
954
1064
  }
955
1065
  } catch (error) {
@@ -13,6 +13,7 @@ import {
13
13
  type StressRequestResult,
14
14
  createAdapter,
15
15
  estimateCost,
16
+ formatCost,
16
17
  getGitInfo,
17
18
  getModelPricing,
18
19
  parseScenarioFile,
@@ -26,6 +27,7 @@ import {
26
27
  colors,
27
28
  createSpinner,
28
29
  getProviderErrorContext,
30
+ icons,
29
31
  isTTY,
30
32
  renderError,
31
33
  renderInfoBox,
@@ -52,6 +54,8 @@ interface StressOptions {
52
54
  config?: string;
53
55
  redact?: boolean;
54
56
  redactPatterns?: string[];
57
+ /** Budget limit in USD - fail if cost exceeds this */
58
+ budget?: number;
55
59
  }
56
60
 
57
61
  export function stressCommand(): Command {
@@ -75,6 +79,7 @@ export function stressCommand(): Command {
75
79
  '--redact-patterns <patterns...>',
76
80
  'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
77
81
  )
82
+ .option('--budget <amount>', 'Maximum budget in USD - fail if estimated cost exceeds this')
78
83
  .action(async (scenarioPath: string, options: StressOptions) => {
79
84
  const spinner = createSpinner('Loading configuration...');
80
85
  spinner.start();
@@ -319,6 +324,29 @@ export function stressCommand(): Command {
319
324
  console.log(chalk.dim(` HTML: ${htmlPath}`));
320
325
  console.log(chalk.dim(` JSON: ${jsonPath}`));
321
326
  }
327
+
328
+ // Check budget if specified
329
+ if (options.budget !== undefined && metrics.cost) {
330
+ const budgetLimit = Number.parseFloat(String(options.budget));
331
+ const totalCost = metrics.cost.estimated_total_usd;
332
+
333
+ if (totalCost > budgetLimit) {
334
+ const overBy = totalCost - budgetLimit;
335
+ console.log();
336
+ console.log(chalk.red(`${icons.failed} BUDGET EXCEEDED`));
337
+ console.log(
338
+ chalk.red(
339
+ ` Budget: $${budgetLimit.toFixed(2)} | Actual: ${formatCost(totalCost)} | Over by: ${formatCost(overBy)}`
340
+ )
341
+ );
342
+ process.exit(1);
343
+ } else {
344
+ console.log();
345
+ console.log(
346
+ `${icons.passed} ${chalk.green('Within budget')} ${chalk.dim(`($${budgetLimit.toFixed(2)} limit, ${formatCost(totalCost)} used)`)}`
347
+ );
348
+ }
349
+ }
322
350
  } catch (error) {
323
351
  spinner.fail('Error');
324
352