@artemiskit/cli 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +1 -0
- package/dist/index.js +19129 -20009
- package/dist/src/commands/compare.d.ts.map +1 -1
- package/dist/src/commands/history.d.ts.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/report.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/commands/stress.d.ts.map +1 -1
- package/dist/src/ui/colors.d.ts +44 -0
- package/dist/src/ui/colors.d.ts.map +1 -0
- package/dist/src/ui/errors.d.ts +39 -0
- package/dist/src/ui/errors.d.ts.map +1 -0
- package/dist/src/ui/index.d.ts +16 -0
- package/dist/src/ui/index.d.ts.map +1 -0
- package/dist/src/ui/live-status.d.ts +82 -0
- package/dist/src/ui/live-status.d.ts.map +1 -0
- package/dist/src/ui/panels.d.ts +49 -0
- package/dist/src/ui/panels.d.ts.map +1 -0
- package/dist/src/ui/progress.d.ts +60 -0
- package/dist/src/ui/progress.d.ts.map +1 -0
- package/dist/src/ui/utils.d.ts +42 -0
- package/dist/src/ui/utils.d.ts.map +1 -0
- package/package.json +6 -6
- package/src/__tests__/helpers/index.ts +6 -0
- package/src/__tests__/helpers/mock-adapter.ts +90 -0
- package/src/__tests__/helpers/test-utils.ts +205 -0
- package/src/__tests__/integration/compare-command.test.ts +236 -0
- package/src/__tests__/integration/config.test.ts +125 -0
- package/src/__tests__/integration/history-command.test.ts +251 -0
- package/src/__tests__/integration/init-command.test.ts +177 -0
- package/src/__tests__/integration/report-command.test.ts +245 -0
- package/src/__tests__/integration/ui.test.ts +230 -0
- package/src/commands/compare.ts +158 -49
- package/src/commands/history.ts +131 -30
- package/src/commands/init.ts +181 -21
- package/src/commands/redteam.ts +118 -75
- package/src/commands/report.ts +29 -14
- package/src/commands/run.ts +86 -66
- package/src/commands/stress.ts +61 -63
- package/src/ui/colors.ts +62 -0
- package/src/ui/errors.ts +248 -0
- package/src/ui/index.ts +42 -0
- package/src/ui/live-status.ts +259 -0
- package/src/ui/panels.ts +216 -0
- package/src/ui/progress.ts +139 -0
- package/src/ui/utils.ts +88 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for CLI UI components
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, expect, it } from 'bun:test';
|
|
6
|
+
import {
|
|
7
|
+
renderProgressBar,
|
|
8
|
+
renderSummaryPanel,
|
|
9
|
+
renderError,
|
|
10
|
+
renderInfoBox,
|
|
11
|
+
createSpinner,
|
|
12
|
+
icons,
|
|
13
|
+
colors,
|
|
14
|
+
padText,
|
|
15
|
+
formatDuration,
|
|
16
|
+
} from '../../ui/index.js';
|
|
17
|
+
|
|
18
|
+
// Integration suite for the CLI UI helpers imported from '../../ui/index.js'.
// Every check works on the returned string or object; nothing here writes to
// the terminal.
describe('UI Components', () => {
  // renderProgressBar(current, total, options?) -> string
  describe('renderProgressBar', () => {
    it('should render empty progress bar at 0%', () => {
      const bar = renderProgressBar(0, 10);
      expect(bar).toContain('0/10');
      expect(bar).toContain('0%');
    });

    it('should render full progress bar at 100%', () => {
      const bar = renderProgressBar(10, 10);
      expect(bar).toContain('10/10');
      expect(bar).toContain('100%');
    });

    it('should render partial progress bar', () => {
      const bar = renderProgressBar(5, 10);
      expect(bar).toContain('5/10');
      expect(bar).toContain('50%');
    });

    it('should respect custom width option', () => {
      const bar = renderProgressBar(5, 10, { width: 30 });
      expect(bar).toContain('5/10');
    });

    it('should handle edge case of 0 total', () => {
      // Only requires that a zero total still produces a value.
      const bar = renderProgressBar(0, 0);
      expect(bar).toBeDefined();
    });
  });

  // renderSummaryPanel({ passed, failed, skipped, successRate, duration, title? }) -> string
  describe('renderSummaryPanel', () => {
    it('should render all metrics', () => {
      const panel = renderSummaryPanel({
        passed: 8,
        failed: 2,
        skipped: 0,
        successRate: 80,
        duration: 12500,
      });

      expect(panel).toContain('8');
      expect(panel).toContain('2');
      expect(panel).toContain('80');
      expect(panel).toContain('TEST RESULTS');
    });

    it('should use box drawing characters', () => {
      const panel = renderSummaryPanel({
        passed: 5,
        failed: 0,
        skipped: 1,
        successRate: 100,
        duration: 5000,
      });

      // The summary panel is expected to use the double-line box set.
      expect(panel).toContain('╔');
      expect(panel).toContain('╗');
      expect(panel).toContain('╚');
      expect(panel).toContain('╝');
      expect(panel).toContain('║');
    });

    it('should support custom title', () => {
      const panel = renderSummaryPanel({
        passed: 5,
        failed: 0,
        skipped: 0,
        successRate: 100,
        duration: 1000,
        title: 'CUSTOM TITLE',
      });

      expect(panel).toContain('CUSTOM TITLE');
    });
  });

  // renderError({ title, reason, suggestions? }) -> string
  describe('renderError', () => {
    it('should render error title and reason', () => {
      const error = renderError({
        title: 'Connection Failed',
        reason: 'Network timeout',
      });

      expect(error).toContain('Connection Failed');
      expect(error).toContain('Network timeout');
      expect(error).toContain('ERROR');
    });

    it('should render suggestions when provided', () => {
      const error = renderError({
        title: 'Auth Failed',
        reason: 'Invalid API key',
        suggestions: ['Check your API key', 'Verify environment variables'],
      });

      expect(error).toContain('Check your API key');
      expect(error).toContain('Verify environment variables');
      expect(error).toContain('Suggestions');
    });

    it('should use box drawing characters', () => {
      const error = renderError({
        title: 'Test Error',
        reason: 'Test reason',
      });

      // Error boxes are expected to use the single-line box set.
      expect(error).toContain('┌');
      expect(error).toContain('┐');
      expect(error).toContain('└');
      expect(error).toContain('┘');
    });
  });

  // renderInfoBox(title, lines) -> string
  describe('renderInfoBox', () => {
    it('should render title and content', () => {
      const box = renderInfoBox('Info Title', ['Line 1', 'Line 2']);

      expect(box).toContain('Info Title');
      expect(box).toContain('Line 1');
      expect(box).toContain('Line 2');
    });

    it('should use box drawing characters', () => {
      const box = renderInfoBox('Test', ['content']);

      // Uses standard box drawing (may be rounded or square depending on implementation)
      expect(box).toContain('┌');
      expect(box).toContain('┐');
      expect(box).toContain('└');
      expect(box).toContain('┘');
    });
  });

  describe('createSpinner', () => {
    it('should create a spinner with text', () => {
      // The spinner is never started here; only its method surface is checked.
      const spinner = createSpinner('Loading...');
      expect(spinner).toBeDefined();
      expect(typeof spinner.start).toBe('function');
      expect(typeof spinner.succeed).toBe('function');
      expect(typeof spinner.fail).toBe('function');
      expect(typeof spinner.info).toBe('function');
    });
  });

  describe('icons', () => {
    it('should have all required icons', () => {
      expect(icons.passed).toBeDefined();
      expect(icons.failed).toBeDefined();
      expect(icons.skipped).toBeDefined();
      expect(icons.running).toBeDefined();
      expect(icons.info).toBeDefined();
      expect(icons.warning).toBeDefined();
    });
  });

  describe('colors', () => {
    it('should have all required color functions', () => {
      expect(typeof colors.success).toBe('function');
      expect(typeof colors.error).toBe('function');
      expect(typeof colors.warning).toBe('function');
      expect(typeof colors.info).toBe('function');
      expect(typeof colors.muted).toBe('function');
    });

    it('should apply colors to text', () => {
      // toContain rather than toBe: the result may or may not carry colour
      // codes around the text.
      const colored = colors.success('test');
      expect(colored).toContain('test');
    });
  });

  // padText(text, width, align?) -> string
  describe('padText', () => {
    it('should pad text to specified width', () => {
      const padded = padText('hello', 10);
      expect(padded.length).toBe(10);
      // Built with padEnd so the five trailing spaces are explicit and the
      // exact-match expectation agrees with the length assertion above.
      expect(padded).toBe('hello'.padEnd(10));
    });

    it('should right-align text', () => {
      const padded = padText('hello', 10, 'right');
      expect(padded.length).toBe(10);
      // Five leading spaces, spelled out via padStart for the same reason.
      expect(padded).toBe('hello'.padStart(10));
    });

    it('should center text', () => {
      const padded = padText('hi', 10, 'center');
      expect(padded.length).toBe(10);
      expect(padded).toContain('hi');
    });

    it('should not truncate text longer than width by default', () => {
      // padText does not truncate by default, it just pads shorter text
      const padded = padText('hello world', 5);
      expect(padded).toBe('hello world'); // text unchanged if longer than width
    });
  });

  // formatDuration(milliseconds) -> string
  describe('formatDuration', () => {
    it('should format milliseconds', () => {
      expect(formatDuration(500)).toBe('500ms');
      expect(formatDuration(50)).toBe('50ms');
    });

    it('should format seconds', () => {
      expect(formatDuration(1500)).toBe('1.5s');
      expect(formatDuration(2000)).toBe('2.0s');
    });

    it('should handle zero', () => {
      expect(formatDuration(0)).toBe('0ms');
    });
  });
});
|
package/src/commands/compare.ts
CHANGED
|
@@ -3,16 +3,129 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import chalk from 'chalk';
|
|
6
|
-
import Table from 'cli-table3';
|
|
7
6
|
import { Command } from 'commander';
|
|
8
|
-
import { loadConfig } from '../config/loader';
|
|
9
|
-
import {
|
|
7
|
+
import { loadConfig } from '../config/loader.js';
|
|
8
|
+
import { createSpinner, renderError, icons, isTTY, padText } from '../ui/index.js';
|
|
9
|
+
import { createStorage } from '../utils/storage.js';
|
|
10
10
|
|
|
11
11
|
interface CompareOptions {
|
|
12
12
|
threshold?: number;
|
|
13
13
|
config?: string;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
/**
 * Render the boxed, colourised comparison table for two runs.
 *
 * Rows: success rate (shown as a percentage), median latency (ms) and total
 * tokens. An increase in success rate is coloured green; for latency and
 * tokens a decrease is green. A delta that rounds to 0.00 is dimmed, and
 * anything else is red.
 *
 * @param baseline - Reference run; only `metrics` is read.
 * @param current - Run being compared; only `metrics` is read.
 * @param delta - Differences between the runs. `successRate` is multiplied by
 *   100 for display; `latency` and `tokens` are shown as given.
 * @returns The panel as one newline-joined string, 60 columns wide.
 */
function renderComparisonPanel(
  baseline: { metrics: { success_rate: number; median_latency_ms: number; total_tokens: number } },
  current: { metrics: { success_rate: number; median_latency_ms: number; total_tokens: number } },
  delta: { successRate: number; latency: number; tokens: number }
): string {
  // Column widths
  const labelWidth = 18;
  const baseWidth = 12;
  const currWidth = 12;
  const deltaWidth = 14;

  // Every row is "║ " + columns + " ║":
  // innerWidth = 18 + 12 + 12 + 14 = 56, total = 56 + 4 = 60.
  const innerWidth = labelWidth + baseWidth + currWidth + deltaWidth;
  const width = innerWidth + 4;
  const border = '═'.repeat(width - 2);

  // Deltas are displayed with two decimals, so sign and colour are decided on
  // the value at that same precision. Otherwise floating-point noise such as
  // 1e-16 would be printed as "+0.00" and coloured as a real change.
  const roundDelta = (value: number) => Number(value.toFixed(2));

  const formatDeltaValue = (value: number, suffix = '') => {
    const rounded = roundDelta(value);
    // Covers 0, -0 and anything that rounds to them; avoids "+0.00"/"-0.00".
    if (rounded === 0) return `0.00${suffix}`;
    const sign = rounded > 0 ? '+' : '';
    return `${sign}${value.toFixed(2)}${suffix}`;
  };

  // `inverse` marks metrics where a lower value is the improvement.
  const colorDelta = (value: number, inverse: boolean, formatted: string) => {
    const improved = inverse ? value < 0 : value > 0;
    if (improved) return chalk.green(formatted);
    if (value === 0) return chalk.dim(formatted);
    return chalk.red(formatted);
  };

  // Shared row layout. `deltaCell` must already be padded to deltaWidth (and
  // may already be coloured).
  const assembleRow = (label: string, baseVal: string, currVal: string, deltaCell: string) => {
    const labelPad = padText(label, labelWidth);
    const basePad = padText(baseVal, baseWidth, 'right');
    const currPad = padText(currVal, currWidth, 'right');
    return `║ ${labelPad}${basePad}${currPad}${deltaCell} ║`;
  };

  // Header row: plain text in every column.
  const formatRow = (label: string, baseVal: string, currVal: string, deltaContent: string) =>
    assembleRow(label, baseVal, currVal, padText(deltaContent, deltaWidth, 'right'));

  // Metric row: numeric delta, formatted and coloured.
  const formatDataRow = (
    label: string,
    baseVal: string,
    currVal: string,
    deltaVal: number,
    inverse: boolean,
    suffix: string
  ) => {
    // Pad the plain text first and colour afterwards, so the colour codes are
    // never part of the string being padded.
    const deltaPad = padText(formatDeltaValue(deltaVal, suffix), deltaWidth, 'right');
    const deltaColored = colorDelta(roundDelta(deltaVal), inverse, deltaPad);
    return assembleRow(label, baseVal, currVal, deltaColored);
  };

  const lines = [
    `╔${border}╗`,
    `║${padText('COMPARISON RESULTS', width - 2, 'center')}║`,
    `╠${border}╣`,
    formatRow('Metric', 'Baseline', 'Current', 'Delta'),
    `╟${'─'.repeat(width - 2)}╢`,
    formatDataRow(
      'Success Rate',
      `${(baseline.metrics.success_rate * 100).toFixed(1)}%`,
      `${(current.metrics.success_rate * 100).toFixed(1)}%`,
      delta.successRate * 100,
      false,
      '%'
    ),
    formatDataRow(
      'Median Latency',
      `${baseline.metrics.median_latency_ms}ms`,
      `${current.metrics.median_latency_ms}ms`,
      delta.latency,
      true,
      'ms'
    ),
    formatDataRow(
      'Total Tokens',
      baseline.metrics.total_tokens.toLocaleString(),
      current.metrics.total_tokens.toLocaleString(),
      delta.tokens,
      true,
      ''
    ),
    `╚${border}╝`,
  ];

  return lines.join('\n');
}
|
|
107
|
+
|
|
108
|
+
/**
 * Render the comparison as plain, uncoloured text lines.
 *
 * Output is a heading, a blank line, then one "baseline -> current (delta)"
 * line each for success rate, median latency and total tokens.
 *
 * @param baseline - Reference run; only `metrics` is read.
 * @param current - Run being compared; only `metrics` is read.
 * @param delta - Differences between the runs. `successRate` is multiplied by
 *   100 for display; `latency` and `tokens` are shown as given.
 * @returns The report as one newline-joined string.
 */
function renderPlainComparison(
  baseline: { metrics: { success_rate: number; median_latency_ms: number; total_tokens: number } },
  current: { metrics: { success_rate: number; median_latency_ms: number; total_tokens: number } },
  delta: { successRate: number; latency: number; tokens: number }
): string {
  // Two-decimal delta with an explicit "+" for increases. The sign is taken
  // from the rounded value so floating-point noise prints as "0.00" rather
  // than "+0.00" or "-0.00".
  const formatDelta = (value: number, suffix = '') => {
    const fixed = value.toFixed(2);
    const rounded = Number(fixed);
    if (rounded === 0) return `0.00${suffix}`;
    const sign = rounded > 0 ? '+' : '';
    return `${sign}${fixed}${suffix}`;
  };

  const lines = [
    '=== COMPARISON RESULTS ===',
    '',
    `Success Rate: ${(baseline.metrics.success_rate * 100).toFixed(1)}% -> ${(current.metrics.success_rate * 100).toFixed(1)}% (${formatDelta(delta.successRate * 100, '%')})`,
    `Median Latency: ${baseline.metrics.median_latency_ms}ms -> ${current.metrics.median_latency_ms}ms (${formatDelta(delta.latency, 'ms')})`,
    `Total Tokens: ${baseline.metrics.total_tokens} -> ${current.metrics.total_tokens} (${formatDelta(delta.tokens)})`,
  ];

  return lines.join('\n');
}
|
|
128
|
+
|
|
16
129
|
export function compareCommand(): Command {
|
|
17
130
|
const cmd = new Command('compare');
|
|
18
131
|
|
|
@@ -23,61 +136,45 @@ export function compareCommand(): Command {
|
|
|
23
136
|
.option('--threshold <number>', 'Regression threshold (0-1)', '0.05')
|
|
24
137
|
.option('--config <path>', 'Path to config file')
|
|
25
138
|
.action(async (baselineId: string, currentId: string, options: CompareOptions) => {
|
|
139
|
+
const spinner = createSpinner('Loading runs...');
|
|
140
|
+
spinner.start();
|
|
141
|
+
|
|
26
142
|
try {
|
|
27
143
|
const config = await loadConfig(options.config);
|
|
28
144
|
const storage = createStorage({ fileConfig: config });
|
|
29
145
|
|
|
30
146
|
if (!storage.compare) {
|
|
31
|
-
|
|
147
|
+
spinner.fail('Error');
|
|
148
|
+
console.log();
|
|
149
|
+
console.log(
|
|
150
|
+
renderError({
|
|
151
|
+
title: 'Comparison Not Supported',
|
|
152
|
+
reason: 'Storage adapter does not support comparison',
|
|
153
|
+
suggestions: [
|
|
154
|
+
'Use local storage which supports comparison',
|
|
155
|
+
'Check your storage configuration',
|
|
156
|
+
],
|
|
157
|
+
})
|
|
158
|
+
);
|
|
32
159
|
process.exit(1);
|
|
33
160
|
}
|
|
34
161
|
|
|
35
|
-
|
|
162
|
+
spinner.succeed('Loaded runs');
|
|
36
163
|
console.log();
|
|
37
164
|
|
|
38
165
|
const comparison = await storage.compare(baselineId, currentId);
|
|
39
166
|
const { baseline, current, delta } = comparison;
|
|
40
167
|
|
|
41
|
-
//
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
chalk.bold('Delta'),
|
|
48
|
-
],
|
|
49
|
-
style: { head: [], border: [] },
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
const formatDelta = (value: number, inverse = false) => {
|
|
53
|
-
const improved = inverse ? value < 0 : value > 0;
|
|
54
|
-
const color = improved ? chalk.green : value === 0 ? chalk.dim : chalk.red;
|
|
55
|
-
const sign = value > 0 ? '+' : '';
|
|
56
|
-
return color(`${sign}${value.toFixed(2)}`);
|
|
57
|
-
};
|
|
58
|
-
|
|
59
|
-
summaryTable.push(
|
|
60
|
-
[
|
|
61
|
-
'Success Rate',
|
|
62
|
-
`${(baseline.metrics.success_rate * 100).toFixed(1)}%`,
|
|
63
|
-
`${(current.metrics.success_rate * 100).toFixed(1)}%`,
|
|
64
|
-
`${formatDelta(delta.successRate * 100)}%`,
|
|
65
|
-
],
|
|
66
|
-
[
|
|
67
|
-
'Median Latency',
|
|
68
|
-
`${baseline.metrics.median_latency_ms}ms`,
|
|
69
|
-
`${current.metrics.median_latency_ms}ms`,
|
|
70
|
-
`${formatDelta(delta.latency, true)}ms`,
|
|
71
|
-
],
|
|
72
|
-
[
|
|
73
|
-
'Total Tokens',
|
|
74
|
-
baseline.metrics.total_tokens.toLocaleString(),
|
|
75
|
-
current.metrics.total_tokens.toLocaleString(),
|
|
76
|
-
formatDelta(delta.tokens, true),
|
|
77
|
-
]
|
|
78
|
-
);
|
|
168
|
+
// Show comparison panel
|
|
169
|
+
if (isTTY) {
|
|
170
|
+
console.log(renderComparisonPanel(baseline, current, delta));
|
|
171
|
+
} else {
|
|
172
|
+
console.log(renderPlainComparison(baseline, current, delta));
|
|
173
|
+
}
|
|
79
174
|
|
|
80
|
-
console.log(
|
|
175
|
+
console.log();
|
|
176
|
+
console.log(chalk.dim(`Baseline: ${baselineId}`));
|
|
177
|
+
console.log(chalk.dim(`Current: ${currentId}`));
|
|
81
178
|
console.log();
|
|
82
179
|
|
|
83
180
|
// Check for regression
|
|
@@ -86,16 +183,28 @@ export function compareCommand(): Command {
|
|
|
86
183
|
|
|
87
184
|
if (hasRegression) {
|
|
88
185
|
console.log(
|
|
89
|
-
chalk.red('
|
|
90
|
-
|
|
91
|
-
|
|
186
|
+
`${icons.failed} ${chalk.red('Regression detected!')} ` +
|
|
187
|
+
`Success rate dropped by ${chalk.bold(Math.abs(delta.successRate * 100).toFixed(1) + '%')} ` +
|
|
188
|
+
chalk.dim(`(threshold: ${threshold * 100}%)`)
|
|
92
189
|
);
|
|
93
190
|
process.exit(1);
|
|
94
191
|
} else {
|
|
95
|
-
console.log(chalk.green('
|
|
192
|
+
console.log(`${icons.passed} ${chalk.green('No regression detected')}`);
|
|
96
193
|
}
|
|
97
194
|
} catch (error) {
|
|
98
|
-
|
|
195
|
+
spinner.fail('Error');
|
|
196
|
+
console.log();
|
|
197
|
+
console.log(
|
|
198
|
+
renderError({
|
|
199
|
+
title: 'Failed to Compare Runs',
|
|
200
|
+
reason: (error as Error).message,
|
|
201
|
+
suggestions: [
|
|
202
|
+
'Check that both run IDs exist',
|
|
203
|
+
'Run "artemiskit history" to see available runs',
|
|
204
|
+
'Verify storage configuration in artemis.config.yaml',
|
|
205
|
+
],
|
|
206
|
+
})
|
|
207
|
+
);
|
|
99
208
|
process.exit(1);
|
|
100
209
|
}
|
|
101
210
|
});
|
package/src/commands/history.ts
CHANGED
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import chalk from 'chalk';
|
|
6
|
-
import Table from 'cli-table3';
|
|
7
6
|
import { Command } from 'commander';
|
|
8
|
-
import { loadConfig } from '../config/loader';
|
|
9
|
-
import {
|
|
7
|
+
import { loadConfig } from '../config/loader.js';
|
|
8
|
+
import { createSpinner, renderError, isTTY, padText } from '../ui/index.js';
|
|
9
|
+
import { createStorage } from '../utils/storage.js';
|
|
10
10
|
|
|
11
11
|
interface HistoryOptions {
|
|
12
12
|
project?: string;
|
|
@@ -15,6 +15,88 @@ interface HistoryOptions {
|
|
|
15
15
|
config?: string;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
/**
 * Build the boxed run-history table.
 *
 * Columns are run ID, scenario, success rate and date. Scenario names longer
 * than the column allows are cut and end with "…". The success-rate cell is
 * green at 0.9 and above, yellow at 0.7 and above, red otherwise.
 *
 * @param runs - Runs to list, rendered in the order given.
 * @returns The table as one newline-joined string.
 */
function renderHistoryTable(
  runs: Array<{
    runId: string;
    scenario: string;
    successRate: number;
    createdAt: string;
  }>
): string {
  const cols = { runId: 16, scenario: 30, rate: 12, date: 20 };

  // "║ " + four columns separated by single spaces + " ║"
  const width = 2 + cols.runId + 1 + cols.scenario + 1 + cols.rate + 1 + cols.date + 2;
  const innerWidth = width - 2;
  const heavyRule = '═'.repeat(innerWidth);

  // Lay four already-padded cells out between the side borders.
  const frame = (a: string, b: string, c: string, d: string) => `║ ${a} ${b} ${c} ${d} ║`;

  const pickRateColor = (rate: number) => {
    if (rate >= 0.9) return chalk.green;
    if (rate >= 0.7) return chalk.yellow;
    return chalk.red;
  };

  const toRow = (run: (typeof runs)[number]) => {
    const paint = pickRateColor(run.successRate);

    const scenarioText =
      run.scenario.length > cols.scenario - 2
        ? run.scenario.slice(0, cols.scenario - 3) + '…'
        : run.scenario;

    // The rate is padded as plain text and coloured afterwards.
    const percent = `${(run.successRate * 100).toFixed(1)}%`;
    const rateCell = paint(padText(percent, cols.rate, 'right'));

    const when = new Date(run.createdAt);
    const stamp = `${when.toLocaleDateString()} ${when.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}`;

    return frame(
      padText(run.runId, cols.runId),
      padText(scenarioText, cols.scenario),
      rateCell,
      padText(stamp, cols.date, 'right')
    );
  };

  const header = frame(
    padText('Run ID', cols.runId),
    padText('Scenario', cols.scenario),
    padText('Success Rate', cols.rate, 'right'),
    padText('Date', cols.date, 'right')
  );

  return [
    `╔${heavyRule}╗`,
    `║${padText('RUN HISTORY', innerWidth, 'center')}║`,
    `╠${heavyRule}╣`,
    header,
    `╟${'─'.repeat(innerWidth)}╢`,
    ...runs.map(toRow),
    `╚${heavyRule}╝`,
  ].join('\n');
}
|
|
80
|
+
|
|
81
|
+
/**
 * Build the plain-text run-history listing.
 *
 * Emits a heading, a blank line, then one line per run holding the run ID,
 * scenario, success rate as a one-decimal percentage, and the creation time
 * formatted with `Date.prototype.toLocaleString()`.
 *
 * @param runs - Runs to list, rendered in the order given.
 * @returns The listing as one newline-joined string.
 */
function renderPlainHistory(
  runs: Array<{
    runId: string;
    scenario: string;
    successRate: number;
    createdAt: string;
  }>
): string {
  const entries = runs.map(({ runId, scenario, successRate, createdAt }) => {
    const percent = `${(successRate * 100).toFixed(1)}%`;
    const when = new Date(createdAt).toLocaleString();
    return `${runId} ${scenario} ${percent} ${when}`;
  });

  return ['=== RUN HISTORY ===', '', ...entries].join('\n');
}
|
|
99
|
+
|
|
18
100
|
export function historyCommand(): Command {
|
|
19
101
|
const cmd = new Command('history');
|
|
20
102
|
|
|
@@ -25,6 +107,9 @@ export function historyCommand(): Command {
|
|
|
25
107
|
.option('-l, --limit <number>', 'Limit number of results', '20')
|
|
26
108
|
.option('--config <path>', 'Path to config file')
|
|
27
109
|
.action(async (options: HistoryOptions) => {
|
|
110
|
+
const spinner = createSpinner('Loading history...');
|
|
111
|
+
spinner.start();
|
|
112
|
+
|
|
28
113
|
try {
|
|
29
114
|
const config = await loadConfig(options.config);
|
|
30
115
|
const storage = createStorage({ fileConfig: config });
|
|
@@ -36,42 +121,58 @@ export function historyCommand(): Command {
|
|
|
36
121
|
limit,
|
|
37
122
|
});
|
|
38
123
|
|
|
124
|
+
spinner.succeed('Loaded history');
|
|
125
|
+
console.log();
|
|
126
|
+
|
|
39
127
|
if (runs.length === 0) {
|
|
40
128
|
console.log(chalk.dim('No runs found.'));
|
|
129
|
+
|
|
130
|
+
if (options.project || options.scenario) {
|
|
131
|
+
console.log();
|
|
132
|
+
console.log(chalk.dim('Filters applied:'));
|
|
133
|
+
if (options.project) console.log(chalk.dim(` Project: ${options.project}`));
|
|
134
|
+
if (options.scenario) console.log(chalk.dim(` Scenario: ${options.scenario}`));
|
|
135
|
+
console.log();
|
|
136
|
+
console.log(chalk.dim('Try removing filters or run some tests first.'));
|
|
137
|
+
}
|
|
41
138
|
return;
|
|
42
139
|
}
|
|
43
140
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
chalk.bold('Date'),
|
|
50
|
-
],
|
|
51
|
-
style: { head: [], border: [] },
|
|
52
|
-
});
|
|
53
|
-
|
|
54
|
-
for (const run of runs) {
|
|
55
|
-
const successColor =
|
|
56
|
-
run.successRate >= 0.9
|
|
57
|
-
? chalk.green
|
|
58
|
-
: run.successRate >= 0.7
|
|
59
|
-
? chalk.yellow
|
|
60
|
-
: chalk.red;
|
|
61
|
-
|
|
62
|
-
table.push([
|
|
63
|
-
run.runId,
|
|
64
|
-
run.scenario,
|
|
65
|
-
successColor(`${(run.successRate * 100).toFixed(1)}%`),
|
|
66
|
-
new Date(run.createdAt).toLocaleString(),
|
|
67
|
-
]);
|
|
141
|
+
// Show history table
|
|
142
|
+
if (isTTY) {
|
|
143
|
+
console.log(renderHistoryTable(runs));
|
|
144
|
+
} else {
|
|
145
|
+
console.log(renderPlainHistory(runs));
|
|
68
146
|
}
|
|
69
147
|
|
|
70
|
-
console.log(table.toString());
|
|
71
148
|
console.log();
|
|
72
|
-
console.log(
|
|
149
|
+
console.log(
|
|
150
|
+
chalk.dim(
|
|
151
|
+
`Showing ${runs.length} run${runs.length === 1 ? '' : 's'}${options.limit ? ` (limit: ${limit})` : ''}`
|
|
152
|
+
)
|
|
153
|
+
);
|
|
154
|
+
|
|
155
|
+
if (options.project || options.scenario) {
|
|
156
|
+
console.log(
|
|
157
|
+
chalk.dim('Filters:') +
|
|
158
|
+
(options.project ? chalk.dim(` project=${options.project}`) : '') +
|
|
159
|
+
(options.scenario ? chalk.dim(` scenario=${options.scenario}`) : '')
|
|
160
|
+
);
|
|
161
|
+
}
|
|
73
162
|
} catch (error) {
|
|
74
|
-
|
|
163
|
+
spinner.fail('Error');
|
|
164
|
+
console.log();
|
|
165
|
+
console.log(
|
|
166
|
+
renderError({
|
|
167
|
+
title: 'Failed to Load History',
|
|
168
|
+
reason: (error as Error).message,
|
|
169
|
+
suggestions: [
|
|
170
|
+
'Check storage configuration in artemis.config.yaml',
|
|
171
|
+
'Verify the storage directory exists',
|
|
172
|
+
'Run some tests first with "artemiskit run"',
|
|
173
|
+
],
|
|
174
|
+
})
|
|
175
|
+
);
|
|
75
176
|
process.exit(1);
|
|
76
177
|
}
|
|
77
178
|
});
|