@artemiskit/cli 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/README.md +1 -0
- package/dist/index.js +19127 -20009
- package/dist/src/__tests__/helpers/index.d.ts +6 -0
- package/dist/src/__tests__/helpers/index.d.ts.map +1 -0
- package/dist/src/__tests__/helpers/mock-adapter.d.ts +87 -0
- package/dist/src/__tests__/helpers/mock-adapter.d.ts.map +1 -0
- package/dist/src/__tests__/helpers/test-utils.d.ts +47 -0
- package/dist/src/__tests__/helpers/test-utils.d.ts.map +1 -0
- package/dist/src/commands/compare.d.ts.map +1 -1
- package/dist/src/commands/history.d.ts.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/report.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/commands/stress.d.ts.map +1 -1
- package/dist/src/ui/colors.d.ts +44 -0
- package/dist/src/ui/colors.d.ts.map +1 -0
- package/dist/src/ui/errors.d.ts +39 -0
- package/dist/src/ui/errors.d.ts.map +1 -0
- package/dist/src/ui/index.d.ts +16 -0
- package/dist/src/ui/index.d.ts.map +1 -0
- package/dist/src/ui/live-status.d.ts +82 -0
- package/dist/src/ui/live-status.d.ts.map +1 -0
- package/dist/src/ui/panels.d.ts +49 -0
- package/dist/src/ui/panels.d.ts.map +1 -0
- package/dist/src/ui/progress.d.ts +60 -0
- package/dist/src/ui/progress.d.ts.map +1 -0
- package/dist/src/ui/utils.d.ts +42 -0
- package/dist/src/ui/utils.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/__tests__/helpers/index.ts +6 -0
- package/src/__tests__/helpers/mock-adapter.ts +108 -0
- package/src/__tests__/helpers/test-utils.ts +205 -0
- package/src/__tests__/integration/compare-command.test.ts +236 -0
- package/src/__tests__/integration/config.test.ts +125 -0
- package/src/__tests__/integration/history-command.test.ts +251 -0
- package/src/__tests__/integration/init-command.test.ts +177 -0
- package/src/__tests__/integration/report-command.test.ts +245 -0
- package/src/__tests__/integration/ui.test.ts +230 -0
- package/src/commands/compare.ts +156 -49
- package/src/commands/history.ts +131 -30
- package/src/commands/init.ts +181 -21
- package/src/commands/redteam.ts +118 -75
- package/src/commands/report.ts +29 -14
- package/src/commands/run.ts +86 -66
- package/src/commands/stress.ts +61 -63
- package/src/ui/colors.ts +62 -0
- package/src/ui/errors.ts +248 -0
- package/src/ui/index.ts +42 -0
- package/src/ui/live-status.ts +259 -0
- package/src/ui/panels.ts +216 -0
- package/src/ui/progress.ts +139 -0
- package/src/ui/utils.ts +89 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock adapter for CLI integration tests
|
|
3
|
+
*
|
|
4
|
+
* This module provides mock types and adapters for testing CLI commands
|
|
5
|
+
* without making actual LLM API calls.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * One entry of the conversation handed to the mock adapter's `chat` call.
 */
export interface MockMessage {
  /** Author of the message. */
  role: 'system' | 'user' | 'assistant';
  /** Text of the message. */
  content: string;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Value produced by the mock adapter for a single chat call.
 */
export interface MockLLMResponse {
  /** Response text. */
  content: string;
  /** Token counts reported alongside the response. */
  usage: {
    input: number;
    output: number;
  };
}
|
|
19
|
+
|
|
20
|
+
/**
 * Contract implemented by the mock adapter: a single asynchronous `chat` entry point.
 */
export interface MockLLMAdapter {
  /** Takes the conversation so far and resolves with the canned response. */
  chat: (messages: MockMessage[]) => Promise<MockLLMResponse>;
}
|
|
24
|
+
|
|
25
|
+
/**
 * A canned response that can be registered with the mock adapter.
 */
export interface MockResponse {
  /** Text returned to the caller. */
  content: string;
  /** Optional simulated latency in milliseconds. */
  latencyMs?: number;
  /** Optional token counts to report for this response. */
  tokens?: {
    input: number;
    output: number;
  };
}
|
|
30
|
+
|
|
31
|
+
/**
 * Options accepted by `createMockAdapter`. Every field is optional.
 */
export interface MockAdapterOptions {
  /** Canned responses keyed by a string. */
  responses?: Map<string, MockResponse>;
  /** Response used when `responses` has no entry for the lookup key. */
  defaultResponse?: MockResponse;
  /** Whether the adapter should fail instead of responding. */
  shouldFail?: boolean;
  /** Message for the failure raised when `shouldFail` is set. */
  failureMessage?: string;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Creates a mock LLM adapter for testing.
 *
 * Each `chat` call on the returned adapter:
 * 1. rejects with `new Error(failureMessage)` when `shouldFail` is set;
 * 2. takes the content of the last `user` message (or `''` when there is none)
 *    and looks it up in `responses` by exact string match, falling back to
 *    `defaultResponse`;
 * 3. waits for the chosen response's `latencyMs`, capped at 50 ms;
 * 4. resolves with the response content and its token counts
 *    (`{ input: 10, output: 15 }` when the response defines none).
 *
 * @param options - Optional overrides; every field has a default.
 * @returns An adapter whose `chat` method resolves with canned responses.
 */
export function createMockAdapter(options: MockAdapterOptions = {}): MockLLMAdapter {
  const {
    responses = new Map<string, MockResponse>(),
    defaultResponse = {
      content: 'Hello! How can I help you today?',
      latencyMs: 100,
      tokens: { input: 10, output: 15 },
    },
    shouldFail = false,
    failureMessage = 'Mock adapter failure',
  } = options;

  return {
    chat: async (messages: MockMessage[]): Promise<MockLLMResponse> => {
      if (shouldFail) {
        throw new Error(failureMessage);
      }

      // The last user message decides which canned response is used.
      const lastUserMessage = [...messages].reverse().find((m) => m.role === 'user');
      const prompt = lastUserMessage?.content ?? '';

      const mockResponse = responses.get(prompt) ?? defaultResponse;

      // Read the latency into a local first: narrowing of `mockResponse.latencyMs`
      // would not carry into the Promise executor closure below.
      const latencyMs = mockResponse.latencyMs;
      if (latencyMs) {
        const delayMs = Math.min(latencyMs, 50);
        await new Promise<void>((resolve) => setTimeout(resolve, delayMs));
      }

      // Copy the token counts so callers cannot mutate the shared preset.
      const { input, output } = mockResponse.tokens ?? { input: 10, output: 15 };
      return {
        content: mockResponse.content,
        usage: { input, output },
      };
    },
  };
}
|
|
78
|
+
|
|
79
|
+
/** Builds one preset entry from its text, latency and token counts. */
function buildPreset(
  content: string,
  latencyMs: number,
  input: number,
  output: number
): { content: string; latencyMs: number; tokens: { input: number; output: number } } {
  return { content, latencyMs, tokens: { input, output } };
}

/**
 * Ready-made canned responses for common test scenarios, keyed by topic.
 */
export const mockResponses = {
  greeting: buildPreset('Hello! How can I help you today?', 50, 5, 8),
  capitals: buildPreset('The capital of France is Paris.', 75, 10, 8),
  math: buildPreset('The answer is 4.', 30, 8, 5),
  json: buildPreset('{"name": "John", "age": 30}', 60, 15, 12),
  code: buildPreset('function add(a, b) { return a + b; }', 80, 20, 15),
};
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test utilities for CLI integration tests
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
|
|
8
|
+
|
|
9
|
+
/**
 * Creates a uniquely named temporary directory for test isolation.
 *
 * The directory is created under the OS temp directory and its name starts
 * with `${prefix}-`.
 *
 * @param prefix - Name fragment placed at the start of the directory name.
 * @returns The path of the newly created directory.
 */
export async function createTestDir(prefix = 'artemis-test'): Promise<string> {
  // mkdtemp appends random characters and creates the directory in one step,
  // so a name collision rejects instead of silently reusing another test's
  // directory (which mkdir with `recursive: true` would do).
  return mkdtemp(join(tmpdir(), `${prefix}-`));
}
|
|
17
|
+
|
|
18
|
+
/**
 * Removes a test directory and everything inside it.
 *
 * Cleanup is best-effort: any error raised while removing the directory is
 * swallowed, so the returned promise always resolves.
 *
 * @param testDir - Path of the directory to remove.
 */
export async function cleanupTestDir(testDir: string): Promise<void> {
  const removal = { recursive: true, force: true };
  try {
    await rm(testDir, removal);
  } catch {
    // Deliberately ignored: a failed cleanup must not fail the test run.
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Writes a scenario file to `<dir>/scenarios/<name>.yaml`, creating the
 * `scenarios` directory when it does not exist yet.
 *
 * @param dir - Directory that holds the `scenarios` folder.
 * @param name - File name without the `.yaml` extension.
 * @param content - Text written to the file.
 * @returns The path of the written file.
 */
export async function createScenarioFile(
  dir: string,
  name: string,
  content: string
): Promise<string> {
  const folder = join(dir, 'scenarios');
  const target = join(folder, `${name}.yaml`);

  await mkdir(folder, { recursive: true });
  await writeFile(target, content);

  return target;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Writes `config` to `<dir>/artemis.config.yaml` using a minimal YAML encoding.
 *
 * Encoding rules:
 * - a non-empty plain object becomes a nested block with one indented
 *   `key: <JSON>` line per entry (one level deep; deeper values are JSON);
 * - everything else is written as `key: <JSON>` on one line. This includes
 *   arrays (a YAML flow sequence) and objects with no enumerable entries;
 * - values JSON cannot represent (e.g. `undefined`) are written as `null`.
 *
 * @param dir - Directory in which the config file is created.
 * @param config - Top-level configuration entries.
 * @returns The path of the written file.
 */
export async function createConfigFile(
  dir: string,
  config: Record<string, unknown>
): Promise<string> {
  const filePath = join(dir, 'artemis.config.yaml');

  // JSON.stringify returns undefined for undefined/functions/symbols; emit
  // null instead of the literal text "undefined".
  const encode = (value: unknown): string => JSON.stringify(value) ?? 'null';

  const lines: string[] = [];
  for (const [key, value] of Object.entries(config)) {
    // Arrays are objects too, but expanding them would produce an index-keyed
    // map ("0: ...") instead of a list, so they stay on the JSON path.
    const nestedEntries =
      typeof value === 'object' && value !== null && !Array.isArray(value)
        ? Object.entries(value as Record<string, unknown>)
        : [];

    if (nestedEntries.length === 0) {
      // Scalars, arrays, and objects without entries ("key: {}" rather than
      // a bare "key:", which YAML would read as null).
      lines.push(`${key}: ${encode(value)}`);
      continue;
    }

    lines.push(`${key}:`);
    for (const [k, v] of nestedEntries) {
      lines.push(` ${k}: ${encode(v)}`);
    }
  }

  await writeFile(filePath, lines.join('\n'));
  return filePath;
}
|
|
66
|
+
|
|
67
|
+
/** Single case using a `contains` expectation. */
const simpleScenario = `
name: simple-test
description: A simple test scenario

cases:
  - id: test-1
    prompt: "Say hello"
    expected:
      type: contains
      values:
        - "hello"
      mode: any
`;

/** Three cases, each using a `contains` expectation. */
const multiCaseScenario = `
name: multi-case-test
description: Multiple test cases

cases:
  - id: case-1
    prompt: "What is 2+2?"
    expected:
      type: contains
      values:
        - "4"
      mode: any

  - id: case-2
    prompt: "What is the capital of France?"
    expected:
      type: contains
      values:
        - "Paris"
      mode: any

  - id: case-3
    prompt: "Say hello"
    expected:
      type: contains
      values:
        - "hello"
      mode: any
`;

/** Scenario that sets `provider` and `model` at the top level. */
const withProviderScenario = `
name: provider-test
description: Test with provider config
provider: openai
model: gpt-4o-mini

cases:
  - id: test-1
    prompt: "Hello"
    expected:
      type: contains
      values:
        - "hello"
      mode: any
`;

/** Single case using an `exact` expectation. */
const exactMatchScenario = `
name: exact-match-test
description: Test exact matching

cases:
  - id: exact-1
    prompt: "Return exactly: hello world"
    expected:
      type: exact
      value: "hello world"
`;

/** Single case using a `regex` expectation. */
const regexMatchScenario = `
name: regex-test
description: Test regex matching

cases:
  - id: regex-1
    prompt: "Return a number"
    expected:
      type: regex
      pattern: "\\\\d+"
`;

/** Single case whose `exact` expectation is written so that it does not match. */
const failingScenario = `
name: failing-test
description: A test that should fail

cases:
  - id: will-fail
    prompt: "Say hello"
    expected:
      type: exact
      value: "this will not match"
`;

/**
 * Sample scenario documents (YAML text) used as fixtures by the tests.
 */
export const scenarioTemplates = {
  simple: simpleScenario,
  multiCase: multiCaseScenario,
  withProvider: withProviderScenario,
  exactMatch: exactMatchScenario,
  regexMatch: regexMatchScenario,
  failing: failingScenario,
};
|
|
167
|
+
|
|
168
|
+
/**
 * Captures `console.log` and `console.error` output during test execution.
 *
 * `start()` replaces both console methods with recorders and `stop()` puts
 * the previous functions back. Each call's arguments are converted with
 * `String` and joined with a single space.
 */
export class OutputCapture {
  private originalLog: typeof console.log = console.log;
  private originalError: typeof console.error = console.error;
  private logs: string[] = [];
  private errors: string[] = [];
  /** True between `start()` and the matching `stop()`. */
  private capturing = false;

  /**
   * Begins capturing and clears anything recorded earlier.
   *
   * Calling `start()` again while already capturing only resets the buffers;
   * the saved console functions are kept, so `stop()` still restores the real
   * ones rather than this class's own recorders.
   */
  start(): void {
    if (!this.capturing) {
      this.originalLog = console.log;
      this.originalError = console.error;
      this.capturing = true;
    }
    this.logs = [];
    this.errors = [];

    console.log = (...args: unknown[]) => {
      this.logs.push(args.map(String).join(' '));
    };

    console.error = (...args: unknown[]) => {
      this.errors.push(args.map(String).join(' '));
    };
  }

  /**
   * Stops capturing and restores the console functions saved by `start()`.
   * When no capture is active the console is left untouched.
   *
   * @returns The captured `console.log` and `console.error` lines.
   */
  stop(): { logs: string[]; errors: string[] } {
    if (this.capturing) {
      console.log = this.originalLog;
      console.error = this.originalError;
      this.capturing = false;
    }
    return { logs: this.logs, errors: this.errors };
  }

  /** Returns the captured `console.log` lines joined with newlines. */
  getOutput(): string {
    return this.logs.join('\n');
  }

  /** Returns the captured `console.error` lines joined with newlines. */
  getErrors(): string {
    return this.errors.join('\n');
  }
}
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for compare command
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
|
6
|
+
import { mkdir, writeFile } from 'node:fs/promises';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
import { createStorage } from '../../utils/storage.js';
|
|
9
|
+
import { cleanupTestDir, createTestDir } from '../helpers/test-utils.js';
|
|
10
|
+
|
|
11
|
+
describe('Compare Command', () => {
  /** Metrics section of a stored run manifest. */
  interface RunMetrics {
    success_rate: number;
    passed_cases: number;
    failed_cases: number;
    total_tokens: number;
    median_latency_ms: number;
  }

  let testDir: string;
  let originalCwd: string;

  /** Root directory of the local run storage inside the current test dir. */
  const runsRoot = (): string => join(testDir, 'artemis-runs');

  /** Writes a run manifest as `<runs>/test-project/<runId>.json`. */
  const saveRun = async (runId: string, startTime: string, metrics: RunMetrics): Promise<void> => {
    const manifest = {
      run_id: runId,
      project: 'test-project',
      config: { scenario: 'test-scenario' },
      start_time: startTime,
      metrics,
      cases: [],
    };
    await writeFile(join(runsRoot(), 'test-project', `${runId}.json`), JSON.stringify(manifest));
  };

  /** Opens a local storage backend rooted at the test run directory. */
  const openStorage = () =>
    createStorage({
      fileConfig: {
        storage: { type: 'local', basePath: runsRoot() },
      },
    });

  beforeEach(async () => {
    testDir = await createTestDir('compare-test');
    originalCwd = process.cwd();
    process.chdir(testDir);

    // Storage directory the manifests are written into
    await mkdir(join(testDir, 'artemis-runs', 'test-project'), { recursive: true });
  });

  afterEach(async () => {
    process.chdir(originalCwd);
    await cleanupTestDir(testDir);
  });

  describe('storage comparison', () => {
    it('should compare two runs and calculate deltas', async () => {
      await saveRun('baseline-001', new Date('2026-01-15T10:00:00Z').toISOString(), {
        success_rate: 0.8,
        passed_cases: 4,
        failed_cases: 1,
        total_tokens: 500,
        median_latency_ms: 200,
      });
      await saveRun('current-001', new Date('2026-01-16T10:00:00Z').toISOString(), {
        success_rate: 1.0,
        passed_cases: 5,
        failed_cases: 0,
        total_tokens: 450,
        median_latency_ms: 150,
      });

      const storage = openStorage();
      const comparison = await storage.compare?.('baseline-001', 'current-001');

      expect(comparison.baseline.metrics.success_rate).toBe(0.8);
      expect(comparison.current.metrics.success_rate).toBe(1.0);
      expect(comparison.delta.successRate).toBeCloseTo(0.2, 5); // 1.0 - 0.8
      expect(comparison.delta.latency).toBe(-50); // 150 - 200 (improved)
      expect(comparison.delta.tokens).toBe(-50); // 450 - 500 (reduced)
    });

    it('should detect regression when success rate drops', async () => {
      await saveRun('baseline-002', new Date('2026-01-15T10:00:00Z').toISOString(), {
        success_rate: 1.0,
        passed_cases: 5,
        failed_cases: 0,
        total_tokens: 500,
        median_latency_ms: 200,
      });
      await saveRun('current-002', new Date('2026-01-16T10:00:00Z').toISOString(), {
        success_rate: 0.6,
        passed_cases: 3,
        failed_cases: 2,
        total_tokens: 600,
        median_latency_ms: 300,
      });

      const storage = openStorage();
      const comparison = await storage.compare?.('baseline-002', 'current-002');

      // Success rate dropped by 0.4 (40%)
      expect(comparison.delta.successRate).toBeCloseTo(-0.4, 5);

      // A drop beyond the threshold (typically 5%) counts as a regression
      const threshold = 0.05;
      const hasRegression = comparison.delta.successRate < -threshold;
      expect(hasRegression).toBe(true);
    });

    it('should handle identical runs', async () => {
      await saveRun('same-001', new Date().toISOString(), {
        success_rate: 0.9,
        passed_cases: 9,
        failed_cases: 1,
        total_tokens: 1000,
        median_latency_ms: 250,
      });

      const storage = openStorage();
      const comparison = await storage.compare?.('same-001', 'same-001');

      expect(comparison.delta.successRate).toBe(0);
      expect(comparison.delta.latency).toBe(0);
      expect(comparison.delta.tokens).toBe(0);
    });

    it('should throw error for non-existent baseline', async () => {
      await saveRun('exists-001', new Date().toISOString(), {
        success_rate: 1.0,
        passed_cases: 5,
        failed_cases: 0,
        total_tokens: 500,
        median_latency_ms: 200,
      });

      const storage = openStorage();

      await expect(storage.compare?.('non-existent', 'exists-001')).rejects.toThrow();
    });

    it('should throw error for non-existent current', async () => {
      await saveRun('exists-002', new Date().toISOString(), {
        success_rate: 1.0,
        passed_cases: 5,
        failed_cases: 0,
        total_tokens: 500,
        median_latency_ms: 200,
      });

      const storage = openStorage();

      await expect(storage.compare?.('exists-002', 'non-existent')).rejects.toThrow();
    });
  });
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for CLI configuration loading
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
|
6
|
+
import { writeFile } from 'node:fs/promises';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
import { loadConfig } from '../../config/loader.js';
|
|
9
|
+
import { cleanupTestDir, createTestDir } from '../helpers/test-utils.js';
|
|
10
|
+
|
|
11
|
+
describe('Config Loader', () => {
  let testDir: string;
  let originalCwd: string;

  /** Writes `yaml` to `<testDir>/<fileName>` and returns the file path. */
  const writeConfig = async (fileName: string, yaml: string): Promise<string> => {
    const target = join(testDir, fileName);
    await writeFile(target, yaml);
    return target;
  };

  beforeEach(async () => {
    testDir = await createTestDir('config-test');
    originalCwd = process.cwd();
    process.chdir(testDir);
  });

  afterEach(async () => {
    process.chdir(originalCwd);
    await cleanupTestDir(testDir);
  });

  describe('loadConfig', () => {
    it('should return null when no config file exists', async () => {
      const loaded = await loadConfig();
      expect(loaded).toBeNull();
    });

    it('should load artemis.config.yaml from current directory', async () => {
      await writeConfig(
        'artemis.config.yaml',
        `
provider: openai
model: gpt-4o-mini
project: test-project
`
      );

      const loaded = await loadConfig();
      expect(loaded).toBeDefined();
      expect(loaded?.provider).toBe('openai');
      expect(loaded?.model).toBe('gpt-4o-mini');
      expect(loaded?.project).toBe('test-project');
    });

    it('should load artemis.config.yml (yml extension)', async () => {
      await writeConfig(
        'artemis.config.yml',
        `
provider: azure-openai
model: gpt-4
`
      );

      const loaded = await loadConfig();
      expect(loaded).toBeDefined();
      expect(loaded?.provider).toBe('azure-openai');
    });

    it('should load config from explicit path', async () => {
      const customPath = await writeConfig(
        'custom-config.yaml',
        `
provider: anthropic
model: claude-3-sonnet
`
      );

      const loaded = await loadConfig(customPath);
      expect(loaded).toBeDefined();
      expect(loaded?.provider).toBe('anthropic');
      expect(loaded?.model).toBe('claude-3-sonnet');
    });

    it('should load nested azure configuration', async () => {
      await writeConfig(
        'artemis.config.yaml',
        `
provider: azure-openai
model: gpt-4
providers:
  azure-openai:
    resourceName: my-resource
    deploymentName: my-deployment
    apiVersion: "2024-02-15-preview"
`
      );

      const loaded = await loadConfig();
      expect(loaded).toBeDefined();
      expect(loaded?.providers?.['azure-openai']?.resourceName).toBe('my-resource');
      expect(loaded?.providers?.['azure-openai']?.deploymentName).toBe('my-deployment');
    });

    it('should load storage configuration', async () => {
      await writeConfig(
        'artemis.config.yaml',
        `
provider: openai
storage:
  type: local
  basePath: ./my-runs
`
      );

      const loaded = await loadConfig();
      expect(loaded).toBeDefined();
      expect(loaded?.storage?.type).toBe('local');
      expect(loaded?.storage?.basePath).toBe('./my-runs');
    });

    it('should throw error for invalid YAML', async () => {
      // Malformed on purpose: mis-indented key and an unclosed flow sequence.
      await writeConfig(
        'artemis.config.yaml',
        `
provider: openai
  model: gpt-4 # invalid indentation
invalid: [unclosed
`
      );

      await expect(loadConfig()).rejects.toThrow();
    });

    it('should prefer .yaml over .yml when both exist', async () => {
      await writeConfig('artemis.config.yaml', 'provider: openai');
      await writeConfig('artemis.config.yml', 'provider: anthropic');

      const loaded = await loadConfig();
      expect(loaded?.provider).toBe('openai');
    });
  });
});
|