@artemiskit/cli 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
- package/bin/artemis.ts +13 -0
- package/dist/bin/artemis.d.ts +6 -0
- package/dist/bin/artemis.d.ts.map +1 -0
- package/dist/index.js +51297 -0
- package/dist/src/adapters.d.ts +6 -0
- package/dist/src/adapters.d.ts.map +1 -0
- package/dist/src/cli.d.ts +6 -0
- package/dist/src/cli.d.ts.map +1 -0
- package/dist/src/commands/compare.d.ts +6 -0
- package/dist/src/commands/compare.d.ts.map +1 -0
- package/dist/src/commands/history.d.ts +6 -0
- package/dist/src/commands/history.d.ts.map +1 -0
- package/dist/src/commands/index.d.ts +8 -0
- package/dist/src/commands/index.d.ts.map +1 -0
- package/dist/src/commands/init.d.ts +6 -0
- package/dist/src/commands/init.d.ts.map +1 -0
- package/dist/src/commands/redteam.d.ts +6 -0
- package/dist/src/commands/redteam.d.ts.map +1 -0
- package/dist/src/commands/report.d.ts +6 -0
- package/dist/src/commands/report.d.ts.map +1 -0
- package/dist/src/commands/run.d.ts +6 -0
- package/dist/src/commands/run.d.ts.map +1 -0
- package/dist/src/commands/stress.d.ts +6 -0
- package/dist/src/commands/stress.d.ts.map +1 -0
- package/dist/src/config/index.d.ts +6 -0
- package/dist/src/config/index.d.ts.map +1 -0
- package/dist/src/config/loader.d.ts +13 -0
- package/dist/src/config/loader.d.ts.map +1 -0
- package/dist/src/config/schema.d.ts +215 -0
- package/dist/src/config/schema.d.ts.map +1 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/utils/adapter.d.ts +71 -0
- package/dist/src/utils/adapter.d.ts.map +1 -0
- package/dist/src/utils/storage.d.ts +22 -0
- package/dist/src/utils/storage.d.ts.map +1 -0
- package/package.json +65 -0
- package/src/adapters.ts +33 -0
- package/src/cli.ts +34 -0
- package/src/commands/compare.ts +104 -0
- package/src/commands/history.ts +80 -0
- package/src/commands/index.ts +8 -0
- package/src/commands/init.ts +111 -0
- package/src/commands/redteam.ts +511 -0
- package/src/commands/report.ts +126 -0
- package/src/commands/run.ts +233 -0
- package/src/commands/stress.ts +501 -0
- package/src/config/index.ts +6 -0
- package/src/config/loader.ts +112 -0
- package/src/config/schema.ts +56 -0
- package/src/index.ts +6 -0
- package/src/utils/adapter.ts +542 -0
- package/src/utils/storage.ts +67 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stress command - Run load/stress tests against an LLM
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { mkdir, writeFile } from 'node:fs/promises';
|
|
6
|
+
import { basename, extname, join } from 'node:path';
|
|
7
|
+
import {
|
|
8
|
+
type ManifestRedactionInfo,
|
|
9
|
+
type RedactionConfig,
|
|
10
|
+
Redactor,
|
|
11
|
+
type StressManifest,
|
|
12
|
+
type StressMetrics,
|
|
13
|
+
type StressRequestResult,
|
|
14
|
+
createAdapter,
|
|
15
|
+
getGitInfo,
|
|
16
|
+
parseScenarioFile,
|
|
17
|
+
} from '@artemiskit/core';
|
|
18
|
+
import { generateJSONReport, generateStressHTMLReport } from '@artemiskit/reports';
|
|
19
|
+
import chalk from 'chalk';
|
|
20
|
+
import Table from 'cli-table3';
|
|
21
|
+
import { Command } from 'commander';
|
|
22
|
+
import { nanoid } from 'nanoid';
|
|
23
|
+
import ora from 'ora';
|
|
24
|
+
import { loadConfig } from '../config/loader';
|
|
25
|
+
import {
|
|
26
|
+
buildAdapterConfig,
|
|
27
|
+
resolveModelWithSource,
|
|
28
|
+
resolveProviderWithSource,
|
|
29
|
+
} from '../utils/adapter';
|
|
30
|
+
import { createStorage } from '../utils/storage';
|
|
31
|
+
|
|
32
|
+
interface StressOptions {
|
|
33
|
+
provider?: string;
|
|
34
|
+
model?: string;
|
|
35
|
+
concurrency?: number;
|
|
36
|
+
requests?: number;
|
|
37
|
+
duration?: number;
|
|
38
|
+
rampUp?: number;
|
|
39
|
+
save?: boolean;
|
|
40
|
+
output?: string;
|
|
41
|
+
verbose?: boolean;
|
|
42
|
+
config?: string;
|
|
43
|
+
redact?: boolean;
|
|
44
|
+
redactPatterns?: string[];
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export function stressCommand(): Command {
|
|
48
|
+
const cmd = new Command('stress');
|
|
49
|
+
|
|
50
|
+
cmd
|
|
51
|
+
.description('Run load/stress tests against an LLM')
|
|
52
|
+
.argument('<scenario>', 'Path to scenario YAML file')
|
|
53
|
+
.option('-p, --provider <provider>', 'Provider to use')
|
|
54
|
+
.option('-m, --model <model>', 'Model to use')
|
|
55
|
+
.option('-c, --concurrency <number>', 'Number of concurrent requests', '10')
|
|
56
|
+
.option('-n, --requests <number>', 'Total number of requests to make')
|
|
57
|
+
.option('-d, --duration <seconds>', 'Duration to run the test in seconds', '30')
|
|
58
|
+
.option('--ramp-up <seconds>', 'Ramp-up time in seconds', '5')
|
|
59
|
+
.option('--save', 'Save results to storage')
|
|
60
|
+
.option('-o, --output <dir>', 'Output directory for reports')
|
|
61
|
+
.option('-v, --verbose', 'Verbose output')
|
|
62
|
+
.option('--config <path>', 'Path to config file')
|
|
63
|
+
.option('--redact', 'Enable PII/sensitive data redaction in results')
|
|
64
|
+
.option(
|
|
65
|
+
'--redact-patterns <patterns...>',
|
|
66
|
+
'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
|
|
67
|
+
)
|
|
68
|
+
.action(async (scenarioPath: string, options: StressOptions) => {
|
|
69
|
+
const spinner = ora('Loading configuration...').start();
|
|
70
|
+
const startTime = new Date();
|
|
71
|
+
|
|
72
|
+
try {
|
|
73
|
+
// Load config file if present
|
|
74
|
+
const config = await loadConfig(options.config);
|
|
75
|
+
if (config) {
|
|
76
|
+
spinner.succeed('Loaded config file');
|
|
77
|
+
} else {
|
|
78
|
+
spinner.info('No config file found, using defaults');
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Parse scenario
|
|
82
|
+
spinner.start('Loading scenario...');
|
|
83
|
+
const scenario = await parseScenarioFile(scenarioPath);
|
|
84
|
+
spinner.succeed(`Loaded scenario: ${scenario.name}`);
|
|
85
|
+
|
|
86
|
+
// Resolve provider and model with precedence and source tracking:
|
|
87
|
+
// CLI > Scenario > Config > Default
|
|
88
|
+
const { provider, source: providerSource } = resolveProviderWithSource(
|
|
89
|
+
options.provider,
|
|
90
|
+
scenario.provider,
|
|
91
|
+
config?.provider
|
|
92
|
+
);
|
|
93
|
+
const { model, source: modelSource } = resolveModelWithSource(
|
|
94
|
+
options.model,
|
|
95
|
+
scenario.model,
|
|
96
|
+
config?.model
|
|
97
|
+
);
|
|
98
|
+
|
|
99
|
+
// Build adapter config with full precedence chain and source tracking
|
|
100
|
+
spinner.start(`Connecting to ${provider}...`);
|
|
101
|
+
const { adapterConfig, resolvedConfig } = buildAdapterConfig({
|
|
102
|
+
provider,
|
|
103
|
+
model,
|
|
104
|
+
providerSource,
|
|
105
|
+
modelSource,
|
|
106
|
+
scenarioConfig: scenario.providerConfig,
|
|
107
|
+
fileConfig: config,
|
|
108
|
+
});
|
|
109
|
+
const client = await createAdapter(adapterConfig);
|
|
110
|
+
spinner.succeed(`Connected to ${provider}`);
|
|
111
|
+
|
|
112
|
+
// Configuration
|
|
113
|
+
const concurrency = Number.parseInt(String(options.concurrency)) || 10;
|
|
114
|
+
const durationSec = Number.parseInt(String(options.duration)) || 30;
|
|
115
|
+
const rampUpSec = Number.parseInt(String(options.rampUp)) || 5;
|
|
116
|
+
const maxRequests = options.requests
|
|
117
|
+
? Number.parseInt(String(options.requests))
|
|
118
|
+
: undefined;
|
|
119
|
+
|
|
120
|
+
// Set up redaction if enabled
|
|
121
|
+
let redactionConfig: RedactionConfig | undefined;
|
|
122
|
+
let redactor: Redactor | undefined;
|
|
123
|
+
if (options.redact) {
|
|
124
|
+
redactionConfig = {
|
|
125
|
+
enabled: true,
|
|
126
|
+
patterns: options.redactPatterns,
|
|
127
|
+
redactPrompts: true,
|
|
128
|
+
redactResponses: true,
|
|
129
|
+
redactMetadata: false,
|
|
130
|
+
replacement: '[REDACTED]',
|
|
131
|
+
};
|
|
132
|
+
redactor = new Redactor(redactionConfig);
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
console.log();
|
|
136
|
+
console.log(chalk.bold('Stress Test Configuration'));
|
|
137
|
+
console.log(chalk.dim(`Concurrency: ${concurrency}`));
|
|
138
|
+
console.log(chalk.dim(`Duration: ${durationSec}s`));
|
|
139
|
+
console.log(chalk.dim(`Ramp-up: ${rampUpSec}s`));
|
|
140
|
+
if (maxRequests) {
|
|
141
|
+
console.log(chalk.dim(`Max requests: ${maxRequests}`));
|
|
142
|
+
}
|
|
143
|
+
if (options.redact) {
|
|
144
|
+
console.log(
|
|
145
|
+
chalk.dim(
|
|
146
|
+
`Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ' (default patterns)'}`
|
|
147
|
+
)
|
|
148
|
+
);
|
|
149
|
+
}
|
|
150
|
+
console.log();
|
|
151
|
+
|
|
152
|
+
// Get test prompts from scenario cases
|
|
153
|
+
const prompts = scenario.cases.map((c) =>
|
|
154
|
+
typeof c.prompt === 'string' ? c.prompt : c.prompt.map((m) => m.content).join('\n')
|
|
155
|
+
);
|
|
156
|
+
|
|
157
|
+
if (prompts.length === 0) {
|
|
158
|
+
throw new Error('No test cases found in scenario');
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// Run stress test
|
|
162
|
+
spinner.start('Running stress test...');
|
|
163
|
+
const results = await runStressTest({
|
|
164
|
+
client,
|
|
165
|
+
model,
|
|
166
|
+
prompts,
|
|
167
|
+
concurrency,
|
|
168
|
+
durationMs: durationSec * 1000,
|
|
169
|
+
rampUpMs: rampUpSec * 1000,
|
|
170
|
+
maxRequests,
|
|
171
|
+
temperature: scenario.temperature,
|
|
172
|
+
onProgress: (completed, active) => {
|
|
173
|
+
spinner.text = `Running stress test... ${completed} completed, ${active} active`;
|
|
174
|
+
},
|
|
175
|
+
verbose: options.verbose,
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
spinner.succeed('Stress test completed');
|
|
179
|
+
const endTime = new Date();
|
|
180
|
+
console.log();
|
|
181
|
+
|
|
182
|
+
// Calculate stats
|
|
183
|
+
const metrics = calculateMetrics(results, endTime.getTime() - startTime.getTime());
|
|
184
|
+
|
|
185
|
+
// Build redaction metadata if enabled
|
|
186
|
+
let redactionInfo: ManifestRedactionInfo | undefined;
|
|
187
|
+
if (redactor && redactionConfig?.enabled) {
|
|
188
|
+
redactionInfo = {
|
|
189
|
+
enabled: true,
|
|
190
|
+
patternsUsed: redactor.patternNames,
|
|
191
|
+
replacement: redactor.replacement,
|
|
192
|
+
summary: {
|
|
193
|
+
promptsRedacted: 0, // Stress test doesn't track individual prompts
|
|
194
|
+
responsesRedacted: 0,
|
|
195
|
+
totalRedactions: 0,
|
|
196
|
+
},
|
|
197
|
+
};
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Build manifest
|
|
201
|
+
const runId = `st_${nanoid(12)}`;
|
|
202
|
+
const manifest: StressManifest = {
|
|
203
|
+
version: '1.0',
|
|
204
|
+
type: 'stress',
|
|
205
|
+
run_id: runId,
|
|
206
|
+
project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
|
|
207
|
+
start_time: startTime.toISOString(),
|
|
208
|
+
end_time: endTime.toISOString(),
|
|
209
|
+
duration_ms: endTime.getTime() - startTime.getTime(),
|
|
210
|
+
config: {
|
|
211
|
+
scenario: basename(scenarioPath, '.yaml'),
|
|
212
|
+
provider,
|
|
213
|
+
model: resolvedConfig.model,
|
|
214
|
+
concurrency,
|
|
215
|
+
duration_seconds: durationSec,
|
|
216
|
+
ramp_up_seconds: rampUpSec,
|
|
217
|
+
max_requests: maxRequests,
|
|
218
|
+
},
|
|
219
|
+
resolved_config: resolvedConfig,
|
|
220
|
+
metrics,
|
|
221
|
+
git: await getGitInfo(),
|
|
222
|
+
provenance: {
|
|
223
|
+
run_by: process.env.USER || process.env.USERNAME || 'unknown',
|
|
224
|
+
},
|
|
225
|
+
// Sample results (keep only a sample to avoid huge files)
|
|
226
|
+
sample_results: sampleResults(results, 100),
|
|
227
|
+
environment: {
|
|
228
|
+
node_version: process.version,
|
|
229
|
+
platform: process.platform,
|
|
230
|
+
arch: process.arch,
|
|
231
|
+
},
|
|
232
|
+
redaction: redactionInfo,
|
|
233
|
+
};
|
|
234
|
+
|
|
235
|
+
// Display stats
|
|
236
|
+
displayStats(metrics, runId);
|
|
237
|
+
|
|
238
|
+
// Display latency histogram if verbose
|
|
239
|
+
if (options.verbose) {
|
|
240
|
+
displayHistogram(results);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
// Save results if requested
|
|
244
|
+
if (options.save) {
|
|
245
|
+
spinner.start('Saving results...');
|
|
246
|
+
const storage = createStorage({ fileConfig: config });
|
|
247
|
+
const path = await storage.save(manifest);
|
|
248
|
+
spinner.succeed(`Results saved: ${path}`);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
// Generate reports if output directory specified
|
|
252
|
+
if (options.output) {
|
|
253
|
+
spinner.start('Generating reports...');
|
|
254
|
+
await mkdir(options.output, { recursive: true });
|
|
255
|
+
|
|
256
|
+
// HTML report
|
|
257
|
+
const html = generateStressHTMLReport(manifest);
|
|
258
|
+
const htmlPath = join(options.output, `${runId}.html`);
|
|
259
|
+
await writeFile(htmlPath, html);
|
|
260
|
+
|
|
261
|
+
// JSON report
|
|
262
|
+
const json = generateJSONReport(manifest);
|
|
263
|
+
const jsonPath = join(options.output, `${runId}.json`);
|
|
264
|
+
await writeFile(jsonPath, json);
|
|
265
|
+
|
|
266
|
+
spinner.succeed(`Reports generated: ${options.output}`);
|
|
267
|
+
console.log(chalk.dim(` HTML: ${htmlPath}`));
|
|
268
|
+
console.log(chalk.dim(` JSON: ${jsonPath}`));
|
|
269
|
+
}
|
|
270
|
+
} catch (error) {
|
|
271
|
+
spinner.fail('Error');
|
|
272
|
+
console.error(chalk.red('Error:'), (error as Error).message);
|
|
273
|
+
process.exit(1);
|
|
274
|
+
}
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
return cmd;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
interface StressTestOptions {
|
|
281
|
+
client: {
|
|
282
|
+
generate: (req: { prompt: string; model?: string; temperature?: number }) => Promise<{
|
|
283
|
+
text: string;
|
|
284
|
+
}>;
|
|
285
|
+
};
|
|
286
|
+
model?: string;
|
|
287
|
+
prompts: string[];
|
|
288
|
+
concurrency: number;
|
|
289
|
+
durationMs: number;
|
|
290
|
+
rampUpMs: number;
|
|
291
|
+
maxRequests?: number;
|
|
292
|
+
temperature?: number;
|
|
293
|
+
onProgress?: (completed: number, active: number) => void;
|
|
294
|
+
verbose?: boolean;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
async function runStressTest(options: StressTestOptions): Promise<StressRequestResult[]> {
|
|
298
|
+
const {
|
|
299
|
+
client,
|
|
300
|
+
model,
|
|
301
|
+
prompts,
|
|
302
|
+
concurrency,
|
|
303
|
+
durationMs,
|
|
304
|
+
rampUpMs,
|
|
305
|
+
maxRequests,
|
|
306
|
+
temperature,
|
|
307
|
+
onProgress,
|
|
308
|
+
} = options;
|
|
309
|
+
|
|
310
|
+
const results: StressRequestResult[] = [];
|
|
311
|
+
const startTime = Date.now();
|
|
312
|
+
const endTime = startTime + durationMs;
|
|
313
|
+
let completed = 0;
|
|
314
|
+
let active = 0;
|
|
315
|
+
let promptIndex = 0;
|
|
316
|
+
|
|
317
|
+
const makeRequest = async (): Promise<void> => {
|
|
318
|
+
const prompt = prompts[promptIndex % prompts.length];
|
|
319
|
+
promptIndex++;
|
|
320
|
+
|
|
321
|
+
const requestStart = Date.now();
|
|
322
|
+
active++;
|
|
323
|
+
|
|
324
|
+
try {
|
|
325
|
+
await client.generate({
|
|
326
|
+
prompt,
|
|
327
|
+
model,
|
|
328
|
+
temperature,
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
results.push({
|
|
332
|
+
success: true,
|
|
333
|
+
latencyMs: Date.now() - requestStart,
|
|
334
|
+
timestamp: requestStart,
|
|
335
|
+
});
|
|
336
|
+
} catch (error) {
|
|
337
|
+
results.push({
|
|
338
|
+
success: false,
|
|
339
|
+
latencyMs: Date.now() - requestStart,
|
|
340
|
+
error: (error as Error).message,
|
|
341
|
+
timestamp: requestStart,
|
|
342
|
+
});
|
|
343
|
+
} finally {
|
|
344
|
+
active--;
|
|
345
|
+
completed++;
|
|
346
|
+
onProgress?.(completed, active);
|
|
347
|
+
}
|
|
348
|
+
};
|
|
349
|
+
|
|
350
|
+
// Calculate target concurrency based on ramp-up
|
|
351
|
+
const getTargetConcurrency = (elapsed: number): number => {
|
|
352
|
+
if (elapsed >= rampUpMs) return concurrency;
|
|
353
|
+
return Math.ceil((elapsed / rampUpMs) * concurrency);
|
|
354
|
+
};
|
|
355
|
+
|
|
356
|
+
// Main loop
|
|
357
|
+
const promises: Promise<void>[] = [];
|
|
358
|
+
|
|
359
|
+
while (Date.now() < endTime) {
|
|
360
|
+
if (maxRequests && completed >= maxRequests) break;
|
|
361
|
+
|
|
362
|
+
const elapsed = Date.now() - startTime;
|
|
363
|
+
const targetConcurrency = getTargetConcurrency(elapsed);
|
|
364
|
+
|
|
365
|
+
// Launch new requests if below target
|
|
366
|
+
while (active < targetConcurrency && Date.now() < endTime) {
|
|
367
|
+
if (maxRequests && completed + active >= maxRequests) break;
|
|
368
|
+
promises.push(makeRequest());
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
// Small delay to prevent tight loop
|
|
372
|
+
await new Promise((resolve) => setTimeout(resolve, 10));
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
// Wait for all pending requests
|
|
376
|
+
await Promise.all(promises);
|
|
377
|
+
|
|
378
|
+
return results;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
function calculateMetrics(results: StressRequestResult[], durationMs: number): StressMetrics {
|
|
382
|
+
const successful = results.filter((r) => r.success);
|
|
383
|
+
const latencies = successful.map((r) => r.latencyMs).sort((a, b) => a - b);
|
|
384
|
+
|
|
385
|
+
const totalRequests = results.length;
|
|
386
|
+
const successfulRequests = successful.length;
|
|
387
|
+
const failedRequests = totalRequests - successfulRequests;
|
|
388
|
+
|
|
389
|
+
const minLatency = latencies[0] || 0;
|
|
390
|
+
const maxLatency = latencies[latencies.length - 1] || 0;
|
|
391
|
+
const avgLatency =
|
|
392
|
+
latencies.length > 0 ? latencies.reduce((sum, l) => sum + l, 0) / latencies.length : 0;
|
|
393
|
+
|
|
394
|
+
const requestsPerSecond = durationMs > 0 ? (totalRequests / durationMs) * 1000 : 0;
|
|
395
|
+
const successRate = totalRequests > 0 ? successfulRequests / totalRequests : 0;
|
|
396
|
+
|
|
397
|
+
return {
|
|
398
|
+
total_requests: totalRequests,
|
|
399
|
+
successful_requests: successfulRequests,
|
|
400
|
+
failed_requests: failedRequests,
|
|
401
|
+
success_rate: successRate,
|
|
402
|
+
requests_per_second: requestsPerSecond,
|
|
403
|
+
min_latency_ms: minLatency,
|
|
404
|
+
max_latency_ms: maxLatency,
|
|
405
|
+
avg_latency_ms: Math.round(avgLatency),
|
|
406
|
+
p50_latency_ms: percentile(latencies, 50),
|
|
407
|
+
p90_latency_ms: percentile(latencies, 90),
|
|
408
|
+
p95_latency_ms: percentile(latencies, 95),
|
|
409
|
+
p99_latency_ms: percentile(latencies, 99),
|
|
410
|
+
};
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
function percentile(sorted: number[], p: number): number {
|
|
414
|
+
if (sorted.length === 0) return 0;
|
|
415
|
+
const index = Math.ceil((p / 100) * sorted.length) - 1;
|
|
416
|
+
return sorted[Math.max(0, Math.min(index, sorted.length - 1))];
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
function sampleResults(results: StressRequestResult[], maxSamples: number): StressRequestResult[] {
|
|
420
|
+
if (results.length <= maxSamples) return results;
|
|
421
|
+
|
|
422
|
+
// Sample evenly across the results
|
|
423
|
+
const step = Math.floor(results.length / maxSamples);
|
|
424
|
+
const sampled: StressRequestResult[] = [];
|
|
425
|
+
for (let i = 0; i < results.length && sampled.length < maxSamples; i += step) {
|
|
426
|
+
sampled.push(results[i]);
|
|
427
|
+
}
|
|
428
|
+
return sampled;
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
function displayStats(metrics: StressMetrics, runId: string): void {
|
|
432
|
+
const table = new Table({
|
|
433
|
+
head: [chalk.bold('Metric'), chalk.bold('Value')],
|
|
434
|
+
style: { head: [], border: [] },
|
|
435
|
+
});
|
|
436
|
+
|
|
437
|
+
table.push(
|
|
438
|
+
['Run ID', runId],
|
|
439
|
+
['Total Requests', metrics.total_requests.toString()],
|
|
440
|
+
['Successful', chalk.green(metrics.successful_requests.toString())],
|
|
441
|
+
['Failed', metrics.failed_requests > 0 ? chalk.red(metrics.failed_requests.toString()) : '0'],
|
|
442
|
+
['', ''],
|
|
443
|
+
['Requests/sec', metrics.requests_per_second.toFixed(2)],
|
|
444
|
+
['', ''],
|
|
445
|
+
['Min Latency', `${metrics.min_latency_ms}ms`],
|
|
446
|
+
['Max Latency', `${metrics.max_latency_ms}ms`],
|
|
447
|
+
['Avg Latency', `${metrics.avg_latency_ms}ms`],
|
|
448
|
+
['p50 Latency', `${metrics.p50_latency_ms}ms`],
|
|
449
|
+
['p90 Latency', `${metrics.p90_latency_ms}ms`],
|
|
450
|
+
['p95 Latency', `${metrics.p95_latency_ms}ms`],
|
|
451
|
+
['p99 Latency', `${metrics.p99_latency_ms}ms`]
|
|
452
|
+
);
|
|
453
|
+
|
|
454
|
+
console.log(chalk.bold('Results'));
|
|
455
|
+
console.log(table.toString());
|
|
456
|
+
|
|
457
|
+
// Success rate
|
|
458
|
+
const successRate = metrics.success_rate * 100;
|
|
459
|
+
|
|
460
|
+
console.log();
|
|
461
|
+
if (successRate >= 99) {
|
|
462
|
+
console.log(chalk.green(`✓ Success rate: ${successRate.toFixed(2)}%`));
|
|
463
|
+
} else if (successRate >= 95) {
|
|
464
|
+
console.log(chalk.yellow(`⚠ Success rate: ${successRate.toFixed(2)}%`));
|
|
465
|
+
} else {
|
|
466
|
+
console.log(chalk.red(`✗ Success rate: ${successRate.toFixed(2)}%`));
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
function displayHistogram(results: StressRequestResult[]): void {
|
|
471
|
+
const successful = results.filter((r) => r.success);
|
|
472
|
+
if (successful.length === 0) return;
|
|
473
|
+
|
|
474
|
+
const latencies = successful.map((r) => r.latencyMs);
|
|
475
|
+
const maxLatency = Math.max(...latencies);
|
|
476
|
+
const bucketSize = Math.ceil(maxLatency / 10);
|
|
477
|
+
const buckets = new Array(10).fill(0);
|
|
478
|
+
|
|
479
|
+
for (const latency of latencies) {
|
|
480
|
+
const bucket = Math.min(Math.floor(latency / bucketSize), 9);
|
|
481
|
+
buckets[bucket]++;
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
const maxCount = Math.max(...buckets);
|
|
485
|
+
|
|
486
|
+
console.log();
|
|
487
|
+
console.log(chalk.bold('Latency Distribution'));
|
|
488
|
+
console.log();
|
|
489
|
+
|
|
490
|
+
for (let i = 0; i < 10; i++) {
|
|
491
|
+
const rangeStart = i * bucketSize;
|
|
492
|
+
const rangeEnd = (i + 1) * bucketSize;
|
|
493
|
+
const count = buckets[i];
|
|
494
|
+
const barLength = maxCount > 0 ? Math.round((count / maxCount) * 30) : 0;
|
|
495
|
+
const bar = '█'.repeat(barLength);
|
|
496
|
+
|
|
497
|
+
console.log(
|
|
498
|
+
`${chalk.dim(`${rangeStart.toString().padStart(5)}-${rangeEnd.toString().padStart(5)}ms`)} │ ${chalk.cyan(bar)} ${count}`
|
|
499
|
+
);
|
|
500
|
+
}
|
|
501
|
+
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration file loader
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { existsSync } from 'node:fs';
|
|
6
|
+
import { readFile } from 'node:fs/promises';
|
|
7
|
+
import { join, resolve } from 'node:path';
|
|
8
|
+
import { parse as parseYaml } from 'yaml';
|
|
9
|
+
import { type ArtemisConfig, ArtemisConfigSchema } from './schema';
|
|
10
|
+
|
|
11
|
+
const CONFIG_FILENAMES = ['artemis.config.yaml', 'artemis.config.yml', 'artemis.yaml'];
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Find and load the configuration file
|
|
15
|
+
*/
|
|
16
|
+
export async function loadConfig(configPath?: string): Promise<ArtemisConfig | null> {
|
|
17
|
+
const path = configPath || findConfigFile();
|
|
18
|
+
|
|
19
|
+
if (!path) {
|
|
20
|
+
return null;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
const content = await readFile(path, 'utf-8');
|
|
25
|
+
const raw = parseYaml(content);
|
|
26
|
+
|
|
27
|
+
// Expand environment variables
|
|
28
|
+
const expanded = expandEnvVars(raw);
|
|
29
|
+
|
|
30
|
+
const result = ArtemisConfigSchema.safeParse(expanded);
|
|
31
|
+
|
|
32
|
+
if (!result.success) {
|
|
33
|
+
const issues = result.error.issues
|
|
34
|
+
.map((i) => ` - ${i.path.join('.')}: ${i.message}`)
|
|
35
|
+
.join('\n');
|
|
36
|
+
throw new Error(`Invalid config file ${path}:\n${issues}`);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
return result.data;
|
|
40
|
+
} catch (error) {
|
|
41
|
+
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
throw error;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Find config file in current directory or parents
|
|
50
|
+
*/
|
|
51
|
+
function findConfigFile(): string | null {
|
|
52
|
+
let dir = process.cwd();
|
|
53
|
+
const root = resolve('/');
|
|
54
|
+
|
|
55
|
+
while (dir !== root) {
|
|
56
|
+
for (const filename of CONFIG_FILENAMES) {
|
|
57
|
+
const path = join(dir, filename);
|
|
58
|
+
if (existsSync(path)) {
|
|
59
|
+
return path;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
dir = resolve(dir, '..');
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
return null;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Expand environment variables in config values
|
|
70
|
+
* Supports ${VAR} and ${VAR:-default} syntax
|
|
71
|
+
*/
|
|
72
|
+
function expandEnvVars(obj: unknown): unknown {
|
|
73
|
+
if (typeof obj === 'string') {
|
|
74
|
+
return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
|
|
75
|
+
const [varName, defaultValue] = expr.split(':-');
|
|
76
|
+
return process.env[varName] || defaultValue || '';
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (Array.isArray(obj)) {
|
|
81
|
+
return obj.map(expandEnvVars);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
if (obj && typeof obj === 'object') {
|
|
85
|
+
const result: Record<string, unknown> = {};
|
|
86
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
87
|
+
result[key] = expandEnvVars(value);
|
|
88
|
+
}
|
|
89
|
+
return result;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return obj;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Get a merged config with CLI options taking precedence
|
|
97
|
+
*/
|
|
98
|
+
export function mergeConfig(
|
|
99
|
+
fileConfig: ArtemisConfig | null,
|
|
100
|
+
cliOptions: Partial<ArtemisConfig>
|
|
101
|
+
): ArtemisConfig {
|
|
102
|
+
const defaults: ArtemisConfig = {
|
|
103
|
+
project: 'default',
|
|
104
|
+
scenariosDir: './scenarios',
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
return {
|
|
108
|
+
...defaults,
|
|
109
|
+
...fileConfig,
|
|
110
|
+
...Object.fromEntries(Object.entries(cliOptions).filter(([_, v]) => v !== undefined)),
|
|
111
|
+
} as ArtemisConfig;
|
|
112
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration schema for artemis.config.yaml
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { z } from 'zod';
|
|
6
|
+
|
|
7
|
+
const ProviderConfigSchema = z.object({
|
|
8
|
+
apiKey: z.string().optional(),
|
|
9
|
+
baseUrl: z.string().optional(),
|
|
10
|
+
defaultModel: z.string().optional(),
|
|
11
|
+
timeout: z.number().optional(),
|
|
12
|
+
maxRetries: z.number().optional(),
|
|
13
|
+
// OpenAI specific
|
|
14
|
+
organization: z.string().optional(),
|
|
15
|
+
// Azure specific
|
|
16
|
+
resourceName: z.string().optional(),
|
|
17
|
+
deploymentName: z.string().optional(),
|
|
18
|
+
apiVersion: z.string().optional(),
|
|
19
|
+
// Vercel AI specific
|
|
20
|
+
underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
const StorageConfigSchema = z.object({
|
|
24
|
+
type: z.enum(['supabase', 'local']).default('local'),
|
|
25
|
+
url: z.string().optional(),
|
|
26
|
+
anonKey: z.string().optional(),
|
|
27
|
+
bucket: z.string().optional(),
|
|
28
|
+
basePath: z.string().optional(),
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
const OutputConfigSchema = z.object({
|
|
32
|
+
format: z.enum(['json', 'html', 'both']).default('json'),
|
|
33
|
+
dir: z.string().default('./artemis-output'),
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
const CIConfigSchema = z.object({
|
|
37
|
+
failOnRegression: z.boolean().default(true),
|
|
38
|
+
regressionThreshold: z.number().min(0).max(1).default(0.05),
|
|
39
|
+
baselineStrategy: z.enum(['latest', 'tagged', 'specific']).default('latest'),
|
|
40
|
+
baselineRunId: z.string().optional(),
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
export const ArtemisConfigSchema = z.object({
|
|
44
|
+
project: z.string().default('default'),
|
|
45
|
+
provider: z.string().optional(),
|
|
46
|
+
model: z.string().optional(),
|
|
47
|
+
providers: z.record(ProviderConfigSchema).optional(),
|
|
48
|
+
storage: StorageConfigSchema.optional(),
|
|
49
|
+
scenariosDir: z.string().default('./scenarios'),
|
|
50
|
+
output: OutputConfigSchema.optional(),
|
|
51
|
+
ci: CIConfigSchema.optional(),
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
export type ArtemisConfig = z.infer<typeof ArtemisConfigSchema>;
|
|
55
|
+
export type ProviderConfig = z.infer<typeof ProviderConfigSchema>;
|
|
56
|
+
export type StorageConfig = z.infer<typeof StorageConfigSchema>;
|