@artemiskit/cli 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CHANGELOG.md +62 -0
  2. package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
  3. package/bin/artemis.ts +13 -0
  4. package/dist/bin/artemis.d.ts +6 -0
  5. package/dist/bin/artemis.d.ts.map +1 -0
  6. package/dist/index.js +51297 -0
  7. package/dist/src/adapters.d.ts +6 -0
  8. package/dist/src/adapters.d.ts.map +1 -0
  9. package/dist/src/cli.d.ts +6 -0
  10. package/dist/src/cli.d.ts.map +1 -0
  11. package/dist/src/commands/compare.d.ts +6 -0
  12. package/dist/src/commands/compare.d.ts.map +1 -0
  13. package/dist/src/commands/history.d.ts +6 -0
  14. package/dist/src/commands/history.d.ts.map +1 -0
  15. package/dist/src/commands/index.d.ts +8 -0
  16. package/dist/src/commands/index.d.ts.map +1 -0
  17. package/dist/src/commands/init.d.ts +6 -0
  18. package/dist/src/commands/init.d.ts.map +1 -0
  19. package/dist/src/commands/redteam.d.ts +6 -0
  20. package/dist/src/commands/redteam.d.ts.map +1 -0
  21. package/dist/src/commands/report.d.ts +6 -0
  22. package/dist/src/commands/report.d.ts.map +1 -0
  23. package/dist/src/commands/run.d.ts +6 -0
  24. package/dist/src/commands/run.d.ts.map +1 -0
  25. package/dist/src/commands/stress.d.ts +6 -0
  26. package/dist/src/commands/stress.d.ts.map +1 -0
  27. package/dist/src/config/index.d.ts +6 -0
  28. package/dist/src/config/index.d.ts.map +1 -0
  29. package/dist/src/config/loader.d.ts +13 -0
  30. package/dist/src/config/loader.d.ts.map +1 -0
  31. package/dist/src/config/schema.d.ts +215 -0
  32. package/dist/src/config/schema.d.ts.map +1 -0
  33. package/dist/src/index.d.ts +6 -0
  34. package/dist/src/index.d.ts.map +1 -0
  35. package/dist/src/utils/adapter.d.ts +71 -0
  36. package/dist/src/utils/adapter.d.ts.map +1 -0
  37. package/dist/src/utils/storage.d.ts +22 -0
  38. package/dist/src/utils/storage.d.ts.map +1 -0
  39. package/package.json +65 -0
  40. package/src/adapters.ts +33 -0
  41. package/src/cli.ts +34 -0
  42. package/src/commands/compare.ts +104 -0
  43. package/src/commands/history.ts +80 -0
  44. package/src/commands/index.ts +8 -0
  45. package/src/commands/init.ts +111 -0
  46. package/src/commands/redteam.ts +511 -0
  47. package/src/commands/report.ts +126 -0
  48. package/src/commands/run.ts +233 -0
  49. package/src/commands/stress.ts +501 -0
  50. package/src/config/index.ts +6 -0
  51. package/src/config/loader.ts +112 -0
  52. package/src/config/schema.ts +56 -0
  53. package/src/index.ts +6 -0
  54. package/src/utils/adapter.ts +542 -0
  55. package/src/utils/storage.ts +67 -0
  56. package/tsconfig.json +13 -0
package/src/commands/stress.ts ADDED
@@ -0,0 +1,501 @@
+ /**
+  * Stress command - Run load/stress tests against an LLM
+  */
+
+ import { mkdir, writeFile } from 'node:fs/promises';
+ import { basename, join } from 'node:path';
+ import {
+   type ManifestRedactionInfo,
+   type RedactionConfig,
+   Redactor,
+   type StressManifest,
+   type StressMetrics,
+   type StressRequestResult,
+   createAdapter,
+   getGitInfo,
+   parseScenarioFile,
+ } from '@artemiskit/core';
+ import { generateJSONReport, generateStressHTMLReport } from '@artemiskit/reports';
+ import chalk from 'chalk';
+ import Table from 'cli-table3';
+ import { Command } from 'commander';
+ import { nanoid } from 'nanoid';
+ import ora from 'ora';
+ import { loadConfig } from '../config/loader';
+ import {
+   buildAdapterConfig,
+   resolveModelWithSource,
+   resolveProviderWithSource,
+ } from '../utils/adapter';
+ import { createStorage } from '../utils/storage';
+
+ interface StressOptions {
+   provider?: string;
+   model?: string;
+   concurrency?: number;
+   requests?: number;
+   duration?: number;
+   rampUp?: number;
+   save?: boolean;
+   output?: string;
+   verbose?: boolean;
+   config?: string;
+   redact?: boolean;
+   redactPatterns?: string[];
+ }
+
+ export function stressCommand(): Command {
+   const cmd = new Command('stress');
+
+   cmd
+     .description('Run load/stress tests against an LLM')
+     .argument('<scenario>', 'Path to scenario YAML file')
+     .option('-p, --provider <provider>', 'Provider to use')
+     .option('-m, --model <model>', 'Model to use')
+     .option('-c, --concurrency <number>', 'Number of concurrent requests', '10')
+     .option('-n, --requests <number>', 'Total number of requests to make')
+     .option('-d, --duration <seconds>', 'Duration to run the test in seconds', '30')
+     .option('--ramp-up <seconds>', 'Ramp-up time in seconds', '5')
+     .option('--save', 'Save results to storage')
+     .option('-o, --output <dir>', 'Output directory for reports')
+     .option('-v, --verbose', 'Verbose output')
+     .option('--config <path>', 'Path to config file')
+     .option('--redact', 'Enable PII/sensitive data redaction in results')
+     .option(
+       '--redact-patterns <patterns...>',
+       'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
+     )
+     .action(async (scenarioPath: string, options: StressOptions) => {
+       const spinner = ora('Loading configuration...').start();
+       const startTime = new Date();
+
+       try {
+         // Load config file if present
+         const config = await loadConfig(options.config);
+         if (config) {
+           spinner.succeed('Loaded config file');
+         } else {
+           spinner.info('No config file found, using defaults');
+         }
+
+         // Parse scenario
+         spinner.start('Loading scenario...');
+         const scenario = await parseScenarioFile(scenarioPath);
+         spinner.succeed(`Loaded scenario: ${scenario.name}`);
+
+         // Resolve provider and model with precedence and source tracking:
+         // CLI > Scenario > Config > Default
+         const { provider, source: providerSource } = resolveProviderWithSource(
+           options.provider,
+           scenario.provider,
+           config?.provider
+         );
+         const { model, source: modelSource } = resolveModelWithSource(
+           options.model,
+           scenario.model,
+           config?.model
+         );
+
+         // Build adapter config with full precedence chain and source tracking
+         spinner.start(`Connecting to ${provider}...`);
+         const { adapterConfig, resolvedConfig } = buildAdapterConfig({
+           provider,
+           model,
+           providerSource,
+           modelSource,
+           scenarioConfig: scenario.providerConfig,
+           fileConfig: config,
+         });
+         const client = await createAdapter(adapterConfig);
+         spinner.succeed(`Connected to ${provider}`);
+
+         // Configuration
+         const concurrency = Number.parseInt(String(options.concurrency)) || 10;
+         const durationSec = Number.parseInt(String(options.duration)) || 30;
+         const rampUpSec = Number.parseInt(String(options.rampUp)) || 5;
+         const maxRequests = options.requests
+           ? Number.parseInt(String(options.requests))
+           : undefined;
+
+         // Set up redaction if enabled
+         let redactionConfig: RedactionConfig | undefined;
+         let redactor: Redactor | undefined;
+         if (options.redact) {
+           redactionConfig = {
+             enabled: true,
+             patterns: options.redactPatterns,
+             redactPrompts: true,
+             redactResponses: true,
+             redactMetadata: false,
+             replacement: '[REDACTED]',
+           };
+           redactor = new Redactor(redactionConfig);
+         }
+
+         console.log();
+         console.log(chalk.bold('Stress Test Configuration'));
+         console.log(chalk.dim(`Concurrency: ${concurrency}`));
+         console.log(chalk.dim(`Duration: ${durationSec}s`));
+         console.log(chalk.dim(`Ramp-up: ${rampUpSec}s`));
+         if (maxRequests) {
+           console.log(chalk.dim(`Max requests: ${maxRequests}`));
+         }
+         if (options.redact) {
+           console.log(
+             chalk.dim(
+               `Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ' (default patterns)'}`
+             )
+           );
+         }
+         console.log();
+
+         // Get test prompts from scenario cases
+         const prompts = scenario.cases.map((c) =>
+           typeof c.prompt === 'string' ? c.prompt : c.prompt.map((m) => m.content).join('\n')
+         );
+
+         if (prompts.length === 0) {
+           throw new Error('No test cases found in scenario');
+         }
+
+         // Run stress test
+         spinner.start('Running stress test...');
+         const results = await runStressTest({
+           client,
+           model,
+           prompts,
+           concurrency,
+           durationMs: durationSec * 1000,
+           rampUpMs: rampUpSec * 1000,
+           maxRequests,
+           temperature: scenario.temperature,
+           onProgress: (completed, active) => {
+             spinner.text = `Running stress test... ${completed} completed, ${active} active`;
+           },
+           verbose: options.verbose,
+         });
+
+         spinner.succeed('Stress test completed');
+         const endTime = new Date();
+         console.log();
+
+         // Calculate stats
+         const metrics = calculateMetrics(results, endTime.getTime() - startTime.getTime());
+
+         // Build redaction metadata if enabled
+         let redactionInfo: ManifestRedactionInfo | undefined;
+         if (redactor && redactionConfig?.enabled) {
+           redactionInfo = {
+             enabled: true,
+             patternsUsed: redactor.patternNames,
+             replacement: redactor.replacement,
+             summary: {
+               promptsRedacted: 0, // Stress test doesn't track individual prompts
+               responsesRedacted: 0,
+               totalRedactions: 0,
+             },
+           };
+         }
+
+         // Build manifest
+         const runId = `st_${nanoid(12)}`;
+         const manifest: StressManifest = {
+           version: '1.0',
+           type: 'stress',
+           run_id: runId,
+           project: config?.project || process.env.ARTEMIS_PROJECT || 'default',
+           start_time: startTime.toISOString(),
+           end_time: endTime.toISOString(),
+           duration_ms: endTime.getTime() - startTime.getTime(),
+           config: {
+             scenario: basename(scenarioPath, '.yaml'),
+             provider,
+             model: resolvedConfig.model,
+             concurrency,
+             duration_seconds: durationSec,
+             ramp_up_seconds: rampUpSec,
+             max_requests: maxRequests,
+           },
+           resolved_config: resolvedConfig,
+           metrics,
+           git: await getGitInfo(),
+           provenance: {
+             run_by: process.env.USER || process.env.USERNAME || 'unknown',
+           },
+           // Sample results (keep only a sample to avoid huge files)
+           sample_results: sampleResults(results, 100),
+           environment: {
+             node_version: process.version,
+             platform: process.platform,
+             arch: process.arch,
+           },
+           redaction: redactionInfo,
+         };
+
+         // Display stats
+         displayStats(metrics, runId);
+
+         // Display latency histogram if verbose
+         if (options.verbose) {
+           displayHistogram(results);
+         }
+
+         // Save results if requested
+         if (options.save) {
+           spinner.start('Saving results...');
+           const storage = createStorage({ fileConfig: config });
+           const path = await storage.save(manifest);
+           spinner.succeed(`Results saved: ${path}`);
+         }
+
+         // Generate reports if output directory specified
+         if (options.output) {
+           spinner.start('Generating reports...');
+           await mkdir(options.output, { recursive: true });
+
+           // HTML report
+           const html = generateStressHTMLReport(manifest);
+           const htmlPath = join(options.output, `${runId}.html`);
+           await writeFile(htmlPath, html);
+
+           // JSON report
+           const json = generateJSONReport(manifest);
+           const jsonPath = join(options.output, `${runId}.json`);
+           await writeFile(jsonPath, json);
+
+           spinner.succeed(`Reports generated: ${options.output}`);
+           console.log(chalk.dim(` HTML: ${htmlPath}`));
+           console.log(chalk.dim(` JSON: ${jsonPath}`));
+         }
+       } catch (error) {
+         spinner.fail('Error');
+         console.error(chalk.red('Error:'), (error as Error).message);
+         process.exit(1);
+       }
+     });
+
+   return cmd;
+ }
+
+ interface StressTestOptions {
+   client: {
+     generate: (req: { prompt: string; model?: string; temperature?: number }) => Promise<{
+       text: string;
+     }>;
+   };
+   model?: string;
+   prompts: string[];
+   concurrency: number;
+   durationMs: number;
+   rampUpMs: number;
+   maxRequests?: number;
+   temperature?: number;
+   onProgress?: (completed: number, active: number) => void;
+   verbose?: boolean;
+ }
+
+ async function runStressTest(options: StressTestOptions): Promise<StressRequestResult[]> {
+   const {
+     client,
+     model,
+     prompts,
+     concurrency,
+     durationMs,
+     rampUpMs,
+     maxRequests,
+     temperature,
+     onProgress,
+   } = options;
+
+   const results: StressRequestResult[] = [];
+   const startTime = Date.now();
+   const endTime = startTime + durationMs;
+   let completed = 0;
+   let active = 0;
+   let promptIndex = 0;
+
+   const makeRequest = async (): Promise<void> => {
+     const prompt = prompts[promptIndex % prompts.length];
+     promptIndex++;
+
+     const requestStart = Date.now();
+     active++;
+
+     try {
+       await client.generate({
+         prompt,
+         model,
+         temperature,
+       });
+
+       results.push({
+         success: true,
+         latencyMs: Date.now() - requestStart,
+         timestamp: requestStart,
+       });
+     } catch (error) {
+       results.push({
+         success: false,
+         latencyMs: Date.now() - requestStart,
+         error: (error as Error).message,
+         timestamp: requestStart,
+       });
+     } finally {
+       active--;
+       completed++;
+       onProgress?.(completed, active);
+     }
+   };
+
+   // Calculate target concurrency based on ramp-up
+   const getTargetConcurrency = (elapsed: number): number => {
+     if (elapsed >= rampUpMs) return concurrency;
+     return Math.ceil((elapsed / rampUpMs) * concurrency);
+   };
+
+   // Main loop
+   const promises: Promise<void>[] = [];
+
+   while (Date.now() < endTime) {
+     if (maxRequests && completed >= maxRequests) break;
+
+     const elapsed = Date.now() - startTime;
+     const targetConcurrency = getTargetConcurrency(elapsed);
+
+     // Launch new requests if below target
+     while (active < targetConcurrency && Date.now() < endTime) {
+       if (maxRequests && completed + active >= maxRequests) break;
+       promises.push(makeRequest());
+     }
+
+     // Small delay to prevent tight loop
+     await new Promise((resolve) => setTimeout(resolve, 10));
+   }
+
+   // Wait for all pending requests
+   await Promise.all(promises);
+
+   return results;
+ }
+
+ function calculateMetrics(results: StressRequestResult[], durationMs: number): StressMetrics {
+   const successful = results.filter((r) => r.success);
+   const latencies = successful.map((r) => r.latencyMs).sort((a, b) => a - b);
+
+   const totalRequests = results.length;
+   const successfulRequests = successful.length;
+   const failedRequests = totalRequests - successfulRequests;
+
+   const minLatency = latencies[0] || 0;
+   const maxLatency = latencies[latencies.length - 1] || 0;
+   const avgLatency =
+     latencies.length > 0 ? latencies.reduce((sum, l) => sum + l, 0) / latencies.length : 0;
+
+   const requestsPerSecond = durationMs > 0 ? (totalRequests / durationMs) * 1000 : 0;
+   const successRate = totalRequests > 0 ? successfulRequests / totalRequests : 0;
+
+   return {
+     total_requests: totalRequests,
+     successful_requests: successfulRequests,
+     failed_requests: failedRequests,
+     success_rate: successRate,
+     requests_per_second: requestsPerSecond,
+     min_latency_ms: minLatency,
+     max_latency_ms: maxLatency,
+     avg_latency_ms: Math.round(avgLatency),
+     p50_latency_ms: percentile(latencies, 50),
+     p90_latency_ms: percentile(latencies, 90),
+     p95_latency_ms: percentile(latencies, 95),
+     p99_latency_ms: percentile(latencies, 99),
+   };
+ }
+
+ function percentile(sorted: number[], p: number): number {
+   if (sorted.length === 0) return 0;
+   const index = Math.ceil((p / 100) * sorted.length) - 1;
+   return sorted[Math.max(0, Math.min(index, sorted.length - 1))];
+ }
+
+ function sampleResults(results: StressRequestResult[], maxSamples: number): StressRequestResult[] {
+   if (results.length <= maxSamples) return results;
+
+   // Sample evenly across the results
+   const step = Math.floor(results.length / maxSamples);
+   const sampled: StressRequestResult[] = [];
+   for (let i = 0; i < results.length && sampled.length < maxSamples; i += step) {
+     sampled.push(results[i]);
+   }
+   return sampled;
+ }
+
+ function displayStats(metrics: StressMetrics, runId: string): void {
+   const table = new Table({
+     head: [chalk.bold('Metric'), chalk.bold('Value')],
+     style: { head: [], border: [] },
+   });
+
+   table.push(
+     ['Run ID', runId],
+     ['Total Requests', metrics.total_requests.toString()],
+     ['Successful', chalk.green(metrics.successful_requests.toString())],
+     ['Failed', metrics.failed_requests > 0 ? chalk.red(metrics.failed_requests.toString()) : '0'],
+     ['', ''],
+     ['Requests/sec', metrics.requests_per_second.toFixed(2)],
+     ['', ''],
+     ['Min Latency', `${metrics.min_latency_ms}ms`],
+     ['Max Latency', `${metrics.max_latency_ms}ms`],
+     ['Avg Latency', `${metrics.avg_latency_ms}ms`],
+     ['p50 Latency', `${metrics.p50_latency_ms}ms`],
+     ['p90 Latency', `${metrics.p90_latency_ms}ms`],
+     ['p95 Latency', `${metrics.p95_latency_ms}ms`],
+     ['p99 Latency', `${metrics.p99_latency_ms}ms`]
+   );
+
+   console.log(chalk.bold('Results'));
+   console.log(table.toString());
+
+   // Success rate
+   const successRate = metrics.success_rate * 100;
+
+   console.log();
+   if (successRate >= 99) {
+     console.log(chalk.green(`✓ Success rate: ${successRate.toFixed(2)}%`));
+   } else if (successRate >= 95) {
+     console.log(chalk.yellow(`⚠ Success rate: ${successRate.toFixed(2)}%`));
+   } else {
+     console.log(chalk.red(`✗ Success rate: ${successRate.toFixed(2)}%`));
+   }
+ }
+
+ function displayHistogram(results: StressRequestResult[]): void {
+   const successful = results.filter((r) => r.success);
+   if (successful.length === 0) return;
+
+   const latencies = successful.map((r) => r.latencyMs);
+   const maxLatency = Math.max(...latencies);
+   const bucketSize = Math.ceil(maxLatency / 10);
+   const buckets = new Array(10).fill(0);
+
+   for (const latency of latencies) {
+     const bucket = Math.min(Math.floor(latency / bucketSize), 9);
+     buckets[bucket]++;
+   }
+
+   const maxCount = Math.max(...buckets);
+
+   console.log();
+   console.log(chalk.bold('Latency Distribution'));
+   console.log();
+
+   for (let i = 0; i < 10; i++) {
+     const rangeStart = i * bucketSize;
+     const rangeEnd = (i + 1) * bucketSize;
+     const count = buckets[i];
+     const barLength = maxCount > 0 ? Math.round((count / maxCount) * 30) : 0;
+     const bar = '█'.repeat(barLength);
+
+     console.log(
+       `${chalk.dim(`${rangeStart.toString().padStart(5)}-${rangeEnd.toString().padStart(5)}ms`)} │ ${chalk.cyan(bar)} ${count}`
+     );
+   }
+ }
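Note on the logic above: runStressTest scales target concurrency linearly over the ramp-up window, and calculateMetrics reports nearest-rank percentiles over the sorted latencies. The following standalone TypeScript sketch illustrates those two calculations; the helper names here are illustrative and are not exports of the package.

// Illustrative sketch only: mirrors the ramp-up and percentile arithmetic above.
const concurrency = 10;
const rampUpMs = 5_000;

function targetConcurrency(elapsedMs: number): number {
  if (elapsedMs >= rampUpMs) return concurrency;
  return Math.ceil((elapsedMs / rampUpMs) * concurrency);
}

function nearestRankPercentile(sorted: number[], p: number): number {
  if (sorted.length === 0) return 0;
  const index = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[Math.max(0, Math.min(index, sorted.length - 1))];
}

console.log(targetConcurrency(1_000)); // 2  (20% of the ramp-up window)
console.log(targetConcurrency(6_000)); // 10 (ramp-up complete)
console.log(nearestRankPercentile([100, 120, 150, 400], 95)); // 400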
package/src/config/index.ts ADDED
@@ -0,0 +1,6 @@
+ /**
+  * Config module exports
+  */
+
+ export * from './schema';
+ export * from './loader';
package/src/config/loader.ts ADDED
@@ -0,0 +1,112 @@
+ /**
+  * Configuration file loader
+  */
+
+ import { existsSync } from 'node:fs';
+ import { readFile } from 'node:fs/promises';
+ import { join, resolve } from 'node:path';
+ import { parse as parseYaml } from 'yaml';
+ import { type ArtemisConfig, ArtemisConfigSchema } from './schema';
+
+ const CONFIG_FILENAMES = ['artemis.config.yaml', 'artemis.config.yml', 'artemis.yaml'];
+
+ /**
+  * Find and load the configuration file
+  */
+ export async function loadConfig(configPath?: string): Promise<ArtemisConfig | null> {
+   const path = configPath || findConfigFile();
+
+   if (!path) {
+     return null;
+   }
+
+   try {
+     const content = await readFile(path, 'utf-8');
+     const raw = parseYaml(content);
+
+     // Expand environment variables
+     const expanded = expandEnvVars(raw);
+
+     const result = ArtemisConfigSchema.safeParse(expanded);
+
+     if (!result.success) {
+       const issues = result.error.issues
+         .map((i) => ` - ${i.path.join('.')}: ${i.message}`)
+         .join('\n');
+       throw new Error(`Invalid config file ${path}:\n${issues}`);
+     }
+
+     return result.data;
+   } catch (error) {
+     if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+       return null;
+     }
+     throw error;
+   }
+ }
+
+ /**
+  * Find config file in current directory or parents
+  */
+ function findConfigFile(): string | null {
+   let dir = process.cwd();
+   const root = resolve('/');
+
+   while (dir !== root) {
+     for (const filename of CONFIG_FILENAMES) {
+       const path = join(dir, filename);
+       if (existsSync(path)) {
+         return path;
+       }
+     }
+     dir = resolve(dir, '..');
+   }
+
+   return null;
+ }
+
+ /**
+  * Expand environment variables in config values
+  * Supports ${VAR} and ${VAR:-default} syntax
+  */
+ function expandEnvVars(obj: unknown): unknown {
+   if (typeof obj === 'string') {
+     return obj.replace(/\$\{([^}]+)\}/g, (_, expr) => {
+       const [varName, defaultValue] = expr.split(':-');
+       return process.env[varName] || defaultValue || '';
+     });
+   }
+
+   if (Array.isArray(obj)) {
+     return obj.map(expandEnvVars);
+   }
+
+   if (obj && typeof obj === 'object') {
+     const result: Record<string, unknown> = {};
+     for (const [key, value] of Object.entries(obj)) {
+       result[key] = expandEnvVars(value);
+     }
+     return result;
+   }
+
+   return obj;
+ }
+
+ /**
+  * Get a merged config with CLI options taking precedence
+  */
+ export function mergeConfig(
+   fileConfig: ArtemisConfig | null,
+   cliOptions: Partial<ArtemisConfig>
+ ): ArtemisConfig {
+   const defaults: ArtemisConfig = {
+     project: 'default',
+     scenariosDir: './scenarios',
+   };
+
+   return {
+     ...defaults,
+     ...fileConfig,
+     ...Object.fromEntries(Object.entries(cliOptions).filter(([_, v]) => v !== undefined)),
+   } as ArtemisConfig;
+ }
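As an illustration of how the loader above is meant to be consumed: config values may reference environment variables as ${VAR} or ${VAR:-default} (expanded by expandEnvVars before validation), and mergeConfig lets explicit CLI options win over file values and defaults. A hedged usage sketch follows; the option values are hypothetical and an ESM context with top-level await is assumed.

import { loadConfig, mergeConfig } from './config/loader';

// A config file might contain: apiKey: '${OPENAI_API_KEY:-sk-placeholder}'
const fileConfig = await loadConfig(); // null when no artemis.config.yaml is found
const effective = mergeConfig(fileConfig, { provider: 'openai', model: undefined });

// With no config file present, `effective` resolves to:
// { project: 'default', scenariosDir: './scenarios', provider: 'openai' }
// (the undefined `model` option is filtered out before merging).
console.log(effective);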
package/src/config/schema.ts ADDED
@@ -0,0 +1,56 @@
+ /**
+  * Configuration schema for artemis.config.yaml
+  */
+
+ import { z } from 'zod';
+
+ const ProviderConfigSchema = z.object({
+   apiKey: z.string().optional(),
+   baseUrl: z.string().optional(),
+   defaultModel: z.string().optional(),
+   timeout: z.number().optional(),
+   maxRetries: z.number().optional(),
+   // OpenAI specific
+   organization: z.string().optional(),
+   // Azure specific
+   resourceName: z.string().optional(),
+   deploymentName: z.string().optional(),
+   apiVersion: z.string().optional(),
+   // Vercel AI specific
+   underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
+ });
+
+ const StorageConfigSchema = z.object({
+   type: z.enum(['supabase', 'local']).default('local'),
+   url: z.string().optional(),
+   anonKey: z.string().optional(),
+   bucket: z.string().optional(),
+   basePath: z.string().optional(),
+ });
+
+ const OutputConfigSchema = z.object({
+   format: z.enum(['json', 'html', 'both']).default('json'),
+   dir: z.string().default('./artemis-output'),
+ });
+
+ const CIConfigSchema = z.object({
+   failOnRegression: z.boolean().default(true),
+   regressionThreshold: z.number().min(0).max(1).default(0.05),
+   baselineStrategy: z.enum(['latest', 'tagged', 'specific']).default('latest'),
+   baselineRunId: z.string().optional(),
+ });
+
+ export const ArtemisConfigSchema = z.object({
+   project: z.string().default('default'),
+   provider: z.string().optional(),
+   model: z.string().optional(),
+   providers: z.record(ProviderConfigSchema).optional(),
+   storage: StorageConfigSchema.optional(),
+   scenariosDir: z.string().default('./scenarios'),
+   output: OutputConfigSchema.optional(),
+   ci: CIConfigSchema.optional(),
+ });
+
+ export type ArtemisConfig = z.infer<typeof ArtemisConfigSchema>;
+ export type ProviderConfig = z.infer<typeof ProviderConfigSchema>;
+ export type StorageConfig = z.infer<typeof StorageConfigSchema>;
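For reference, a small sketch of validating a raw object against ArtemisConfigSchema; Zod fills in the declared defaults (project, scenariosDir) when those keys are omitted. The input values here are made up.

import { ArtemisConfigSchema } from './config/schema';

// Hypothetical raw config, e.g. the result of parsing artemis.config.yaml.
const parsed = ArtemisConfigSchema.safeParse({
  provider: 'openai',
  storage: { type: 'local', basePath: './artemis-runs' },
});

if (parsed.success) {
  // Defaults applied by the schema when keys are missing.
  console.log(parsed.data.project);      // 'default'
  console.log(parsed.data.scenariosDir); // './scenarios'
} else {
  console.error(parsed.error.issues);
}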
package/src/index.ts ADDED
@@ -0,0 +1,6 @@
+ /**
+  * @artemiskit/cli
+  * Command-line interface for Artemis Agent Reliability Toolkit
+  */
+
+ export { createCLI } from './cli';
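The entry point only re-exports createCLI. A sketch of how a bin script could wire it up, assuming createCLI returns a commander Command; the real bin/artemis.ts is listed above but its contents are not shown in this section.

#!/usr/bin/env node
// Hypothetical bin entry; not the actual contents of package/bin/artemis.ts.
import { createCLI } from '@artemiskit/cli';

const program = createCLI();
await program.parseAsync(process.argv);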