@artemiskit/cli 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CHANGELOG.md +62 -0
  2. package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
  3. package/bin/artemis.ts +13 -0
  4. package/dist/bin/artemis.d.ts +6 -0
  5. package/dist/bin/artemis.d.ts.map +1 -0
  6. package/dist/index.js +51297 -0
  7. package/dist/src/adapters.d.ts +6 -0
  8. package/dist/src/adapters.d.ts.map +1 -0
  9. package/dist/src/cli.d.ts +6 -0
  10. package/dist/src/cli.d.ts.map +1 -0
  11. package/dist/src/commands/compare.d.ts +6 -0
  12. package/dist/src/commands/compare.d.ts.map +1 -0
  13. package/dist/src/commands/history.d.ts +6 -0
  14. package/dist/src/commands/history.d.ts.map +1 -0
  15. package/dist/src/commands/index.d.ts +8 -0
  16. package/dist/src/commands/index.d.ts.map +1 -0
  17. package/dist/src/commands/init.d.ts +6 -0
  18. package/dist/src/commands/init.d.ts.map +1 -0
  19. package/dist/src/commands/redteam.d.ts +6 -0
  20. package/dist/src/commands/redteam.d.ts.map +1 -0
  21. package/dist/src/commands/report.d.ts +6 -0
  22. package/dist/src/commands/report.d.ts.map +1 -0
  23. package/dist/src/commands/run.d.ts +6 -0
  24. package/dist/src/commands/run.d.ts.map +1 -0
  25. package/dist/src/commands/stress.d.ts +6 -0
  26. package/dist/src/commands/stress.d.ts.map +1 -0
  27. package/dist/src/config/index.d.ts +6 -0
  28. package/dist/src/config/index.d.ts.map +1 -0
  29. package/dist/src/config/loader.d.ts +13 -0
  30. package/dist/src/config/loader.d.ts.map +1 -0
  31. package/dist/src/config/schema.d.ts +215 -0
  32. package/dist/src/config/schema.d.ts.map +1 -0
  33. package/dist/src/index.d.ts +6 -0
  34. package/dist/src/index.d.ts.map +1 -0
  35. package/dist/src/utils/adapter.d.ts +71 -0
  36. package/dist/src/utils/adapter.d.ts.map +1 -0
  37. package/dist/src/utils/storage.d.ts +22 -0
  38. package/dist/src/utils/storage.d.ts.map +1 -0
  39. package/package.json +65 -0
  40. package/src/adapters.ts +33 -0
  41. package/src/cli.ts +34 -0
  42. package/src/commands/compare.ts +104 -0
  43. package/src/commands/history.ts +80 -0
  44. package/src/commands/index.ts +8 -0
  45. package/src/commands/init.ts +111 -0
  46. package/src/commands/redteam.ts +511 -0
  47. package/src/commands/report.ts +126 -0
  48. package/src/commands/run.ts +233 -0
  49. package/src/commands/stress.ts +501 -0
  50. package/src/config/index.ts +6 -0
  51. package/src/config/loader.ts +112 -0
  52. package/src/config/schema.ts +56 -0
  53. package/src/index.ts +6 -0
  54. package/src/utils/adapter.ts +542 -0
  55. package/src/utils/storage.ts +67 -0
  56. package/tsconfig.json +13 -0
@@ -0,0 +1,104 @@
1
+ /**
2
+ * Compare command - Compare two test runs
3
+ */
4
+
5
+ import chalk from 'chalk';
6
+ import Table from 'cli-table3';
7
+ import { Command } from 'commander';
8
+ import { loadConfig } from '../config/loader';
9
+ import { createStorage } from '../utils/storage';
10
+
11
/**
 * Options parsed by commander for the `compare` command.
 *
 * `threshold` is declared with `.option('--threshold <number>', ..., '0.05')`
 * and no custom parser, so at runtime it arrives as a string; the numeric
 * variant is kept so existing callers that pass a number still type-check.
 */
interface CompareOptions {
  /** Raw regression threshold (expected range 0-1); must be parsed before use. */
  threshold?: string | number;
  /** Path to the config file handed to `loadConfig`. */
  config?: string;
}
15
+
16
/** Regression threshold applied when `--threshold` is absent or not a number. */
const DEFAULT_REGRESSION_THRESHOLD = 0.05;

/**
 * Parse the raw `--threshold` option value.
 *
 * Unlike `parseFloat(x) || fallback`, an explicit `0` is honoured; only
 * values that do not parse to a number fall back.
 *
 * @param raw - Value as delivered by commander (a string, or undefined).
 * @param fallback - Value returned when `raw` does not parse.
 * @returns The parsed threshold, or `fallback`.
 */
function parseThreshold(raw: unknown, fallback: number = DEFAULT_REGRESSION_THRESHOLD): number {
  const parsed = Number.parseFloat(String(raw));
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Render a 0-1 threshold as a percentage without floating-point noise
 * (e.g. `0.07` becomes `7%`, not `7.000000000000001%`).
 */
function formatThresholdPercent(threshold: number): string {
  return `${Number((threshold * 100).toFixed(2))}%`;
}

/**
 * Build the `compare` command.
 *
 * The action loads the config, asks the storage adapter to compare the two
 * run IDs, prints a summary table (success rate, median latency, total
 * tokens) and exits with code 1 when the success rate dropped by more than
 * the threshold, when the adapter has no `compare` method, or on any error.
 *
 * @returns The configured commander `Command`.
 */
export function compareCommand(): Command {
  const cmd = new Command('compare');

  cmd
    .description('Compare two test runs')
    .argument('<baseline>', 'Baseline run ID')
    .argument('<current>', 'Current run ID')
    .option('--threshold <number>', 'Regression threshold (0-1)', '0.05')
    .option('--config <path>', 'Path to config file')
    .action(async (baselineId: string, currentId: string, options: CompareOptions) => {
      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!storage.compare) {
          console.error(chalk.red('Storage adapter does not support comparison'));
          process.exit(1);
        }

        console.log(chalk.bold('Comparing runs...'));
        console.log();

        const comparison = await storage.compare(baselineId, currentId);
        const { baseline, current, delta } = comparison;

        // Summary table
        const summaryTable = new Table({
          head: [
            chalk.bold('Metric'),
            chalk.bold('Baseline'),
            chalk.bold('Current'),
            chalk.bold('Delta'),
          ],
          style: { head: [], border: [] },
        });

        // `inverse` marks metrics where a lower value is the improvement
        // (latency, tokens), so a negative delta is shown in green.
        const formatDelta = (value: number, inverse = false): string => {
          const improved = inverse ? value < 0 : value > 0;
          const color = improved ? chalk.green : value === 0 ? chalk.dim : chalk.red;
          const sign = value > 0 ? '+' : '';
          return color(`${sign}${value.toFixed(2)}`);
        };

        summaryTable.push(
          [
            'Success Rate',
            `${(baseline.metrics.success_rate * 100).toFixed(1)}%`,
            `${(current.metrics.success_rate * 100).toFixed(1)}%`,
            `${formatDelta(delta.successRate * 100)}%`,
          ],
          [
            'Median Latency',
            `${baseline.metrics.median_latency_ms}ms`,
            `${current.metrics.median_latency_ms}ms`,
            `${formatDelta(delta.latency, true)}ms`,
          ],
          [
            'Total Tokens',
            baseline.metrics.total_tokens.toLocaleString(),
            current.metrics.total_tokens.toLocaleString(),
            formatDelta(delta.tokens, true),
          ]
        );

        console.log(summaryTable.toString());
        console.log();

        // Check for regression: a drop in success rate larger than the threshold.
        const threshold = parseThreshold(options.threshold);
        const hasRegression = delta.successRate < -threshold;

        if (hasRegression) {
          console.log(
            chalk.red('⚠ Regression detected!'),
            `Success rate dropped by ${Math.abs(delta.successRate * 100).toFixed(1)}%`,
            `(threshold: ${formatThresholdPercent(threshold)})`
          );
          process.exit(1);
        } else {
          console.log(chalk.green('✓ No regression detected'));
        }
      } catch (error: unknown) {
        // Non-Error throwables have no `.message`; stringify them instead.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        process.exit(1);
      }
    });

  return cmd;
}
@@ -0,0 +1,80 @@
1
+ /**
2
+ * History command - View run history
3
+ */
4
+
5
+ import chalk from 'chalk';
6
+ import Table from 'cli-table3';
7
+ import { Command } from 'commander';
8
+ import { loadConfig } from '../config/loader';
9
+ import { createStorage } from '../utils/storage';
10
+
11
/**
 * Options parsed by commander for the `history` command.
 *
 * `limit` is declared with `.option('-l, --limit <number>', ..., '20')` and
 * no custom parser, so at runtime it arrives as a string; the numeric variant
 * is kept so existing callers that pass a number still type-check.
 */
interface HistoryOptions {
  /** Only list runs belonging to this project. */
  project?: string;
  /** Only list runs of this scenario. */
  scenario?: string;
  /** Raw maximum number of runs to list; must be parsed before use. */
  limit?: string | number;
  /** Path to the config file handed to `loadConfig`. */
  config?: string;
}
17
+
18
/** Number of runs listed when `--limit` is absent or invalid. */
const DEFAULT_HISTORY_LIMIT = 20;

/**
 * Parse the raw `--limit` option value as a base-10 positive integer.
 *
 * @param raw - Value as delivered by commander (a string, or undefined).
 * @param fallback - Value returned when `raw` is not a positive integer.
 * @returns The parsed limit, or `fallback`.
 */
function parseLimit(raw: unknown, fallback: number = DEFAULT_HISTORY_LIMIT): number {
  // Explicit radix so inputs such as "0x10" are not read as hexadecimal.
  const parsed = Number.parseInt(String(raw), 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Build the `history` command.
 *
 * The action loads the config, lists runs from storage filtered by the
 * optional project/scenario and capped by the limit, then prints them as a
 * table with the success rate coloured green (>= 90%), yellow (>= 70%) or
 * red. Exits with code 1 on any error.
 *
 * @returns The configured commander `Command`.
 */
export function historyCommand(): Command {
  const cmd = new Command('history');

  cmd
    .description('View run history')
    .option('-p, --project <project>', 'Filter by project')
    .option('-s, --scenario <scenario>', 'Filter by scenario')
    .option('-l, --limit <number>', 'Limit number of results', '20')
    .option('--config <path>', 'Path to config file')
    .action(async (options: HistoryOptions) => {
      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });
        const limit = parseLimit(options.limit);

        const runs = await storage.list({
          project: options.project,
          scenario: options.scenario,
          limit,
        });

        if (runs.length === 0) {
          console.log(chalk.dim('No runs found.'));
          return;
        }

        const table = new Table({
          head: [
            chalk.bold('Run ID'),
            chalk.bold('Scenario'),
            chalk.bold('Success Rate'),
            chalk.bold('Date'),
          ],
          style: { head: [], border: [] },
        });

        for (const run of runs) {
          const successColor =
            run.successRate >= 0.9
              ? chalk.green
              : run.successRate >= 0.7
                ? chalk.yellow
                : chalk.red;

          table.push([
            run.runId,
            run.scenario,
            successColor(`${(run.successRate * 100).toFixed(1)}%`),
            new Date(run.createdAt).toLocaleString(),
          ]);
        }

        console.log(table.toString());
        console.log();
        console.log(chalk.dim(`Showing ${runs.length} runs`));
      } catch (error: unknown) {
        // Non-Error throwables have no `.message`; stringify them instead.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        process.exit(1);
      }
    });

  return cmd;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * CLI commands exports
3
+ */
4
+
5
+ export { initCommand } from './init';
6
+ export { runCommand } from './run';
7
+ export { compareCommand } from './compare';
8
+ export { historyCommand } from './history';
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Init command - Initialize ArtemisKit in a project
3
+ */
4
+
5
+ import { mkdir, writeFile } from 'node:fs/promises';
6
+ import { join } from 'node:path';
7
+ import chalk from 'chalk';
8
+ import { Command } from 'commander';
9
+
10
/**
 * Contents written to `artemis.config.yaml` by the `init` command.
 *
 * The `\${...}` sequences are escaped so the placeholders are emitted
 * literally into the YAML file instead of being interpolated here.
 * Nested keys are indented by two spaces; without that nesting the
 * `providers`, `storage` and `output` mappings would be empty and `apiKey`
 * would appear twice at the top level.
 */
const DEFAULT_CONFIG = `# ArtemisKit Configuration
project: my-project

# Default provider settings
provider: openai
model: gpt-4

# Provider configurations
providers:
  openai:
    apiKey: \${OPENAI_API_KEY}
    defaultModel: gpt-4

  azure-openai:
    apiKey: \${AZURE_OPENAI_API_KEY}
    resourceName: \${AZURE_OPENAI_RESOURCE}
    deploymentName: \${AZURE_OPENAI_DEPLOYMENT}
    apiVersion: "2024-02-15-preview"

# Storage configuration
storage:
  type: local
  basePath: ./artemis-runs

# Scenarios directory
scenariosDir: ./scenarios

# Output settings
output:
  format: json
  dir: ./artemis-output
`;
42
+
43
/**
 * Contents written to `scenarios/example.yaml` by the `init` command.
 *
 * The regex backslashes are escaped twice: once for this template literal
 * and once for the YAML double-quoted string, so the emitted file contains
 * `\\w` / `\\s`.
 * List items under `cases` and the children of `expected` / `tags` are
 * indented so each case is a single mapping rather than a run of flat
 * top-level keys.
 */
const DEFAULT_SCENARIO = `name: Example Scenario
description: Basic example scenario for testing
version: "1.0"
provider: openai
model: gpt-4
temperature: 0

cases:
  - id: greeting
    name: Simple Greeting
    prompt: "Say hello in exactly 3 words."
    expected:
      type: regex
      pattern: "^\\\\w+\\\\s+\\\\w+\\\\s+\\\\w+$"
    tags:
      - greeting
      - basic

  - id: math
    name: Basic Math
    prompt: "What is 2 + 2? Reply with just the number."
    expected:
      type: exact
      value: "4"
    tags:
      - math
      - basic
`;
71
+
72
/** Options parsed by commander for the `init` command. */
interface InitOptions {
  /** When true, existing scaffold files are overwritten. */
  force?: boolean;
}

/**
 * Write one scaffold file, refusing to clobber an existing file unless forced.
 *
 * @param path - Destination file path.
 * @param contents - Text to write.
 * @param force - Overwrite an existing file when true.
 * @returns `true` if the file was written, `false` if it already existed and
 *   was left untouched.
 * @throws Any file-system error other than "already exists" in non-force mode.
 */
async function writeScaffoldFile(path: string, contents: string, force: boolean): Promise<boolean> {
  try {
    // 'wx' fails with EEXIST instead of truncating an existing file.
    await writeFile(path, contents, { flag: force ? 'w' : 'wx' });
    return true;
  } catch (error: unknown) {
    const alreadyExists =
      error instanceof Error && 'code' in error && error.code === 'EEXIST';
    if (!force && alreadyExists) {
      return false;
    }
    throw error;
  }
}

/**
 * Build the `init` command.
 *
 * The action creates the `scenarios`, `artemis-runs` and `artemis-output`
 * directories under the current working directory, then writes
 * `artemis.config.yaml` and `scenarios/example.yaml`. Existing files are
 * kept unless `--force` is given. Exits with code 1 on any error.
 *
 * @returns The configured commander `Command`.
 */
export function initCommand(): Command {
  const cmd = new Command('init');

  cmd
    .description('Initialize ArtemisKit in the current directory')
    .option('-f, --force', 'Overwrite existing configuration')
    .action(async (options: InitOptions = {}) => {
      try {
        const cwd = process.cwd();
        const force = options.force === true;

        // Create directories
        await mkdir(join(cwd, 'scenarios'), { recursive: true });
        await mkdir(join(cwd, 'artemis-runs'), { recursive: true });
        await mkdir(join(cwd, 'artemis-output'), { recursive: true });

        // Write config file
        const configPath = join(cwd, 'artemis.config.yaml');
        if (await writeScaffoldFile(configPath, DEFAULT_CONFIG, force)) {
          console.log(chalk.green('✓'), 'Created artemis.config.yaml');
        } else {
          console.log(
            chalk.yellow('!'),
            'Skipped artemis.config.yaml (already exists; use --force to overwrite)'
          );
        }

        // Write example scenario
        const scenarioPath = join(cwd, 'scenarios', 'example.yaml');
        if (await writeScaffoldFile(scenarioPath, DEFAULT_SCENARIO, force)) {
          console.log(chalk.green('✓'), 'Created scenarios/example.yaml');
        } else {
          console.log(
            chalk.yellow('!'),
            'Skipped scenarios/example.yaml (already exists; use --force to overwrite)'
          );
        }

        console.log();
        console.log(chalk.bold('ArtemisKit initialized successfully!'));
        console.log();
        console.log('Next steps:');
        console.log('  1. Configure your API keys in .env or environment variables');
        console.log('  2. Edit scenarios/example.yaml to add your test cases');
        // NOTE(review): the package ships bin/artemis.ts — confirm the
        // installed binary is really named `artemiskit`.
        console.log('  3. Run tests with: artemiskit run scenarios/example.yaml');
      } catch (error: unknown) {
        // Non-Error throwables have no `.message`; stringify them instead.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        process.exit(1);
      }
    });

  return cmd;
}