@artemiskit/cli 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/artemis-runs/my-project/-sEsU7KtJ7VE.json +188 -0
- package/bin/artemis.ts +13 -0
- package/dist/bin/artemis.d.ts +6 -0
- package/dist/bin/artemis.d.ts.map +1 -0
- package/dist/index.js +51297 -0
- package/dist/src/adapters.d.ts +6 -0
- package/dist/src/adapters.d.ts.map +1 -0
- package/dist/src/cli.d.ts +6 -0
- package/dist/src/cli.d.ts.map +1 -0
- package/dist/src/commands/compare.d.ts +6 -0
- package/dist/src/commands/compare.d.ts.map +1 -0
- package/dist/src/commands/history.d.ts +6 -0
- package/dist/src/commands/history.d.ts.map +1 -0
- package/dist/src/commands/index.d.ts +8 -0
- package/dist/src/commands/index.d.ts.map +1 -0
- package/dist/src/commands/init.d.ts +6 -0
- package/dist/src/commands/init.d.ts.map +1 -0
- package/dist/src/commands/redteam.d.ts +6 -0
- package/dist/src/commands/redteam.d.ts.map +1 -0
- package/dist/src/commands/report.d.ts +6 -0
- package/dist/src/commands/report.d.ts.map +1 -0
- package/dist/src/commands/run.d.ts +6 -0
- package/dist/src/commands/run.d.ts.map +1 -0
- package/dist/src/commands/stress.d.ts +6 -0
- package/dist/src/commands/stress.d.ts.map +1 -0
- package/dist/src/config/index.d.ts +6 -0
- package/dist/src/config/index.d.ts.map +1 -0
- package/dist/src/config/loader.d.ts +13 -0
- package/dist/src/config/loader.d.ts.map +1 -0
- package/dist/src/config/schema.d.ts +215 -0
- package/dist/src/config/schema.d.ts.map +1 -0
- package/dist/src/index.d.ts +6 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/utils/adapter.d.ts +71 -0
- package/dist/src/utils/adapter.d.ts.map +1 -0
- package/dist/src/utils/storage.d.ts +22 -0
- package/dist/src/utils/storage.d.ts.map +1 -0
- package/package.json +65 -0
- package/src/adapters.ts +33 -0
- package/src/cli.ts +34 -0
- package/src/commands/compare.ts +104 -0
- package/src/commands/history.ts +80 -0
- package/src/commands/index.ts +8 -0
- package/src/commands/init.ts +111 -0
- package/src/commands/redteam.ts +511 -0
- package/src/commands/report.ts +126 -0
- package/src/commands/run.ts +233 -0
- package/src/commands/stress.ts +501 -0
- package/src/config/index.ts +6 -0
- package/src/config/loader.ts +112 -0
- package/src/config/schema.ts +56 -0
- package/src/index.ts +6 -0
- package/src/utils/adapter.ts +542 -0
- package/src/utils/storage.ts +67 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compare command - Compare two test runs
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import Table from 'cli-table3';
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
import { loadConfig } from '../config/loader';
|
|
9
|
+
import { createStorage } from '../utils/storage';
|
|
10
|
+
|
|
11
|
+
/**
 * Options Commander hands to the `compare` action.
 *
 * No custom parser is registered for these options, so Commander delivers the
 * option-arguments as raw strings (the `--threshold` default is the string
 * `'0.05'`); the action is responsible for converting them.
 */
interface CompareOptions {
  /** Raw `--threshold` value: regression threshold as a fraction (0-1). */
  threshold?: string;
  /** Path passed via `--config`, forwarded to `loadConfig`. */
  config?: string;
}
|
|
15
|
+
|
|
16
|
+
/** Regression threshold used when `--threshold` is missing or not a number. */
const DEFAULT_REGRESSION_THRESHOLD = 0.05;

/**
 * Convert the raw `--threshold` option into a number.
 *
 * Only an unparseable value falls back to the default. An explicit `0` is
 * honoured (the previous `parsed || 0.05` silently turned `0` into `0.05`,
 * making it impossible to flag every drop in success rate).
 */
function parseRegressionThreshold(raw: unknown): number {
  const parsed = Number.parseFloat(String(raw));
  return Number.isNaN(parsed) ? DEFAULT_REGRESSION_THRESHOLD : parsed;
}

/**
 * Render a fraction as a percentage number without floating-point noise
 * (e.g. `0.07` -> `7`, not `7.000000000000001`).
 */
function fractionToPercent(fraction: number): number {
  return Number((fraction * 100).toFixed(2));
}

/**
 * Build the `compare` sub-command.
 *
 * The action loads the config, creates the storage adapter and asks it to
 * compare the two run IDs. It prints a summary table (success rate, median
 * latency, total tokens) and then checks for a regression: the run is
 * considered regressed when the success-rate delta is below `-threshold`.
 *
 * Exit codes: the process exits with 1 when the storage adapter has no
 * `compare` method, when a regression is detected, or when any step throws.
 *
 * @returns The configured Commander command.
 */
export function compareCommand(): Command {
  const cmd = new Command('compare');

  cmd
    .description('Compare two test runs')
    .argument('<baseline>', 'Baseline run ID')
    .argument('<current>', 'Current run ID')
    .option('--threshold <number>', 'Regression threshold (0-1)', '0.05')
    .option('--config <path>', 'Path to config file')
    .action(async (baselineId: string, currentId: string, options: CompareOptions) => {
      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        // Comparison is an optional capability of a storage adapter.
        if (!storage.compare) {
          console.error(chalk.red('Storage adapter does not support comparison'));
          process.exit(1);
        }

        console.log(chalk.bold('Comparing runs...'));
        console.log();

        const comparison = await storage.compare(baselineId, currentId);
        const { baseline, current, delta } = comparison;

        // Summary table
        const summaryTable = new Table({
          head: [
            chalk.bold('Metric'),
            chalk.bold('Baseline'),
            chalk.bold('Current'),
            chalk.bold('Delta'),
          ],
          style: { head: [], border: [] },
        });

        // Colour a delta green when it is an improvement, red when it is a
        // deterioration and dim when unchanged. `inverse` marks metrics where
        // a lower value is the improvement (latency, tokens).
        const formatDelta = (value: number, inverse = false) => {
          const improved = inverse ? value < 0 : value > 0;
          const color = improved ? chalk.green : value === 0 ? chalk.dim : chalk.red;
          const sign = value > 0 ? '+' : '';
          return color(`${sign}${value.toFixed(2)}`);
        };

        summaryTable.push(
          [
            'Success Rate',
            `${(baseline.metrics.success_rate * 100).toFixed(1)}%`,
            `${(current.metrics.success_rate * 100).toFixed(1)}%`,
            `${formatDelta(delta.successRate * 100)}%`,
          ],
          [
            'Median Latency',
            `${baseline.metrics.median_latency_ms}ms`,
            `${current.metrics.median_latency_ms}ms`,
            `${formatDelta(delta.latency, true)}ms`,
          ],
          [
            'Total Tokens',
            baseline.metrics.total_tokens.toLocaleString(),
            current.metrics.total_tokens.toLocaleString(),
            formatDelta(delta.tokens, true),
          ]
        );

        console.log(summaryTable.toString());
        console.log();

        // Check for regression
        const threshold = parseRegressionThreshold(options.threshold);
        const hasRegression = delta.successRate < -threshold;

        if (hasRegression) {
          console.log(
            chalk.red('⚠ Regression detected!'),
            `Success rate dropped by ${Math.abs(delta.successRate * 100).toFixed(1)}%`,
            `(threshold: ${fractionToPercent(threshold)}%)`
          );
          process.exit(1);
        } else {
          console.log(chalk.green('✓ No regression detected'));
        }
      } catch (error) {
        // Anything can be thrown; only Error instances carry `.message`.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        process.exit(1);
      }
    });

  return cmd;
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* History command - View run history
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import Table from 'cli-table3';
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
import { loadConfig } from '../config/loader';
|
|
9
|
+
import { createStorage } from '../utils/storage';
|
|
10
|
+
|
|
11
|
+
/**
 * Options Commander hands to the `history` action.
 *
 * No custom parser is registered for these options, so every value arrives as
 * a raw string (the `--limit` default is the string `'20'`); the action is
 * responsible for converting them.
 */
interface HistoryOptions {
  /** `--project` filter, forwarded to `storage.list`. */
  project?: string;
  /** `--scenario` filter, forwarded to `storage.list`. */
  scenario?: string;
  /** Raw `--limit` value: maximum number of runs to list. */
  limit?: string;
  /** Path passed via `--config`, forwarded to `loadConfig`. */
  config?: string;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Build the `history` sub-command.
 *
 * The action loads the config, creates the storage adapter and lists stored
 * runs, optionally filtered by project and scenario. Results are printed as a
 * table whose success-rate column is green at >= 90%, yellow at >= 70% and red
 * below that. The process exits with 1 when any step throws.
 *
 * @returns The configured Commander command.
 */
export function historyCommand(): Command {
  const cmd = new Command('history');

  cmd
    .description('View run history')
    .option('-p, --project <project>', 'Filter by project')
    .option('-s, --scenario <scenario>', 'Filter by scenario')
    .option('-l, --limit <number>', 'Limit number of results', '20')
    .option('--config <path>', 'Path to config file')
    .action(async (options: HistoryOptions) => {
      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        // Parse as base 10 explicitly; anything that is not a positive integer
        // (NaN, 0, negative) falls back to the default of 20 so a nonsensical
        // limit is never forwarded to the storage adapter.
        const parsedLimit = Number.parseInt(String(options.limit), 10);
        const limit = parsedLimit > 0 ? parsedLimit : 20;

        const runs = await storage.list({
          project: options.project,
          scenario: options.scenario,
          limit,
        });

        if (runs.length === 0) {
          console.log(chalk.dim('No runs found.'));
          return;
        }

        const table = new Table({
          head: [
            chalk.bold('Run ID'),
            chalk.bold('Scenario'),
            chalk.bold('Success Rate'),
            chalk.bold('Date'),
          ],
          style: { head: [], border: [] },
        });

        for (const run of runs) {
          // Traffic-light colouring of the success rate (a 0-1 fraction here;
          // it is multiplied by 100 for display below).
          const successColor =
            run.successRate >= 0.9
              ? chalk.green
              : run.successRate >= 0.7
                ? chalk.yellow
                : chalk.red;

          table.push([
            run.runId,
            run.scenario,
            successColor(`${(run.successRate * 100).toFixed(1)}%`),
            new Date(run.createdAt).toLocaleString(),
          ]);
        }

        console.log(table.toString());
        console.log();
        console.log(chalk.dim(`Showing ${runs.length} runs`));
      } catch (error) {
        // Anything can be thrown; only Error instances carry `.message`.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        process.exit(1);
      }
    });

  return cmd;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Init command - Initialize ArtemisKit in a project
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { mkdir, writeFile } from 'node:fs/promises';
|
|
6
|
+
import { join } from 'node:path';
|
|
7
|
+
import chalk from 'chalk';
|
|
8
|
+
import { Command } from 'commander';
|
|
9
|
+
|
|
10
|
+
/**
 * YAML written to `artemis.config.yaml` by the `init` command.
 *
 * The `\${...}` sequences are escaped so the template literal emits them
 * verbatim (e.g. `${OPENAI_API_KEY}`) instead of interpolating them.
 * Presumably the config loader expands them from the environment - confirm.
 *
 * NOTE(review): nested keys are shown with two-space YAML indentation; confirm
 * the exact leading whitespace against the published file.
 */
const DEFAULT_CONFIG = `# ArtemisKit Configuration
project: my-project

# Default provider settings
provider: openai
model: gpt-4

# Provider configurations
providers:
  openai:
    apiKey: \${OPENAI_API_KEY}
    defaultModel: gpt-4

  azure-openai:
    apiKey: \${AZURE_OPENAI_API_KEY}
    resourceName: \${AZURE_OPENAI_RESOURCE}
    deploymentName: \${AZURE_OPENAI_DEPLOYMENT}
    apiVersion: "2024-02-15-preview"

# Storage configuration
storage:
  type: local
  basePath: ./artemis-runs

# Scenarios directory
scenariosDir: ./scenarios

# Output settings
output:
  format: json
  dir: ./artemis-output
`;
|
|
42
|
+
|
|
43
|
+
/**
 * YAML written to `scenarios/example.yaml` by the `init` command: two sample
 * cases, one checked with a regex and one with an exact match.
 *
 * Each `\\\\` in the regex line becomes `\\` in the written file, which a
 * YAML double-quoted string in turn reads as a single backslash, so the
 * effective pattern is `^\w+\s+\w+\s+\w+$`.
 *
 * NOTE(review): nested keys are shown with two-space YAML indentation; confirm
 * the exact leading whitespace against the published file.
 */
const DEFAULT_SCENARIO = `name: Example Scenario
description: Basic example scenario for testing
version: "1.0"
provider: openai
model: gpt-4
temperature: 0

cases:
  - id: greeting
    name: Simple Greeting
    prompt: "Say hello in exactly 3 words."
    expected:
      type: regex
      pattern: "^\\\\w+\\\\s+\\\\w+\\\\s+\\\\w+$"
    tags:
      - greeting
      - basic

  - id: math
    name: Basic Math
    prompt: "What is 2 + 2? Reply with just the number."
    expected:
      type: exact
      value: "4"
    tags:
      - math
      - basic
`;
|
|
71
|
+
|
|
72
|
+
/**
 * Whether a thrown value is the "file already exists" error that
 * `writeFile` raises for the exclusive-create (`'wx'`) flag.
 */
function isFileExistsError(error: unknown): boolean {
  return (
    typeof error === 'object' && error !== null && 'code' in error && error.code === 'EEXIST'
  );
}

/**
 * Build the `init` sub-command.
 *
 * The action creates the `scenarios`, `artemis-runs` and `artemis-output`
 * directories in the current working directory, then writes
 * `artemis.config.yaml` and `scenarios/example.yaml`.
 *
 * Existing files are left untouched unless `-f, --force` is given. Previously
 * the flag was declared but ignored, and both files were always overwritten.
 * The process exits with 1 when any step throws.
 *
 * @returns The configured Commander command.
 */
export function initCommand(): Command {
  const cmd = new Command('init');

  cmd
    .description('Initialize ArtemisKit in the current directory')
    .option('-f, --force', 'Overwrite existing configuration')
    .action(async (options: { force?: boolean }) => {
      try {
        const cwd = process.cwd();
        // 'wx' makes the write fail with EEXIST instead of clobbering a file;
        // doing the check inside the write itself avoids a check-then-write race.
        const writeFlag = options.force === true ? 'w' : 'wx';

        // Write one scaffold file, reporting whether it was created or kept.
        const writeScaffoldFile = async (
          absolutePath: string,
          label: string,
          contents: string
        ): Promise<void> => {
          try {
            await writeFile(absolutePath, contents, { flag: writeFlag });
            console.log(chalk.green('✓'), `Created ${label}`);
          } catch (error) {
            if (!isFileExistsError(error)) {
              throw error;
            }
            console.log(
              chalk.yellow('!'),
              `Skipped ${label} (already exists, use --force to overwrite)`
            );
          }
        };

        // Create directories
        await mkdir(join(cwd, 'scenarios'), { recursive: true });
        await mkdir(join(cwd, 'artemis-runs'), { recursive: true });
        await mkdir(join(cwd, 'artemis-output'), { recursive: true });

        // Write config file
        await writeScaffoldFile(
          join(cwd, 'artemis.config.yaml'),
          'artemis.config.yaml',
          DEFAULT_CONFIG
        );

        // Write example scenario
        await writeScaffoldFile(
          join(cwd, 'scenarios', 'example.yaml'),
          'scenarios/example.yaml',
          DEFAULT_SCENARIO
        );

        console.log();
        console.log(chalk.bold('ArtemisKit initialized successfully!'));
        console.log();
        console.log('Next steps:');
        console.log('  1. Configure your API keys in .env or environment variables');
        console.log('  2. Edit scenarios/example.yaml to add your test cases');
        console.log('  3. Run tests with: artemiskit run scenarios/example.yaml');
      } catch (error) {
        // Anything can be thrown; only Error instances carry `.message`.
        const message = error instanceof Error ? error.message : String(error);
        console.error(chalk.red('Error:'), message);
        process.exit(1);
      }
    });

  return cmd;
}
|