@artemiskit/cli 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/dist/index.js +65256 -63756
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/commands/baseline.d.ts +9 -0
- package/dist/src/commands/baseline.d.ts.map +1 -0
- package/dist/src/commands/history.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/commands/stress.d.ts.map +1 -1
- package/dist/src/config/schema.d.ts +8 -0
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/utils/adapter.d.ts.map +1 -1
- package/package.json +6 -6
- package/src/cli.ts +2 -0
- package/src/commands/baseline.ts +473 -0
- package/src/commands/history.ts +58 -9
- package/src/commands/redteam.ts +19 -1
- package/src/commands/run.ts +479 -52
- package/src/commands/stress.ts +28 -0
- package/src/config/schema.ts +3 -0
- package/src/utils/adapter.ts +7 -0
package/dist/src/cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAYpC,wBAAgB,SAAS,IAAI,OAAO,CAwCnC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Baseline command - Manage baseline runs for regression detection
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from 'commander';
|
|
5
|
+
/**
 * Builds the top-level `baseline` command together with its subcommands.
 */
export declare function baselineCommand(): Command;
|
|
9
|
+
//# sourceMappingURL=baseline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"baseline.d.ts","sourceRoot":"","sources":["../../../src/commands/baseline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAocpC;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAWzC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA4IpC,wBAAgB,cAAc,IAAI,OAAO,CAmFxC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAsCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAoCpC,wBAAgB,cAAc,IAAI,OAAO,CAycxC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAyiBpC,wBAAgB,UAAU,IAAI,OAAO,CAggBpC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAoBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAsCpC,wBAAgB,aAAa,IAAI,OAAO,CA+SvC"}
|
|
@@ -13,6 +13,7 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
13
13
|
deploymentName: z.ZodOptional<z.ZodString>;
|
|
14
14
|
apiVersion: z.ZodOptional<z.ZodString>;
|
|
15
15
|
embeddingDeploymentName: z.ZodOptional<z.ZodString>;
|
|
16
|
+
modelFamily: z.ZodOptional<z.ZodString>;
|
|
16
17
|
underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
|
|
17
18
|
}, "strip", z.ZodTypeAny, {
|
|
18
19
|
apiKey?: string | undefined;
|
|
@@ -25,6 +26,7 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
25
26
|
deploymentName?: string | undefined;
|
|
26
27
|
apiVersion?: string | undefined;
|
|
27
28
|
embeddingDeploymentName?: string | undefined;
|
|
29
|
+
modelFamily?: string | undefined;
|
|
28
30
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
29
31
|
}, {
|
|
30
32
|
apiKey?: string | undefined;
|
|
@@ -37,6 +39,7 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
37
39
|
deploymentName?: string | undefined;
|
|
38
40
|
apiVersion?: string | undefined;
|
|
39
41
|
embeddingDeploymentName?: string | undefined;
|
|
42
|
+
modelFamily?: string | undefined;
|
|
40
43
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
41
44
|
}>;
|
|
42
45
|
declare const StorageConfigSchema: z.ZodObject<{
|
|
@@ -73,6 +76,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
73
76
|
deploymentName: z.ZodOptional<z.ZodString>;
|
|
74
77
|
apiVersion: z.ZodOptional<z.ZodString>;
|
|
75
78
|
embeddingDeploymentName: z.ZodOptional<z.ZodString>;
|
|
79
|
+
modelFamily: z.ZodOptional<z.ZodString>;
|
|
76
80
|
underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
|
|
77
81
|
}, "strip", z.ZodTypeAny, {
|
|
78
82
|
apiKey?: string | undefined;
|
|
@@ -85,6 +89,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
85
89
|
deploymentName?: string | undefined;
|
|
86
90
|
apiVersion?: string | undefined;
|
|
87
91
|
embeddingDeploymentName?: string | undefined;
|
|
92
|
+
modelFamily?: string | undefined;
|
|
88
93
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
89
94
|
}, {
|
|
90
95
|
apiKey?: string | undefined;
|
|
@@ -97,6 +102,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
97
102
|
deploymentName?: string | undefined;
|
|
98
103
|
apiVersion?: string | undefined;
|
|
99
104
|
embeddingDeploymentName?: string | undefined;
|
|
105
|
+
modelFamily?: string | undefined;
|
|
100
106
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
101
107
|
}>>>;
|
|
102
108
|
storage: z.ZodOptional<z.ZodObject<{
|
|
@@ -161,6 +167,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
161
167
|
deploymentName?: string | undefined;
|
|
162
168
|
apiVersion?: string | undefined;
|
|
163
169
|
embeddingDeploymentName?: string | undefined;
|
|
170
|
+
modelFamily?: string | undefined;
|
|
164
171
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
165
172
|
}> | undefined;
|
|
166
173
|
storage?: {
|
|
@@ -195,6 +202,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
195
202
|
deploymentName?: string | undefined;
|
|
196
203
|
apiVersion?: string | undefined;
|
|
197
204
|
embeddingDeploymentName?: string | undefined;
|
|
205
|
+
modelFamily?: string | undefined;
|
|
198
206
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
199
207
|
}> | undefined;
|
|
200
208
|
storage?: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA8ErF;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA8ErF;AAsXD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@artemiskit/cli",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.3",
|
|
4
4
|
"description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -45,11 +45,11 @@
|
|
|
45
45
|
"test": "bun test"
|
|
46
46
|
},
|
|
47
47
|
"dependencies": {
|
|
48
|
-
"@artemiskit/adapter-openai": "0.1.
|
|
49
|
-
"@artemiskit/adapter-vercel-ai": "0.1.
|
|
50
|
-
"@artemiskit/core": "0.2.
|
|
51
|
-
"@artemiskit/redteam": "0.2.
|
|
52
|
-
"@artemiskit/reports": "0.2.
|
|
48
|
+
"@artemiskit/adapter-openai": "0.1.10",
|
|
49
|
+
"@artemiskit/adapter-vercel-ai": "0.1.10",
|
|
50
|
+
"@artemiskit/core": "0.2.3",
|
|
51
|
+
"@artemiskit/redteam": "0.2.3",
|
|
52
|
+
"@artemiskit/reports": "0.2.3",
|
|
53
53
|
"chalk": "^5.3.0",
|
|
54
54
|
"cli-table3": "^0.6.3",
|
|
55
55
|
"commander": "^12.0.0",
|
package/src/cli.ts
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import { Command } from 'commander';
|
|
6
6
|
import { version } from '../package.json';
|
|
7
|
+
import { baselineCommand } from './commands/baseline';
|
|
7
8
|
import { compareCommand } from './commands/compare';
|
|
8
9
|
import { historyCommand } from './commands/history';
|
|
9
10
|
import { initCommand } from './commands/init';
|
|
@@ -45,6 +46,7 @@ export function createCLI(): Command {
|
|
|
45
46
|
|
|
46
47
|
program.addCommand(initCommand());
|
|
47
48
|
program.addCommand(runCommand());
|
|
49
|
+
program.addCommand(baselineCommand());
|
|
48
50
|
program.addCommand(compareCommand());
|
|
49
51
|
program.addCommand(historyCommand());
|
|
50
52
|
program.addCommand(reportCommand());
|
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Baseline command - Manage baseline runs for regression detection
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { BaselineMetadata, BaselineStorageAdapter } from '@artemiskit/core';
|
|
6
|
+
import chalk from 'chalk';
|
|
7
|
+
import { Command } from 'commander';
|
|
8
|
+
import { loadConfig } from '../config/loader.js';
|
|
9
|
+
import { createSpinner, icons, isTTY, padText, renderError } from '../ui/index.js';
|
|
10
|
+
import { createStorage } from '../utils/storage.js';
|
|
11
|
+
|
|
12
|
+
/** Options accepted by the `baseline set` subcommand. */
interface BaselineSetOptions {
  /** Path to config file (`--config <path>`). */
  config?: string;
  /** Scenario name override (`-s, --scenario <name>`). */
  scenario?: string;
  /** Optional tag/description for the baseline (`-t, --tag <tag>`). */
  tag?: string;
}
|
|
17
|
+
|
|
18
|
+
/** Options accepted by the `baseline list` subcommand. */
interface BaselineListOptions {
  /** Emit the baselines as JSON (`--json`). */
  json?: boolean;
  /** Path to config file (`--config <path>`). */
  config?: string;
}
|
|
22
|
+
|
|
23
|
+
/** Options accepted by the `baseline remove` subcommand. */
interface BaselineRemoveOptions {
  /** Path to config file (`--config <path>`). */
  config?: string;
  /** Skip the confirmation prompt (`-f, --force`). */
  force?: boolean;
  /** Treat the identifier as a scenario name instead of a run ID (`-s, --scenario`). */
  scenario?: boolean;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Check if storage adapter supports baselines.
 *
 * A value qualifies only when it is a non-null object whose `setBaseline`,
 * `getBaseline`, `listBaselines` and `removeBaseline` members are all
 * functions. Checking callability (rather than mere presence of the key)
 * keeps the type predicate honest: a property that exists but is not a
 * function would otherwise pass the guard and fail later at the call site.
 *
 * @param storage - Any value, typically the result of `createStorage`.
 * @returns `true` when the value can be used as a `BaselineStorageAdapter`.
 */
function isBaselineStorage(storage: unknown): storage is BaselineStorageAdapter {
  if (typeof storage !== 'object' || storage === null) {
    return false;
  }

  const candidate = storage as Record<string, unknown>;
  const requiredMethods = ['setBaseline', 'getBaseline', 'listBaselines', 'removeBaseline'];

  // Property lookup walks the prototype chain, so class-based adapters pass too.
  return requiredMethods.every((method) => typeof candidate[method] === 'function');
}
|
|
41
|
+
|
|
42
|
+
/**
 * Build the box-drawn baselines table used on interactive terminals.
 *
 * The output has a centered title, a header row, and one row per baseline.
 * The success-rate cell is green at 90% or more, yellow at 70% or more, and
 * red otherwise. Scenario names longer than the column allows are cut and
 * suffixed with an ellipsis.
 *
 * @param baselines - Baselines to render, one table row each.
 * @returns The whole table as a single newline-joined string.
 */
function renderBaselinesTable(baselines: BaselineMetadata[]): string {
  // Column widths, in characters.
  const columns = { scenario: 30, runId: 16, rate: 12, date: 20, tag: 12 };

  // Two edge characters per side plus a single space between adjacent columns.
  const totalWidth =
    2 +
    columns.scenario +
    1 +
    columns.runId +
    1 +
    columns.rate +
    1 +
    columns.date +
    1 +
    columns.tag +
    2;
  const innerWidth = totalWidth - 2;
  const heavyRule = '═'.repeat(innerWidth);

  const pickRateColor = (rate: number) => {
    if (rate >= 0.9) {
      return chalk.green;
    }
    if (rate >= 0.7) {
      return chalk.yellow;
    }
    return chalk.red;
  };

  const buildHeader = () => {
    const scenarioCell = padText('Scenario', columns.scenario);
    const runIdCell = padText('Run ID', columns.runId);
    const rateCell = padText('Success Rate', columns.rate, 'right');
    const dateCell = padText('Created', columns.date, 'right');
    const tagCell = padText('Tag', columns.tag, 'right');
    return `║ ${scenarioCell} ${runIdCell} ${rateCell} ${dateCell} ${tagCell} ║`;
  };

  const output = [
    `╔${heavyRule}╗`,
    `║${padText('BASELINES', innerWidth, 'center')}║`,
    `╠${heavyRule}╣`,
    buildHeader(),
    `╟${'─'.repeat(innerWidth)}╢`,
  ];

  baselines.forEach((entry) => {
    const paint = pickRateColor(entry.metrics.successRate);

    // Leave room for the ellipsis when the name does not fit.
    let scenarioText = entry.scenario;
    if (entry.scenario.length > columns.scenario - 2) {
      scenarioText = `${entry.scenario.slice(0, columns.scenario - 3)}…`;
    }
    const scenarioCell = padText(scenarioText, columns.scenario);
    const runIdCell = padText(entry.runId, columns.runId);

    const percent = `${(entry.metrics.successRate * 100).toFixed(1)}%`;
    // Pad first, colour second, so the colour codes do not affect alignment.
    const rateCell = paint(padText(percent, columns.rate, 'right'));

    const created = new Date(entry.createdAt);
    const createdText = `${created.toLocaleDateString()} ${created.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}`;
    const dateCell = padText(createdText, columns.date, 'right');

    const tagCell = padText(entry.tag || '-', columns.tag, 'right');

    output.push(`║ ${scenarioCell} ${runIdCell} ${rateCell} ${dateCell} ${tagCell} ║`);
  });

  output.push(`╚${heavyRule}╝`);

  return output.join('\n');
}
|
|
105
|
+
|
|
106
|
+
/**
 * Format baselines as undecorated text for CI and other non-TTY output.
 *
 * Produces a heading, a blank line, then one line per baseline holding the
 * scenario, run ID, success rate as a one-decimal percentage, the locale
 * formatted creation time, and the tag in square brackets when one is set.
 *
 * @param baselines - Baselines to list.
 * @returns The newline-joined text.
 */
function renderBaselinesPlain(baselines: BaselineMetadata[]): string {
  const rows = baselines.map((entry) => {
    const percent = `${(entry.metrics.successRate * 100).toFixed(1)}%`;
    const created = new Date(entry.createdAt).toLocaleString();
    const tagSuffix = entry.tag ? ` [${entry.tag}]` : '';
    return `${entry.scenario} ${entry.runId} ${percent} ${created}${tagSuffix}`;
  });

  return ['=== BASELINES ===', '', ...rows].join('\n');
}
|
|
121
|
+
|
|
122
|
+
/**
 * Create the baseline set subcommand.
 *
 * `set <run-id>` loads the config, creates the storage adapter, and asks it to
 * record the given run as a baseline. On success it prints a summary of the
 * stored baseline. If the adapter lacks baseline support, or anything throws,
 * an error box is printed and the process exits with code 1.
 *
 * @returns The configured `set` command.
 */
function baselineSetCommand(): Command {
  const cmd = new Command('set');

  cmd
    .description('Set a run as the baseline for regression comparison')
    .argument('<run-id>', 'Run ID to set as baseline')
    .option('-s, --scenario <name>', 'Override scenario name (defaults to scenario from run)')
    .option('-t, --tag <tag>', 'Optional tag/description for the baseline')
    .option('--config <path>', 'Path to config file')
    .action(async (runId: string, options: BaselineSetOptions) => {
      const spinner = createSpinner('Setting baseline...');
      spinner.start();

      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!isBaselineStorage(storage)) {
          spinner.fail('Error');
          console.log();
          console.log(
            renderError({
              title: 'Baselines Not Supported',
              reason: 'Current storage adapter does not support baseline management',
              suggestions: [
                'Use local storage which supports baselines',
                'Check your storage configuration in artemis.config.yaml',
              ],
            })
          );
          process.exit(1);
        }

        // An empty scenario is passed when no override was given.
        const baseline = await storage.setBaseline(options.scenario || '', runId, options.tag);

        spinner.succeed('Baseline set successfully');
        console.log();
        console.log(`${icons.passed} ${chalk.bold('Baseline created')}`);
        console.log();
        console.log(` Scenario: ${chalk.cyan(baseline.scenario)}`);
        console.log(` Run ID: ${chalk.dim(baseline.runId)}`);
        console.log(
          ` Success Rate: ${chalk.green(`${(baseline.metrics.successRate * 100).toFixed(1)}%`)}`
        );
        console.log(
          ` Test Cases: ${baseline.metrics.passedCases}/${baseline.metrics.totalCases} passed`
        );
        if (baseline.tag) {
          console.log(` Tag: ${chalk.dim(baseline.tag)}`);
        }
        console.log();
        console.log(
          chalk.dim('Future runs of this scenario will be compared against this baseline.')
        );
      } catch (error) {
        spinner.fail('Error');
        console.log();
        console.log(
          renderError({
            title: 'Failed to Set Baseline',
            // Thrown values are not guaranteed to be Error instances.
            reason: error instanceof Error ? error.message : String(error),
            suggestions: [
              'Check that the run ID exists',
              'Run "artemiskit history" to see available runs',
              'Verify storage configuration',
            ],
          })
        );
        process.exit(1);
      }
    });

  return cmd;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Create the baseline list subcommand.
 *
 * `list` loads the config, creates the storage adapter, and prints every
 * baseline it reports. With `--json` the baselines are always printed as JSON,
 * including an empty array when none are set, so the output stays parseable.
 * Otherwise a box-drawn table is used on a TTY and plain text elsewhere.
 * Failures print an error box and exit with code 1.
 *
 * @returns The configured `list` command.
 */
function baselineListCommand(): Command {
  const cmd = new Command('list');

  cmd
    .description('List all baselines')
    .option('--config <path>', 'Path to config file')
    .option('--json', 'Output as JSON')
    .action(async (options: BaselineListOptions) => {
      const spinner = createSpinner('Loading baselines...');
      spinner.start();

      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!isBaselineStorage(storage)) {
          spinner.fail('Error');
          console.log();
          console.log(
            renderError({
              title: 'Baselines Not Supported',
              reason: 'Current storage adapter does not support baseline management',
              suggestions: ['Use local storage which supports baselines'],
            })
          );
          process.exit(1);
        }

        const baselines = await storage.listBaselines();
        spinner.succeed('Loaded baselines');
        console.log();

        // JSON consumers must get JSON even for an empty list, so this check
        // comes before the human-readable "no baselines" hint.
        if (options.json) {
          console.log(JSON.stringify(baselines, null, 2));
          return;
        }

        if (baselines.length === 0) {
          console.log(chalk.dim('No baselines set.'));
          console.log();
          console.log(chalk.dim('Set a baseline with:'));
          console.log(chalk.dim(' artemiskit baseline set <run-id>'));
          return;
        }

        if (isTTY) {
          console.log(renderBaselinesTable(baselines));
        } else {
          console.log(renderBaselinesPlain(baselines));
        }

        console.log();
        console.log(
          chalk.dim(`${baselines.length} baseline${baselines.length === 1 ? '' : 's'} configured`)
        );
      } catch (error) {
        spinner.fail('Error');
        console.log();
        console.log(
          renderError({
            title: 'Failed to List Baselines',
            // Thrown values are not guaranteed to be Error instances.
            reason: error instanceof Error ? error.message : String(error),
            suggestions: ['Verify storage configuration'],
          })
        );
        process.exit(1);
      }
    });

  return cmd;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Create the baseline remove subcommand.
 *
 * `remove <identifier>` looks the baseline up by run ID, or by scenario name
 * when `--scenario` is given. On a TTY it asks for confirmation unless
 * `--force` is set, then asks the storage adapter to remove the baseline.
 * Every failure path exits with code 1: unsupported adapter, baseline not
 * found, the adapter reporting that nothing was removed, or a thrown error.
 *
 * @returns The configured `remove` command.
 */
function baselineRemoveCommand(): Command {
  const cmd = new Command('remove');

  cmd
    .description('Remove a baseline')
    .argument('<identifier>', 'Run ID of the baseline to remove (or scenario name with --scenario)')
    .option('--config <path>', 'Path to config file')
    .option('-f, --force', 'Skip confirmation prompt')
    .option('-s, --scenario', 'Treat identifier as scenario name instead of run ID')
    .action(async (identifier: string, options: BaselineRemoveOptions & { scenario?: boolean }) => {
      // Started only after confirmation so it does not fight the prompt.
      const spinner = createSpinner('Removing baseline...');

      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!isBaselineStorage(storage)) {
          console.log(
            renderError({
              title: 'Baselines Not Supported',
              reason: 'Current storage adapter does not support baseline management',
              suggestions: ['Use local storage which supports baselines'],
            })
          );
          process.exit(1);
        }

        // Check if baseline exists first - by run ID or scenario name
        const existing = options.scenario
          ? await storage.getBaseline(identifier)
          : await storage.getBaselineByRunId(identifier);

        if (!existing) {
          console.log();
          console.log(
            chalk.yellow(
              options.scenario
                ? `No baseline found for scenario: ${identifier}`
                : `No baseline found with run ID: ${identifier}`
            )
          );
          console.log();
          console.log(chalk.dim('List baselines with:'));
          console.log(chalk.dim(' artemiskit baseline list'));
          process.exit(1);
        }

        // Confirm if not forced
        if (!options.force && isTTY) {
          const { promptConfirm } = await import('../ui/index.js');
          const confirmed = await promptConfirm(
            `Remove baseline for "${existing.scenario}"? (Run ID: ${existing.runId})`,
            false
          );
          if (!confirmed) {
            console.log(chalk.dim('Cancelled.'));
            return;
          }
        }

        spinner.start();
        const removed = options.scenario
          ? await storage.removeBaseline(identifier)
          : await storage.removeBaselineByRunId(identifier);

        if (removed) {
          spinner.succeed('Baseline removed');
          console.log();
          console.log(`${icons.passed} Removed baseline for: ${chalk.cyan(existing.scenario)}`);
        } else {
          spinner.fail('Baseline not found');
          // Nothing was removed: report failure like the other error paths do.
          process.exit(1);
        }
      } catch (error) {
        spinner.fail('Error');
        console.log();
        console.log(
          renderError({
            title: 'Failed to Remove Baseline',
            // Thrown values are not guaranteed to be Error instances.
            reason: error instanceof Error ? error.message : String(error),
            suggestions: [
              'Check the run ID or scenario name',
              'Run "artemiskit baseline list" to see baselines',
            ],
          })
        );
        process.exit(1);
      }
    });

  return cmd;
}
|
|
369
|
+
|
|
370
|
+
/**
 * Create the baseline get subcommand.
 *
 * `get <identifier>` looks the baseline up by run ID, or by scenario name when
 * `--scenario` is given, and prints it either as JSON (`--json`) or as a short
 * human-readable summary. An unsupported adapter, a missing baseline, or a
 * thrown error each exit with code 1.
 *
 * @returns The configured `get` command.
 */
function baselineGetCommand(): Command {
  const cmd = new Command('get');

  cmd
    .description('Get baseline details')
    .argument('<identifier>', 'Run ID of the baseline (or scenario name with --scenario)')
    .option('--config <path>', 'Path to config file')
    .option('--json', 'Output as JSON')
    .option('-s, --scenario', 'Treat identifier as scenario name instead of run ID')
    .action(
      async (
        identifier: string,
        options: { config?: string; json?: boolean; scenario?: boolean }
      ) => {
        try {
          const config = await loadConfig(options.config);
          const storage = createStorage({ fileConfig: config });

          if (!isBaselineStorage(storage)) {
            console.log(
              renderError({
                title: 'Baselines Not Supported',
                reason: 'Current storage adapter does not support baseline management',
                suggestions: ['Use local storage which supports baselines'],
              })
            );
            process.exit(1);
          }

          // Look up by run ID or scenario name
          const baseline = options.scenario
            ? await storage.getBaseline(identifier)
            : await storage.getBaselineByRunId(identifier);

          if (!baseline) {
            console.log(
              chalk.yellow(
                options.scenario
                  ? `No baseline found for scenario: ${identifier}`
                  : `No baseline found with run ID: ${identifier}`
              )
            );
            process.exit(1);
          }

          if (options.json) {
            console.log(JSON.stringify(baseline, null, 2));
            return;
          }

          console.log();
          console.log(chalk.bold(`Baseline: ${baseline.scenario}`));
          console.log();
          console.log(` Run ID: ${baseline.runId}`);
          console.log(` Created: ${new Date(baseline.createdAt).toLocaleString()}`);
          console.log(
            ` Success Rate: ${chalk.green(`${(baseline.metrics.successRate * 100).toFixed(1)}%`)}`
          );
          console.log(
            ` Test Cases: ${baseline.metrics.passedCases}/${baseline.metrics.totalCases}`
          );
          console.log(` Latency: ${baseline.metrics.medianLatencyMs}ms (median)`);
          console.log(` Tokens: ${baseline.metrics.totalTokens.toLocaleString()}`);
          if (baseline.tag) {
            console.log(` Tag: ${baseline.tag}`);
          }
          console.log();
        } catch (error) {
          console.log(
            renderError({
              title: 'Failed to Get Baseline',
              // Thrown values are not guaranteed to be Error instances.
              reason: error instanceof Error ? error.message : String(error),
              suggestions: [
                'Check the run ID or scenario name',
                'Run "artemiskit baseline list" to see baselines',
              ],
            })
          );
          process.exit(1);
        }
      }
    );

  return cmd;
}
|
|
458
|
+
|
|
459
|
+
/**
 * Assemble the top-level `baseline` command.
 *
 * Registers the `set`, `list`, `remove` and `get` subcommands, in that order.
 *
 * @returns The `baseline` command with its subcommands attached.
 */
export function baselineCommand(): Command {
  const root = new Command('baseline').description(
    'Manage baseline runs for regression detection'
  );

  // Each factory is invoked immediately before its command is attached.
  const factories = [
    baselineSetCommand,
    baselineListCommand,
    baselineRemoveCommand,
    baselineGetCommand,
  ];
  for (const build of factories) {
    root.addCommand(build());
  }

  return root;
}
|