@artemiskit/cli 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAWpC,wBAAgB,SAAS,IAAI,OAAO,CAuCnC"}
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAYpC,wBAAgB,SAAS,IAAI,OAAO,CAwCnC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Baseline command - Manage baseline runs for regression detection
3
+ */
4
+ import { Command } from 'commander';
5
+ /**
6
+ * Create the main baseline command with subcommands
7
+ */
8
+ export declare function baselineCommand(): Command;
9
+ //# sourceMappingURL=baseline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"baseline.d.ts","sourceRoot":"","sources":["../../../src/commands/baseline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAocpC;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAWzC"}
@@ -1 +1 @@
1
- {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA8FpC,wBAAgB,cAAc,IAAI,OAAO,CAiFxC"}
1
+ {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../../src/commands/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA4IpC,wBAAgB,cAAc,IAAI,OAAO,CAmFxC"}
@@ -1 +1 @@
1
- {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAkCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAkCpC,wBAAgB,cAAc,IAAI,OAAO,CA6bxC"}
1
+ {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAsCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAoCpC,wBAAgB,cAAc,IAAI,OAAO,CAycxC"}
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAYH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAgVpC,wBAAgB,UAAU,IAAI,OAAO,CAmTpC"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAyiBpC,wBAAgB,UAAU,IAAI,OAAO,CAggBpC"}
@@ -1 +1 @@
1
- {"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAmBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAmCpC,wBAAgB,aAAa,IAAI,OAAO,CAuRvC"}
1
+ {"version":3,"file":"stress.d.ts","sourceRoot":"","sources":["../../../src/commands/stress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAoBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAsCpC,wBAAgB,aAAa,IAAI,OAAO,CA+SvC"}
@@ -13,6 +13,7 @@ declare const ProviderConfigSchema: z.ZodObject<{
13
13
  deploymentName: z.ZodOptional<z.ZodString>;
14
14
  apiVersion: z.ZodOptional<z.ZodString>;
15
15
  embeddingDeploymentName: z.ZodOptional<z.ZodString>;
16
+ modelFamily: z.ZodOptional<z.ZodString>;
16
17
  underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
17
18
  }, "strip", z.ZodTypeAny, {
18
19
  apiKey?: string | undefined;
@@ -25,6 +26,7 @@ declare const ProviderConfigSchema: z.ZodObject<{
25
26
  deploymentName?: string | undefined;
26
27
  apiVersion?: string | undefined;
27
28
  embeddingDeploymentName?: string | undefined;
29
+ modelFamily?: string | undefined;
28
30
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
29
31
  }, {
30
32
  apiKey?: string | undefined;
@@ -37,6 +39,7 @@ declare const ProviderConfigSchema: z.ZodObject<{
37
39
  deploymentName?: string | undefined;
38
40
  apiVersion?: string | undefined;
39
41
  embeddingDeploymentName?: string | undefined;
42
+ modelFamily?: string | undefined;
40
43
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
41
44
  }>;
42
45
  declare const StorageConfigSchema: z.ZodObject<{
@@ -73,6 +76,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
73
76
  deploymentName: z.ZodOptional<z.ZodString>;
74
77
  apiVersion: z.ZodOptional<z.ZodString>;
75
78
  embeddingDeploymentName: z.ZodOptional<z.ZodString>;
79
+ modelFamily: z.ZodOptional<z.ZodString>;
76
80
  underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
77
81
  }, "strip", z.ZodTypeAny, {
78
82
  apiKey?: string | undefined;
@@ -85,6 +89,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
85
89
  deploymentName?: string | undefined;
86
90
  apiVersion?: string | undefined;
87
91
  embeddingDeploymentName?: string | undefined;
92
+ modelFamily?: string | undefined;
88
93
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
89
94
  }, {
90
95
  apiKey?: string | undefined;
@@ -97,6 +102,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
97
102
  deploymentName?: string | undefined;
98
103
  apiVersion?: string | undefined;
99
104
  embeddingDeploymentName?: string | undefined;
105
+ modelFamily?: string | undefined;
100
106
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
101
107
  }>>>;
102
108
  storage: z.ZodOptional<z.ZodObject<{
@@ -161,6 +167,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
161
167
  deploymentName?: string | undefined;
162
168
  apiVersion?: string | undefined;
163
169
  embeddingDeploymentName?: string | undefined;
170
+ modelFamily?: string | undefined;
164
171
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
165
172
  }> | undefined;
166
173
  storage?: {
@@ -195,6 +202,7 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
195
202
  deploymentName?: string | undefined;
196
203
  apiVersion?: string | undefined;
197
204
  embeddingDeploymentName?: string | undefined;
205
+ modelFamily?: string | undefined;
198
206
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
199
207
  }> | undefined;
200
208
  storage?: {
@@ -1 +1 @@
1
- {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAexB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA8ErF;AA+WD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA8ErF;AAsXD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/cli",
3
- "version": "0.2.0",
3
+ "version": "0.2.3",
4
4
  "description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -45,11 +45,11 @@
45
45
  "test": "bun test"
46
46
  },
47
47
  "dependencies": {
48
- "@artemiskit/adapter-openai": "0.1.7",
49
- "@artemiskit/adapter-vercel-ai": "0.1.7",
50
- "@artemiskit/core": "0.2.0",
51
- "@artemiskit/redteam": "0.2.0",
52
- "@artemiskit/reports": "0.2.0",
48
+ "@artemiskit/adapter-openai": "0.1.10",
49
+ "@artemiskit/adapter-vercel-ai": "0.1.10",
50
+ "@artemiskit/core": "0.2.3",
51
+ "@artemiskit/redteam": "0.2.3",
52
+ "@artemiskit/reports": "0.2.3",
53
53
  "chalk": "^5.3.0",
54
54
  "cli-table3": "^0.6.3",
55
55
  "commander": "^12.0.0",
package/src/cli.ts CHANGED
@@ -4,6 +4,7 @@
4
4
 
5
5
  import { Command } from 'commander';
6
6
  import { version } from '../package.json';
7
+ import { baselineCommand } from './commands/baseline';
7
8
  import { compareCommand } from './commands/compare';
8
9
  import { historyCommand } from './commands/history';
9
10
  import { initCommand } from './commands/init';
@@ -45,6 +46,7 @@ export function createCLI(): Command {
45
46
 
46
47
  program.addCommand(initCommand());
47
48
  program.addCommand(runCommand());
49
+ program.addCommand(baselineCommand());
48
50
  program.addCommand(compareCommand());
49
51
  program.addCommand(historyCommand());
50
52
  program.addCommand(reportCommand());
@@ -0,0 +1,473 @@
1
+ /**
2
+ * Baseline command - Manage baseline runs for regression detection
3
+ */
4
+
5
+ import type { BaselineMetadata, BaselineStorageAdapter } from '@artemiskit/core';
6
+ import chalk from 'chalk';
7
+ import { Command } from 'commander';
8
+ import { loadConfig } from '../config/loader.js';
9
+ import { createSpinner, icons, isTTY, padText, renderError } from '../ui/index.js';
10
+ import { createStorage } from '../utils/storage.js';
11
+
12
/** Options parsed by commander for `baseline set`. */
interface BaselineSetOptions {
  /** Value of `-s, --scenario <name>`: scenario name override. */
  scenario?: string;
  /** Value of `-t, --tag <tag>`: free-form tag/description for the baseline. */
  tag?: string;
  /** Value of `--config <path>`: config file path handed to `loadConfig`. */
  config?: string;
}

/** Options parsed by commander for `baseline list`. */
interface BaselineListOptions {
  /** Value of `--config <path>`: config file path handed to `loadConfig`. */
  config?: string;
  /** Set by `--json`: print the baselines as JSON instead of a table. */
  json?: boolean;
}

/** Options parsed by commander for `baseline remove`. */
interface BaselineRemoveOptions {
  /** Value of `--config <path>`: config file path handed to `loadConfig`. */
  config?: string;
  /** Set by `-f, --force`: skip the confirmation prompt. */
  force?: boolean;
  /**
   * Set by `-s, --scenario`: treat the positional identifier as a scenario name
   * instead of a run ID. Unlike `BaselineSetOptions.scenario` this is a boolean
   * flag, not a value.
   */
  scenario?: boolean;
}
27
+
28
/**
 * Check if storage adapter supports baselines.
 *
 * Narrows an arbitrary value to `BaselineStorageAdapter` by verifying that every
 * baseline method the subcommands in this file invoke is present and callable:
 * `setBaseline`, `getBaseline`, `getBaselineByRunId`, `listBaselines`,
 * `removeBaseline` and `removeBaselineByRunId`. Checking all of them up front
 * lets an incomplete adapter be reported as "not supported" instead of failing
 * later with a "not a function" error.
 *
 * @param storage - Value returned by the storage factory.
 * @returns `true` when `storage` is a non-null object exposing all six methods.
 */
function isBaselineStorage(storage: unknown): storage is BaselineStorageAdapter {
  if (typeof storage !== 'object' || storage === null) {
    return false;
  }

  // Property lookup follows the prototype chain, so class instances whose
  // methods live on the prototype are accepted just like plain objects.
  const candidate = storage as Record<string, unknown>;
  const requiredMethods = [
    'setBaseline',
    'getBaseline',
    'getBaselineByRunId',
    'listBaselines',
    'removeBaseline',
    'removeBaselineByRunId',
  ];

  return requiredMethods.every((method) => typeof candidate[method] === 'function');
}
41
+
42
/**
 * Build the boxed baselines table printed on interactive terminals.
 *
 * The table has a centred "BASELINES" title, a header row, and one row per
 * baseline with five fixed-width columns: scenario, run ID, success rate,
 * creation time and tag. The success-rate cell is coloured green at or above
 * 0.9, yellow at or above 0.7, and red otherwise. Scenario names longer than
 * the column allows are cut and suffixed with an ellipsis.
 *
 * @param baselines - Baselines to render, in display order.
 * @returns The table as a single newline-joined string.
 */
function renderBaselinesTable(baselines: BaselineMetadata[]): string {
  const columns = { scenario: 30, runId: 16, rate: 12, date: 20, tag: 12 };

  // Frame plus padding on both sides (2 + 2) and one space between the columns.
  const totalWidth =
    2 +
    columns.scenario +
    1 +
    columns.runId +
    1 +
    columns.rate +
    1 +
    columns.date +
    1 +
    columns.tag +
    2;
  const innerWidth = totalWidth - 2;
  const doubleRule = '═'.repeat(innerWidth);

  const output: string[] = [];
  output.push(`╔${doubleRule}╗`);
  output.push(`║${padText('BASELINES', innerWidth, 'center')}║`);
  output.push(`╠${doubleRule}╣`);

  const headScenario = padText('Scenario', columns.scenario);
  const headRunId = padText('Run ID', columns.runId);
  const headRate = padText('Success Rate', columns.rate, 'right');
  const headDate = padText('Created', columns.date, 'right');
  const headTag = padText('Tag', columns.tag, 'right');
  output.push(`║ ${headScenario} ${headRunId} ${headRate} ${headDate} ${headTag} ║`);
  output.push(`╟${'─'.repeat(innerWidth)}╢`);

  for (const entry of baselines) {
    const successRate = entry.metrics.successRate;

    // Pick the colour for the rate cell; red is the fallback bucket.
    let paint = chalk.red;
    if (successRate >= 0.9) {
      paint = chalk.green;
    } else if (successRate >= 0.7) {
      paint = chalk.yellow;
    }

    let scenarioLabel = entry.scenario;
    if (scenarioLabel.length > columns.scenario - 2) {
      scenarioLabel = `${scenarioLabel.slice(0, columns.scenario - 3)}…`;
    }
    const scenarioCell = padText(scenarioLabel, columns.scenario);
    const runIdCell = padText(entry.runId, columns.runId);

    // Pad before colouring so the escape codes are not part of the padded text.
    const rateCell = paint(padText(`${(successRate * 100).toFixed(1)}%`, columns.rate, 'right'));

    const created = new Date(entry.createdAt);
    const createdLabel = `${created.toLocaleDateString()} ${created.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}`;
    const dateCell = padText(createdLabel, columns.date, 'right');

    const tagCell = padText(entry.tag || '-', columns.tag, 'right');

    output.push(`║ ${scenarioCell} ${runIdCell} ${rateCell} ${dateCell} ${tagCell} ║`);
  }

  output.push(`╚${doubleRule}╝`);

  return output.join('\n');
}
105
+
106
/**
 * Format baselines as undecorated text for CI logs and other non-TTY output.
 *
 * Emits a `=== BASELINES ===` heading, a blank line, then one line per baseline
 * holding the scenario, run ID, success rate (one decimal place), locale-formatted
 * creation time and, when a tag is set, the tag in square brackets.
 *
 * @param baselines - Baselines to render, in display order.
 * @returns The listing as a single newline-joined string.
 */
function renderBaselinesPlain(baselines: BaselineMetadata[]): string {
  const rows = baselines.map((entry) => {
    const percent = `${(entry.metrics.successRate * 100).toFixed(1)}%`;
    const created = new Date(entry.createdAt).toLocaleString();
    const tagSuffix = entry.tag ? ` [${entry.tag}]` : '';
    return `${entry.scenario} ${entry.runId} ${percent} ${created}${tagSuffix}`;
  });

  return ['=== BASELINES ===', '', ...rows].join('\n');
}
121
+
122
/**
 * Create the baseline set subcommand.
 *
 * `baseline set <run-id>` loads the config, builds the storage adapter and asks
 * it to record the given run as a baseline. On success a summary of the stored
 * baseline is printed. If the adapter lacks baseline support, or anything
 * throws, a rendered error is printed and the process exits with code 1.
 *
 * @returns The configured commander `set` subcommand.
 */
function baselineSetCommand(): Command {
  const cmd = new Command('set');

  cmd
    .description('Set a run as the baseline for regression comparison')
    .argument('<run-id>', 'Run ID to set as baseline')
    .option('-s, --scenario <name>', 'Override scenario name (defaults to scenario from run)')
    .option('-t, --tag <tag>', 'Optional tag/description for the baseline')
    .option('--config <path>', 'Path to config file')
    .action(async (runId: string, options: BaselineSetOptions) => {
      const spinner = createSpinner('Setting baseline...');
      spinner.start();

      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!isBaselineStorage(storage)) {
          spinner.fail('Error');
          console.log();
          console.log(
            renderError({
              title: 'Baselines Not Supported',
              reason: 'Current storage adapter does not support baseline management',
              suggestions: [
                'Use local storage which supports baselines',
                'Check your storage configuration in artemis.config.yaml',
              ],
            })
          );
          process.exit(1);
        }

        // An empty scenario is passed when no override was given.
        // NOTE(review): presumably the adapter then uses the run's own scenario,
        // as the option help text says — confirm against the storage implementation.
        const baseline = await storage.setBaseline(options.scenario || '', runId, options.tag);

        spinner.succeed('Baseline set successfully');
        console.log();
        console.log(`${icons.passed} ${chalk.bold('Baseline created')}`);
        console.log();
        console.log(` Scenario: ${chalk.cyan(baseline.scenario)}`);
        console.log(` Run ID: ${chalk.dim(baseline.runId)}`);
        console.log(
          ` Success Rate: ${chalk.green(`${(baseline.metrics.successRate * 100).toFixed(1)}%`)}`
        );
        console.log(
          ` Test Cases: ${baseline.metrics.passedCases}/${baseline.metrics.totalCases} passed`
        );
        if (baseline.tag) {
          console.log(` Tag: ${chalk.dim(baseline.tag)}`);
        }
        console.log();
        console.log(
          chalk.dim('Future runs of this scenario will be compared against this baseline.')
        );
      } catch (error) {
        // Any value can be thrown; only Error instances are known to carry a
        // message, so fall back to the string form for everything else.
        const reason = error instanceof Error ? error.message : String(error);

        spinner.fail('Error');
        console.log();
        console.log(
          renderError({
            title: 'Failed to Set Baseline',
            reason,
            suggestions: [
              'Check that the run ID exists',
              'Run "artemiskit history" to see available runs',
              'Verify storage configuration',
            ],
          })
        );
        process.exit(1);
      }
    });

  return cmd;
}
199
+
200
/**
 * Create the baseline list subcommand.
 *
 * `baseline list` loads every baseline from storage and prints them as JSON
 * (`--json`), as a boxed table on a TTY, or as plain text otherwise. If the
 * adapter lacks baseline support, or anything throws, a rendered error is
 * printed and the process exits with code 1.
 *
 * @returns The configured commander `list` subcommand.
 */
function baselineListCommand(): Command {
  const cmd = new Command('list');

  cmd
    .description('List all baselines')
    .option('--config <path>', 'Path to config file')
    .option('--json', 'Output as JSON')
    .action(async (options: BaselineListOptions) => {
      const spinner = createSpinner('Loading baselines...');
      spinner.start();

      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!isBaselineStorage(storage)) {
          spinner.fail('Error');
          console.log();
          console.log(
            renderError({
              title: 'Baselines Not Supported',
              reason: 'Current storage adapter does not support baseline management',
              suggestions: ['Use local storage which supports baselines'],
            })
          );
          process.exit(1);
        }

        const baselines = await storage.listBaselines();
        spinner.succeed('Loaded baselines');
        console.log();

        // Handle --json before the empty-list hint so that JSON consumers get
        // `[]` for an empty store rather than human-readable text.
        if (options.json) {
          console.log(JSON.stringify(baselines, null, 2));
          return;
        }

        if (baselines.length === 0) {
          console.log(chalk.dim('No baselines set.'));
          console.log();
          console.log(chalk.dim('Set a baseline with:'));
          console.log(chalk.dim(' artemiskit baseline set <run-id>'));
          return;
        }

        if (isTTY) {
          console.log(renderBaselinesTable(baselines));
        } else {
          console.log(renderBaselinesPlain(baselines));
        }

        console.log();
        console.log(
          chalk.dim(`${baselines.length} baseline${baselines.length === 1 ? '' : 's'} configured`)
        );
      } catch (error) {
        // Any value can be thrown; only Error instances are known to carry a
        // message, so fall back to the string form for everything else.
        const reason = error instanceof Error ? error.message : String(error);

        spinner.fail('Error');
        console.log();
        console.log(
          renderError({
            title: 'Failed to List Baselines',
            reason,
            suggestions: ['Verify storage configuration'],
          })
        );
        process.exit(1);
      }
    });

  return cmd;
}
274
+
275
/**
 * Create the baseline remove subcommand.
 *
 * `baseline remove <identifier>` looks the baseline up by run ID, or by scenario
 * name when `-s, --scenario` is given. On a TTY it asks for confirmation unless
 * `--force` is set, then deletes the baseline. Every failure path exits with
 * code 1: unsupported adapter, baseline not found, storage reporting that
 * nothing was removed, or a thrown error. Declining the prompt returns without
 * an error exit.
 *
 * @returns The configured commander `remove` subcommand.
 */
function baselineRemoveCommand(): Command {
  const cmd = new Command('remove');

  cmd
    .description('Remove a baseline')
    .argument('<identifier>', 'Run ID of the baseline to remove (or scenario name with --scenario)')
    .option('--config <path>', 'Path to config file')
    .option('-f, --force', 'Skip confirmation prompt')
    .option('-s, --scenario', 'Treat identifier as scenario name instead of run ID')
    .action(async (identifier: string, options: BaselineRemoveOptions & { scenario?: boolean }) => {
      // Created up front but only started after the confirmation prompt, so the
      // spinner does not run while the prompt is waiting for input.
      const spinner = createSpinner('Removing baseline...');

      try {
        const config = await loadConfig(options.config);
        const storage = createStorage({ fileConfig: config });

        if (!isBaselineStorage(storage)) {
          console.log(
            renderError({
              title: 'Baselines Not Supported',
              reason: 'Current storage adapter does not support baseline management',
              suggestions: ['Use local storage which supports baselines'],
            })
          );
          process.exit(1);
        }

        // Check if baseline exists first - by run ID or scenario name
        const existing = options.scenario
          ? await storage.getBaseline(identifier)
          : await storage.getBaselineByRunId(identifier);

        if (!existing) {
          console.log();
          console.log(
            chalk.yellow(
              options.scenario
                ? `No baseline found for scenario: ${identifier}`
                : `No baseline found with run ID: ${identifier}`
            )
          );
          console.log();
          console.log(chalk.dim('List baselines with:'));
          console.log(chalk.dim(' artemiskit baseline list'));
          process.exit(1);
        }

        // Confirm if not forced; non-interactive runs proceed without prompting.
        if (!options.force && isTTY) {
          const { promptConfirm } = await import('../ui/index.js');
          const confirmed = await promptConfirm(
            `Remove baseline for "${existing.scenario}"? (Run ID: ${existing.runId})`,
            false
          );
          if (!confirmed) {
            console.log(chalk.dim('Cancelled.'));
            return;
          }
        }

        spinner.start();
        const removed = options.scenario
          ? await storage.removeBaseline(identifier)
          : await storage.removeBaselineByRunId(identifier);

        if (removed) {
          spinner.succeed('Baseline removed');
          console.log();
          console.log(`${icons.passed} Removed baseline for: ${chalk.cyan(existing.scenario)}`);
        } else {
          // The baseline existed at lookup time but storage removed nothing.
          // Report failure through the exit code like the other error paths.
          spinner.fail('Baseline not found');
          process.exit(1);
        }
      } catch (error) {
        // Any value can be thrown; only Error instances are known to carry a
        // message, so fall back to the string form for everything else.
        const reason = error instanceof Error ? error.message : String(error);

        spinner.fail('Error');
        console.log();
        console.log(
          renderError({
            title: 'Failed to Remove Baseline',
            reason,
            suggestions: [
              'Check the run ID or scenario name',
              'Run "artemiskit baseline list" to see baselines',
            ],
          })
        );
        process.exit(1);
      }
    });

  return cmd;
}
369
+
370
/**
 * Create the baseline get subcommand.
 *
 * `baseline get <identifier>` looks a baseline up by run ID, or by scenario name
 * when `-s, --scenario` is given, and prints it as JSON (`--json`) or as a short
 * human-readable summary. If the adapter lacks baseline support, no baseline
 * matches, or anything throws, a message is printed and the process exits with
 * code 1.
 *
 * @returns The configured commander `get` subcommand.
 */
function baselineGetCommand(): Command {
  const cmd = new Command('get');

  cmd
    .description('Get baseline details')
    .argument('<identifier>', 'Run ID of the baseline (or scenario name with --scenario)')
    .option('--config <path>', 'Path to config file')
    .option('--json', 'Output as JSON')
    .option('-s, --scenario', 'Treat identifier as scenario name instead of run ID')
    .action(
      async (
        identifier: string,
        options: { config?: string; json?: boolean; scenario?: boolean }
      ) => {
        try {
          const config = await loadConfig(options.config);
          const storage = createStorage({ fileConfig: config });

          if (!isBaselineStorage(storage)) {
            console.log(
              renderError({
                title: 'Baselines Not Supported',
                reason: 'Current storage adapter does not support baseline management',
                suggestions: ['Use local storage which supports baselines'],
              })
            );
            process.exit(1);
          }

          // Look up by run ID or scenario name
          const baseline = options.scenario
            ? await storage.getBaseline(identifier)
            : await storage.getBaselineByRunId(identifier);

          if (!baseline) {
            console.log(
              chalk.yellow(
                options.scenario
                  ? `No baseline found for scenario: ${identifier}`
                  : `No baseline found with run ID: ${identifier}`
              )
            );
            process.exit(1);
          }

          if (options.json) {
            console.log(JSON.stringify(baseline, null, 2));
            return;
          }

          console.log();
          console.log(chalk.bold(`Baseline: ${baseline.scenario}`));
          console.log();
          console.log(` Run ID: ${baseline.runId}`);
          console.log(` Created: ${new Date(baseline.createdAt).toLocaleString()}`);
          console.log(
            ` Success Rate: ${chalk.green(`${(baseline.metrics.successRate * 100).toFixed(1)}%`)}`
          );
          console.log(
            ` Test Cases: ${baseline.metrics.passedCases}/${baseline.metrics.totalCases}`
          );
          console.log(` Latency: ${baseline.metrics.medianLatencyMs}ms (median)`);
          console.log(` Tokens: ${baseline.metrics.totalTokens.toLocaleString()}`);
          if (baseline.tag) {
            console.log(` Tag: ${baseline.tag}`);
          }
          console.log();
        } catch (error) {
          // Any value can be thrown; only Error instances are known to carry a
          // message, so fall back to the string form for everything else.
          const reason = error instanceof Error ? error.message : String(error);

          console.log(
            renderError({
              title: 'Failed to Get Baseline',
              reason,
              suggestions: [
                'Check the run ID or scenario name',
                'Run "artemiskit baseline list" to see baselines',
              ],
            })
          );
          process.exit(1);
        }
      }
    );

  return cmd;
}
458
+
459
/**
 * Assemble the top-level `baseline` command.
 *
 * Registers the `set`, `list`, `remove` and `get` subcommands, in that order,
 * on a fresh commander `Command`.
 *
 * @returns The `baseline` command with its subcommands attached.
 */
export function baselineCommand(): Command {
  const baseline = new Command('baseline');
  baseline.description('Manage baseline runs for regression detection');

  const subcommandFactories = [
    baselineSetCommand,
    baselineListCommand,
    baselineRemoveCommand,
    baselineGetCommand,
  ];
  for (const createSubcommand of subcommandFactories) {
    baseline.addCommand(createSubcommand());
  }

  return baseline;
}