@artemiskit/cli 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +182 -0
- package/adapters/openai/dist/index.js +5626 -0
- package/dist/index.js +2947 -509
- package/dist/src/adapters.d.ts.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts +5 -0
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/commands/validate.d.ts +6 -0
- package/dist/src/commands/validate.d.ts.map +1 -0
- package/dist/src/config/schema.d.ts +32 -0
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/utils/adapter.d.ts.map +1 -1
- package/package.json +8 -6
- package/src/__tests__/integration/ui.test.ts +17 -17
- package/src/adapters.ts +30 -0
- package/src/cli.ts +2 -0
- package/src/commands/redteam.ts +174 -17
- package/src/commands/run.ts +20 -11
- package/src/commands/validate.ts +254 -0
- package/src/config/schema.ts +6 -0
- package/src/utils/adapter.ts +167 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAqDtD"}
|
package/dist/src/cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAapC,wBAAgB,SAAS,IAAI,OAAO,CAyCnC"}
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Redteam command - Run red-team adversarial tests
|
|
3
|
+
*
|
|
4
|
+
* Supports OWASP LLM Top 10 2025 security testing with new flags:
|
|
5
|
+
* --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
|
|
6
|
+
* --owasp-full: Full OWASP compliance scan
|
|
7
|
+
* --min-severity: Filter attacks by minimum severity level
|
|
3
8
|
*/
|
|
4
9
|
import { Command } from 'commander';
|
|
5
10
|
export declare function redteamCommand(): Command;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAgDH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAwCpC,wBAAgB,cAAc,IAAI,OAAO,CA8exC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA0iBpC,wBAAgB,UAAU,IAAI,OAAO,CAwgBpC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../../src/commands/validate.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAWpC,wBAAgB,eAAe,IAAI,OAAO,CAkHzC"}
|
|
@@ -15,6 +15,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
15
15
|
embeddingDeploymentName: z.ZodOptional<z.ZodString>;
|
|
16
16
|
modelFamily: z.ZodOptional<z.ZodString>;
|
|
17
17
|
underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
|
|
18
|
+
name: z.ZodOptional<z.ZodString>;
|
|
19
|
+
runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
|
|
20
|
+
captureTraces: z.ZodOptional<z.ZodBoolean>;
|
|
21
|
+
captureMessages: z.ZodOptional<z.ZodBoolean>;
|
|
18
22
|
}, "strip", z.ZodTypeAny, {
|
|
19
23
|
apiKey?: string | undefined;
|
|
20
24
|
baseUrl?: string | undefined;
|
|
@@ -28,6 +32,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
28
32
|
embeddingDeploymentName?: string | undefined;
|
|
29
33
|
modelFamily?: string | undefined;
|
|
30
34
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
35
|
+
name?: string | undefined;
|
|
36
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
37
|
+
captureTraces?: boolean | undefined;
|
|
38
|
+
captureMessages?: boolean | undefined;
|
|
31
39
|
}, {
|
|
32
40
|
apiKey?: string | undefined;
|
|
33
41
|
baseUrl?: string | undefined;
|
|
@@ -41,6 +49,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
41
49
|
embeddingDeploymentName?: string | undefined;
|
|
42
50
|
modelFamily?: string | undefined;
|
|
43
51
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
52
|
+
name?: string | undefined;
|
|
53
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
54
|
+
captureTraces?: boolean | undefined;
|
|
55
|
+
captureMessages?: boolean | undefined;
|
|
44
56
|
}>;
|
|
45
57
|
declare const StorageConfigSchema: z.ZodObject<{
|
|
46
58
|
type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
|
|
@@ -78,6 +90,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
78
90
|
embeddingDeploymentName: z.ZodOptional<z.ZodString>;
|
|
79
91
|
modelFamily: z.ZodOptional<z.ZodString>;
|
|
80
92
|
underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
|
|
93
|
+
name: z.ZodOptional<z.ZodString>;
|
|
94
|
+
runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
|
|
95
|
+
captureTraces: z.ZodOptional<z.ZodBoolean>;
|
|
96
|
+
captureMessages: z.ZodOptional<z.ZodBoolean>;
|
|
81
97
|
}, "strip", z.ZodTypeAny, {
|
|
82
98
|
apiKey?: string | undefined;
|
|
83
99
|
baseUrl?: string | undefined;
|
|
@@ -91,6 +107,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
91
107
|
embeddingDeploymentName?: string | undefined;
|
|
92
108
|
modelFamily?: string | undefined;
|
|
93
109
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
110
|
+
name?: string | undefined;
|
|
111
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
112
|
+
captureTraces?: boolean | undefined;
|
|
113
|
+
captureMessages?: boolean | undefined;
|
|
94
114
|
}, {
|
|
95
115
|
apiKey?: string | undefined;
|
|
96
116
|
baseUrl?: string | undefined;
|
|
@@ -104,6 +124,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
104
124
|
embeddingDeploymentName?: string | undefined;
|
|
105
125
|
modelFamily?: string | undefined;
|
|
106
126
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
127
|
+
name?: string | undefined;
|
|
128
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
129
|
+
captureTraces?: boolean | undefined;
|
|
130
|
+
captureMessages?: boolean | undefined;
|
|
107
131
|
}>>>;
|
|
108
132
|
storage: z.ZodOptional<z.ZodObject<{
|
|
109
133
|
type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
|
|
@@ -169,6 +193,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
169
193
|
embeddingDeploymentName?: string | undefined;
|
|
170
194
|
modelFamily?: string | undefined;
|
|
171
195
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
196
|
+
name?: string | undefined;
|
|
197
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
198
|
+
captureTraces?: boolean | undefined;
|
|
199
|
+
captureMessages?: boolean | undefined;
|
|
172
200
|
}> | undefined;
|
|
173
201
|
storage?: {
|
|
174
202
|
type: "supabase" | "local";
|
|
@@ -204,6 +232,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
204
232
|
embeddingDeploymentName?: string | undefined;
|
|
205
233
|
modelFamily?: string | undefined;
|
|
206
234
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
235
|
+
name?: string | undefined;
|
|
236
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
237
|
+
captureTraces?: boolean | undefined;
|
|
238
|
+
captureMessages?: boolean | undefined;
|
|
207
239
|
}> | undefined;
|
|
208
240
|
storage?: {
|
|
209
241
|
type?: "supabase" | "local" | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CAsGrF;AAqgBD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@artemiskit/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -45,11 +45,13 @@
|
|
|
45
45
|
"test": "bun test"
|
|
46
46
|
},
|
|
47
47
|
"dependencies": {
|
|
48
|
-
"@artemiskit/adapter-
|
|
49
|
-
"@artemiskit/adapter-
|
|
50
|
-
"@artemiskit/
|
|
51
|
-
"@artemiskit/
|
|
52
|
-
"@artemiskit/
|
|
48
|
+
"@artemiskit/adapter-deepagents": "workspace:*",
|
|
49
|
+
"@artemiskit/adapter-langchain": "workspace:*",
|
|
50
|
+
"@artemiskit/adapter-openai": "0.1.12",
|
|
51
|
+
"@artemiskit/adapter-vercel-ai": "0.1.12",
|
|
52
|
+
"@artemiskit/core": "0.3.0",
|
|
53
|
+
"@artemiskit/redteam": "0.3.0",
|
|
54
|
+
"@artemiskit/reports": "0.3.0",
|
|
53
55
|
"chalk": "^5.3.0",
|
|
54
56
|
"cli-table3": "^0.6.3",
|
|
55
57
|
"commander": "^12.0.0",
|
|
@@ -62,7 +62,7 @@ describe('UI Components', () => {
|
|
|
62
62
|
expect(panel).toContain('TEST RESULTS');
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
it('should
|
|
65
|
+
it('should render panel with consistent formatting', () => {
|
|
66
66
|
const panel = renderSummaryPanel({
|
|
67
67
|
passed: 5,
|
|
68
68
|
failed: 0,
|
|
@@ -71,11 +71,11 @@ describe('UI Components', () => {
|
|
|
71
71
|
duration: 5000,
|
|
72
72
|
});
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
expect(panel).toContain('
|
|
77
|
-
expect(panel).toContain('
|
|
78
|
-
expect(panel).toContain('
|
|
74
|
+
// Panel should contain key information regardless of formatting style
|
|
75
|
+
// (box-drawing in TTY mode, ASCII fallback in non-TTY)
|
|
76
|
+
expect(panel).toContain('Passed');
|
|
77
|
+
expect(panel).toContain('5');
|
|
78
|
+
expect(panel).toContain('100');
|
|
79
79
|
});
|
|
80
80
|
|
|
81
81
|
it('should support custom title', () => {
|
|
@@ -116,16 +116,17 @@ describe('UI Components', () => {
|
|
|
116
116
|
expect(error).toContain('Suggestions');
|
|
117
117
|
});
|
|
118
118
|
|
|
119
|
-
it('should
|
|
119
|
+
it('should render error with consistent formatting', () => {
|
|
120
120
|
const error = renderError({
|
|
121
121
|
title: 'Test Error',
|
|
122
122
|
reason: 'Test reason',
|
|
123
123
|
});
|
|
124
124
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
expect(error).toContain('
|
|
128
|
-
expect(error).toContain('
|
|
125
|
+
// Error should contain key information regardless of formatting style
|
|
126
|
+
// (box-drawing in TTY mode, ASCII fallback in non-TTY)
|
|
127
|
+
expect(error).toContain('ERROR');
|
|
128
|
+
expect(error).toContain('Test Error');
|
|
129
|
+
expect(error).toContain('Test reason');
|
|
129
130
|
});
|
|
130
131
|
});
|
|
131
132
|
|
|
@@ -138,14 +139,13 @@ describe('UI Components', () => {
|
|
|
138
139
|
expect(box).toContain('Line 2');
|
|
139
140
|
});
|
|
140
141
|
|
|
141
|
-
it('should
|
|
142
|
+
it('should render info box with consistent formatting', () => {
|
|
142
143
|
const box = renderInfoBox('Test', ['content']);
|
|
143
144
|
|
|
144
|
-
//
|
|
145
|
-
|
|
146
|
-
expect(box).toContain('
|
|
147
|
-
expect(box).toContain('
|
|
148
|
-
expect(box).toContain('┘');
|
|
145
|
+
// Info box should contain key information regardless of formatting style
|
|
146
|
+
// (box-drawing in TTY mode, ASCII fallback in non-TTY)
|
|
147
|
+
expect(box).toContain('Test');
|
|
148
|
+
expect(box).toContain('content');
|
|
149
149
|
});
|
|
150
150
|
});
|
|
151
151
|
|
package/src/adapters.ts
CHANGED
|
@@ -23,6 +23,36 @@ export async function registerAdapters(): Promise<void> {
|
|
|
23
23
|
return new VercelAIAdapter(config);
|
|
24
24
|
});
|
|
25
25
|
|
|
26
|
+
// LangChain adapter - requires runnable via metadata
|
|
27
|
+
adapterRegistry.register('langchain', async (config: AdapterConfig): Promise<ModelClient> => {
|
|
28
|
+
// Dynamic import to avoid bundling LangChain dependencies
|
|
29
|
+
// biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid runnable
|
|
30
|
+
const { LangChainAdapter } = (await import('@artemiskit/adapter-langchain')) as any;
|
|
31
|
+
const runnable = (config as { metadata?: { runnable?: unknown } }).metadata?.runnable;
|
|
32
|
+
if (!runnable) {
|
|
33
|
+
throw new Error(
|
|
34
|
+
'LangChain adapter requires a runnable instance. ' +
|
|
35
|
+
'Pass it via config.metadata.runnable or use createLangChainAdapter() directly.'
|
|
36
|
+
);
|
|
37
|
+
}
|
|
38
|
+
return new LangChainAdapter(config, runnable);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
// DeepAgents adapter - requires system via metadata
|
|
42
|
+
adapterRegistry.register('deepagents', async (config: AdapterConfig): Promise<ModelClient> => {
|
|
43
|
+
// Dynamic import to avoid bundling DeepAgents dependencies
|
|
44
|
+
// biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid system
|
|
45
|
+
const { DeepAgentsAdapter } = (await import('@artemiskit/adapter-deepagents')) as any;
|
|
46
|
+
const system = (config as { metadata?: { system?: unknown } }).metadata?.system;
|
|
47
|
+
if (!system) {
|
|
48
|
+
throw new Error(
|
|
49
|
+
'DeepAgents adapter requires a system instance. ' +
|
|
50
|
+
'Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.'
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
return new DeepAgentsAdapter(config, system);
|
|
54
|
+
});
|
|
55
|
+
|
|
26
56
|
// Mark post-MVP adapters as unavailable
|
|
27
57
|
adapterRegistry.markUnavailable('anthropic', 'Anthropic adapter coming in v0.2.0');
|
|
28
58
|
adapterRegistry.markUnavailable('google', 'Google adapter coming in v0.3.0');
|
package/src/cli.ts
CHANGED
|
@@ -12,6 +12,7 @@ import { redteamCommand } from './commands/redteam';
|
|
|
12
12
|
import { reportCommand } from './commands/report';
|
|
13
13
|
import { runCommand } from './commands/run';
|
|
14
14
|
import { stressCommand } from './commands/stress';
|
|
15
|
+
import { validateCommand } from './commands/validate';
|
|
15
16
|
import { checkForUpdate, formatUpdateMessage, formatVersionDisplay } from './utils/update-checker';
|
|
16
17
|
|
|
17
18
|
export function createCLI(): Command {
|
|
@@ -46,6 +47,7 @@ export function createCLI(): Command {
|
|
|
46
47
|
|
|
47
48
|
program.addCommand(initCommand());
|
|
48
49
|
program.addCommand(runCommand());
|
|
50
|
+
program.addCommand(validateCommand());
|
|
49
51
|
program.addCommand(baselineCommand());
|
|
50
52
|
program.addCommand(compareCommand());
|
|
51
53
|
program.addCommand(historyCommand());
|
package/src/commands/redteam.ts
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Redteam command - Run red-team adversarial tests
|
|
3
|
+
*
|
|
4
|
+
* Supports OWASP LLM Top 10 2025 security testing with new flags:
|
|
5
|
+
* --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
|
|
6
|
+
* --owasp-full: Full OWASP compliance scan
|
|
7
|
+
* --min-severity: Filter attacks by minimum severity level
|
|
3
8
|
*/
|
|
4
9
|
|
|
5
10
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
@@ -19,22 +24,32 @@ import {
|
|
|
19
24
|
parseScenarioFile,
|
|
20
25
|
} from '@artemiskit/core';
|
|
21
26
|
import {
|
|
27
|
+
BadLikertJudgeMutation,
|
|
22
28
|
type ConversationTurn,
|
|
23
29
|
CotInjectionMutation,
|
|
30
|
+
CrescendoMutation,
|
|
31
|
+
DeceptiveDelightMutation,
|
|
24
32
|
EncodingMutation,
|
|
33
|
+
ExcessiveAgencyMutation,
|
|
34
|
+
HallucinationTrapMutation,
|
|
25
35
|
InstructionFlipMutation,
|
|
26
36
|
MultiTurnMutation,
|
|
27
37
|
type Mutation,
|
|
38
|
+
OWASP_CATEGORIES,
|
|
39
|
+
OutputInjectionMutation,
|
|
28
40
|
RedTeamGenerator,
|
|
29
41
|
RoleSpoofMutation,
|
|
30
42
|
SeverityMapper,
|
|
43
|
+
SystemExtractionMutation,
|
|
31
44
|
TypoMutation,
|
|
32
45
|
UnsafeResponseDetector,
|
|
46
|
+
getMutationsForCategory,
|
|
33
47
|
loadCustomAttacks,
|
|
34
48
|
} from '@artemiskit/redteam';
|
|
35
49
|
import {
|
|
36
50
|
generateJSONReport,
|
|
37
51
|
generateRedTeamHTMLReport,
|
|
52
|
+
generateRedTeamJUnitReport,
|
|
38
53
|
generateRedTeamMarkdownReport,
|
|
39
54
|
} from '@artemiskit/reports';
|
|
40
55
|
import chalk from 'chalk';
|
|
@@ -70,8 +85,12 @@ interface RedteamOptions {
|
|
|
70
85
|
config?: string;
|
|
71
86
|
redact?: boolean;
|
|
72
87
|
redactPatterns?: string[];
|
|
73
|
-
export?: 'markdown';
|
|
88
|
+
export?: 'markdown' | 'junit';
|
|
74
89
|
exportOutput?: string;
|
|
90
|
+
// OWASP options
|
|
91
|
+
owasp?: string[];
|
|
92
|
+
owaspFull?: boolean;
|
|
93
|
+
minSeverity?: 'low' | 'medium' | 'high' | 'critical';
|
|
75
94
|
}
|
|
76
95
|
|
|
77
96
|
export function redteamCommand(): Command {
|
|
@@ -84,7 +103,7 @@ export function redteamCommand(): Command {
|
|
|
84
103
|
.option('-m, --model <model>', 'Model to use')
|
|
85
104
|
.option(
|
|
86
105
|
'--mutations <mutations...>',
|
|
87
|
-
'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn)'
|
|
106
|
+
'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn, bad-likert-judge, crescendo, deceptive-delight, output-injection, excessive-agency, system-extraction, hallucination-trap)'
|
|
88
107
|
)
|
|
89
108
|
.option('-c, --count <number>', 'Number of mutated prompts per case', '5')
|
|
90
109
|
.option('--custom-attacks <path>', 'Path to custom attacks YAML file')
|
|
@@ -97,8 +116,18 @@ export function redteamCommand(): Command {
|
|
|
97
116
|
'--redact-patterns <patterns...>',
|
|
98
117
|
'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
|
|
99
118
|
)
|
|
100
|
-
.option('--export <format>', 'Export results to format (markdown)')
|
|
119
|
+
.option('--export <format>', 'Export results to format (markdown or junit)')
|
|
101
120
|
.option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
|
|
121
|
+
// OWASP options
|
|
122
|
+
.option(
|
|
123
|
+
'--owasp <categories...>',
|
|
124
|
+
'Test specific OWASP LLM Top 10 categories (e.g., LLM01, LLM05, LLM06)'
|
|
125
|
+
)
|
|
126
|
+
.option('--owasp-full', 'Run full OWASP LLM Top 10 compliance scan (all applicable categories)')
|
|
127
|
+
.option(
|
|
128
|
+
'--min-severity <level>',
|
|
129
|
+
'Minimum severity level for attacks (low, medium, high, critical)'
|
|
130
|
+
)
|
|
102
131
|
.action(async (scenarioPath: string, options: RedteamOptions) => {
|
|
103
132
|
const spinner = createSpinner('Loading configuration...');
|
|
104
133
|
spinner.start();
|
|
@@ -144,8 +173,15 @@ export function redteamCommand(): Command {
|
|
|
144
173
|
const client = await createAdapter(adapterConfig);
|
|
145
174
|
spinner.succeed(`Connected to ${provider}`);
|
|
146
175
|
|
|
147
|
-
// Set up mutations
|
|
148
|
-
const mutations = selectMutations(
|
|
176
|
+
// Set up mutations - check for OWASP flags first
|
|
177
|
+
const mutations = selectMutations({
|
|
178
|
+
names: options.mutations,
|
|
179
|
+
customAttacksPath: options.customAttacks,
|
|
180
|
+
owaspCategories: options.owasp,
|
|
181
|
+
owaspFull: options.owaspFull,
|
|
182
|
+
minSeverity: options.minSeverity,
|
|
183
|
+
});
|
|
184
|
+
|
|
149
185
|
const generator = new RedTeamGenerator(mutations);
|
|
150
186
|
const detector = new UnsafeResponseDetector();
|
|
151
187
|
const count = Number.parseInt(String(options.count)) || 5;
|
|
@@ -157,6 +193,14 @@ export function redteamCommand(): Command {
|
|
|
157
193
|
`Prompts per case: ${count}`,
|
|
158
194
|
`Total cases: ${scenario.cases.length}`,
|
|
159
195
|
];
|
|
196
|
+
if (options.owasp || options.owaspFull) {
|
|
197
|
+
configLines.push(
|
|
198
|
+
`OWASP Mode: ${options.owaspFull ? 'Full Compliance Scan' : options.owasp?.join(', ')}`
|
|
199
|
+
);
|
|
200
|
+
}
|
|
201
|
+
if (options.minSeverity) {
|
|
202
|
+
configLines.push(`Min Severity: ${options.minSeverity}`);
|
|
203
|
+
}
|
|
160
204
|
if (options.redact) {
|
|
161
205
|
configLines.push(
|
|
162
206
|
`Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ''}`
|
|
@@ -416,6 +460,10 @@ export function redteamCommand(): Command {
|
|
|
416
460
|
model: resolvedConfig.model,
|
|
417
461
|
mutations: mutations.map((m) => m.name),
|
|
418
462
|
count_per_case: count,
|
|
463
|
+
// Include OWASP info in config
|
|
464
|
+
...(options.owaspFull && { owasp_mode: 'full' }),
|
|
465
|
+
...(options.owasp && { owasp_categories: options.owasp }),
|
|
466
|
+
...(options.minSeverity && { min_severity: options.minSeverity }),
|
|
419
467
|
},
|
|
420
468
|
resolved_config: resolvedConfig,
|
|
421
469
|
metrics,
|
|
@@ -503,14 +551,22 @@ export function redteamCommand(): Command {
|
|
|
503
551
|
console.log(chalk.dim(` JSON: ${jsonPath}`));
|
|
504
552
|
}
|
|
505
553
|
|
|
506
|
-
// Export
|
|
507
|
-
if (options.export
|
|
554
|
+
// Export if requested
|
|
555
|
+
if (options.export) {
|
|
508
556
|
const exportDir = options.exportOutput || './artemis-exports';
|
|
509
557
|
await mkdir(exportDir, { recursive: true });
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
558
|
+
|
|
559
|
+
if (options.export === 'markdown') {
|
|
560
|
+
const markdown = generateRedTeamMarkdownReport(manifest);
|
|
561
|
+
const mdPath = join(exportDir, `${runId}.md`);
|
|
562
|
+
await writeFile(mdPath, markdown);
|
|
563
|
+
console.log(chalk.dim(`Exported: ${mdPath}`));
|
|
564
|
+
} else if (options.export === 'junit') {
|
|
565
|
+
const junit = generateRedTeamJUnitReport(manifest);
|
|
566
|
+
const junitPath = join(exportDir, `${runId}.xml`);
|
|
567
|
+
await writeFile(junitPath, junit);
|
|
568
|
+
console.log(chalk.dim(`Exported: ${junitPath}`));
|
|
569
|
+
}
|
|
514
570
|
}
|
|
515
571
|
|
|
516
572
|
// Exit with error if there were unsafe responses
|
|
@@ -533,22 +589,123 @@ export function redteamCommand(): Command {
|
|
|
533
589
|
return cmd;
|
|
534
590
|
}
|
|
535
591
|
|
|
536
|
-
|
|
537
|
-
|
|
592
|
+
/**
|
|
593
|
+
* All available mutations registry
|
|
594
|
+
*/
|
|
595
|
+
function getAllMutations(): Record<string, Mutation> {
|
|
596
|
+
return {
|
|
597
|
+
// Core mutations (v0.1.x - v0.2.x)
|
|
538
598
|
typo: new TypoMutation(),
|
|
539
599
|
'role-spoof': new RoleSpoofMutation(),
|
|
540
600
|
'instruction-flip': new InstructionFlipMutation(),
|
|
541
601
|
'cot-injection': new CotInjectionMutation(),
|
|
542
602
|
encoding: new EncodingMutation(),
|
|
543
603
|
'multi-turn': new MultiTurnMutation(),
|
|
604
|
+
|
|
605
|
+
// OWASP LLM Top 10 2025 mutations (v0.3.0)
|
|
606
|
+
// LLM01 - Prompt Injection
|
|
607
|
+
'bad-likert-judge': new BadLikertJudgeMutation(),
|
|
608
|
+
crescendo: new CrescendoMutation(),
|
|
609
|
+
'deceptive-delight': new DeceptiveDelightMutation(),
|
|
610
|
+
|
|
611
|
+
// LLM05 - Insecure Output Handling
|
|
612
|
+
'output-injection': new OutputInjectionMutation(),
|
|
613
|
+
|
|
614
|
+
// LLM06 - Excessive Agency
|
|
615
|
+
'excessive-agency': new ExcessiveAgencyMutation(),
|
|
616
|
+
|
|
617
|
+
// LLM07 - System Prompt Leakage
|
|
618
|
+
'system-extraction': new SystemExtractionMutation(),
|
|
619
|
+
|
|
620
|
+
// LLM09 - Misinformation
|
|
621
|
+
'hallucination-trap': new HallucinationTrapMutation(),
|
|
544
622
|
};
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
/**
|
|
626
|
+
* Get OWASP mutations for specific categories
|
|
627
|
+
*/
|
|
628
|
+
function getOwaspMutations(categories: string[]): string[] {
|
|
629
|
+
const mutationNames = new Set<string>();
|
|
630
|
+
|
|
631
|
+
for (const category of categories) {
|
|
632
|
+
const upperCategory = category.toUpperCase();
|
|
633
|
+
if (upperCategory in OWASP_CATEGORIES) {
|
|
634
|
+
const mutations = getMutationsForCategory(upperCategory as keyof typeof OWASP_CATEGORIES);
|
|
635
|
+
for (const mutation of mutations) {
|
|
636
|
+
mutationNames.add(mutation);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
}
|
|
545
640
|
|
|
546
|
-
|
|
641
|
+
return Array.from(mutationNames);
|
|
642
|
+
}
|
|
547
643
|
|
|
548
|
-
|
|
549
|
-
|
|
644
|
+
/**
|
|
645
|
+
* Get all OWASP mutations
|
|
646
|
+
*/
|
|
647
|
+
function getAllOwaspMutations(): string[] {
|
|
648
|
+
return [
|
|
649
|
+
'bad-likert-judge',
|
|
650
|
+
'crescendo',
|
|
651
|
+
'deceptive-delight',
|
|
652
|
+
'output-injection',
|
|
653
|
+
'excessive-agency',
|
|
654
|
+
'system-extraction',
|
|
655
|
+
'hallucination-trap',
|
|
656
|
+
];
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
interface SelectMutationsOptions {
|
|
660
|
+
names?: string[];
|
|
661
|
+
customAttacksPath?: string;
|
|
662
|
+
owaspCategories?: string[];
|
|
663
|
+
owaspFull?: boolean;
|
|
664
|
+
minSeverity?: 'low' | 'medium' | 'high' | 'critical';
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
function selectMutations(options: SelectMutationsOptions): Mutation[] {
|
|
668
|
+
const { names, customAttacksPath, owaspCategories, owaspFull, minSeverity } = options;
|
|
669
|
+
|
|
670
|
+
const allMutations = getAllMutations();
|
|
671
|
+
let selectedNames: string[] = [];
|
|
672
|
+
|
|
673
|
+
// Determine which mutations to use based on options
|
|
674
|
+
if (owaspFull) {
|
|
675
|
+
// Full OWASP scan - use all OWASP mutations
|
|
676
|
+
selectedNames = getAllOwaspMutations();
|
|
677
|
+
} else if (owaspCategories && owaspCategories.length > 0) {
|
|
678
|
+
// Specific OWASP categories
|
|
679
|
+
selectedNames = getOwaspMutations(owaspCategories);
|
|
680
|
+
} else if (names && names.length > 0) {
|
|
681
|
+
// Explicit mutation names
|
|
682
|
+
selectedNames = names;
|
|
550
683
|
} else {
|
|
551
|
-
mutations
|
|
684
|
+
// Default: use core mutations (not OWASP ones to maintain backward compatibility)
|
|
685
|
+
selectedNames = [
|
|
686
|
+
'typo',
|
|
687
|
+
'role-spoof',
|
|
688
|
+
'instruction-flip',
|
|
689
|
+
'cot-injection',
|
|
690
|
+
'encoding',
|
|
691
|
+
'multi-turn',
|
|
692
|
+
];
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
// Filter to valid mutation names
|
|
696
|
+
let mutations = selectedNames
|
|
697
|
+
.filter((name) => name in allMutations)
|
|
698
|
+
.map((name) => allMutations[name]);
|
|
699
|
+
|
|
700
|
+
// Apply severity filter if specified
|
|
701
|
+
if (minSeverity) {
|
|
702
|
+
const severityOrder = ['low', 'medium', 'high', 'critical'];
|
|
703
|
+
const minIndex = severityOrder.indexOf(minSeverity);
|
|
704
|
+
|
|
705
|
+
mutations = mutations.filter((m) => {
|
|
706
|
+
const mutationIndex = severityOrder.indexOf(m.severity);
|
|
707
|
+
return mutationIndex >= minIndex;
|
|
708
|
+
});
|
|
552
709
|
}
|
|
553
710
|
|
|
554
711
|
// Load custom attacks if path provided
|
package/src/commands/run.ts
CHANGED
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
resolveScenarioPaths,
|
|
16
16
|
runScenario,
|
|
17
17
|
} from '@artemiskit/core';
|
|
18
|
-
import { generateMarkdownReport } from '@artemiskit/reports';
|
|
18
|
+
import { generateJUnitReport, generateMarkdownReport } from '@artemiskit/reports';
|
|
19
19
|
import chalk from 'chalk';
|
|
20
20
|
import { Command } from 'commander';
|
|
21
21
|
import { loadConfig } from '../config/loader.js';
|
|
@@ -68,8 +68,8 @@ interface RunOptions {
|
|
|
68
68
|
threshold?: number;
|
|
69
69
|
/** Budget limit in USD - fail if cost exceeds this */
|
|
70
70
|
budget?: number;
|
|
71
|
-
/** Export format: markdown */
|
|
72
|
-
export?: 'markdown';
|
|
71
|
+
/** Export format: markdown or junit */
|
|
72
|
+
export?: 'markdown' | 'junit';
|
|
73
73
|
/** Output directory for exports */
|
|
74
74
|
exportOutput?: string;
|
|
75
75
|
}
|
|
@@ -554,7 +554,8 @@ async function runScenariosInParallel(
|
|
|
554
554
|
while (queue.length > 0 || inProgress.size > 0) {
|
|
555
555
|
// Start new tasks up to the limit
|
|
556
556
|
while (queue.length > 0 && inProgress.size < parallelLimit) {
|
|
557
|
-
const path = queue.shift()
|
|
557
|
+
const path = queue.shift();
|
|
558
|
+
if (!path) break;
|
|
558
559
|
const promise = processScenario(path).then(() => {
|
|
559
560
|
inProgress.delete(promise);
|
|
560
561
|
});
|
|
@@ -607,7 +608,7 @@ export function runCommand(): Command {
|
|
|
607
608
|
.option('--baseline', 'Compare against baseline and detect regression')
|
|
608
609
|
.option('--threshold <number>', 'Regression threshold (0-1), e.g., 0.05 for 5%', '0.05')
|
|
609
610
|
.option('--budget <amount>', 'Maximum budget in USD - fail if estimated cost exceeds this')
|
|
610
|
-
.option('--export <format>', 'Export format: markdown')
|
|
611
|
+
.option('--export <format>', 'Export format: markdown or junit (for CI integration)')
|
|
611
612
|
.option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
|
|
612
613
|
.action(async (scenarioPath: string | undefined, options: RunOptions) => {
|
|
613
614
|
// Determine CI mode: explicit flag, environment variable, or summary format that implies CI
|
|
@@ -819,14 +820,22 @@ export function runCommand(): Command {
|
|
|
819
820
|
console.log(chalk.dim(`Saved: ${savedPath}`));
|
|
820
821
|
}
|
|
821
822
|
|
|
822
|
-
// Export
|
|
823
|
-
if (options.export
|
|
823
|
+
// Export if requested
|
|
824
|
+
if (options.export) {
|
|
824
825
|
const exportDir = options.exportOutput || './artemis-exports';
|
|
825
826
|
await mkdir(exportDir, { recursive: true });
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
827
|
+
|
|
828
|
+
if (options.export === 'markdown') {
|
|
829
|
+
const markdown = generateMarkdownReport(result.manifest);
|
|
830
|
+
const mdPath = join(exportDir, `${result.manifest.run_id}.md`);
|
|
831
|
+
await writeFile(mdPath, markdown);
|
|
832
|
+
console.log(chalk.dim(`Exported: ${mdPath}`));
|
|
833
|
+
} else if (options.export === 'junit') {
|
|
834
|
+
const junit = generateJUnitReport(result.manifest);
|
|
835
|
+
const junitPath = join(exportDir, `${result.manifest.run_id}.xml`);
|
|
836
|
+
await writeFile(junitPath, junit);
|
|
837
|
+
console.log(chalk.dim(`Exported: ${junitPath}`));
|
|
838
|
+
}
|
|
830
839
|
}
|
|
831
840
|
} catch (error) {
|
|
832
841
|
// Record failed scenario
|