@artemiskit/cli 0.2.4 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/adapters/openai/dist/index.js +5626 -0
- package/dist/index.js +1874 -50
- package/dist/src/adapters.d.ts.map +1 -1
- package/dist/src/commands/redteam.d.ts +5 -0
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/run.d.ts.map +1 -1
- package/dist/src/config/schema.d.ts +32 -0
- package/dist/src/config/schema.d.ts.map +1 -1
- package/dist/src/utils/adapter.d.ts.map +1 -1
- package/package.json +8 -6
- package/src/__tests__/integration/ui.test.ts +17 -17
- package/src/adapters.ts +30 -0
- package/src/commands/redteam.ts +157 -9
- package/src/commands/run.ts +2 -1
- package/src/config/schema.ts +6 -0
- package/src/utils/adapter.ts +167 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAqDtD"}
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Redteam command - Run red-team adversarial tests
|
|
3
|
+
*
|
|
4
|
+
* Supports OWASP LLM Top 10 2025 security testing with new flags:
|
|
5
|
+
* --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
|
|
6
|
+
* --owasp-full: Full OWASP compliance scan
|
|
7
|
+
* --min-severity: Filter attacks by minimum severity level
|
|
3
8
|
*/
|
|
4
9
|
import { Command } from 'commander';
|
|
5
10
|
export declare function redteamCommand(): Command;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAgDH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAwCpC,wBAAgB,cAAc,IAAI,OAAO,CA8exC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA0iBpC,wBAAgB,UAAU,IAAI,OAAO,CAwgBpC"}
|
|
@@ -15,6 +15,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
15
15
|
embeddingDeploymentName: z.ZodOptional<z.ZodString>;
|
|
16
16
|
modelFamily: z.ZodOptional<z.ZodString>;
|
|
17
17
|
underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
|
|
18
|
+
name: z.ZodOptional<z.ZodString>;
|
|
19
|
+
runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
|
|
20
|
+
captureTraces: z.ZodOptional<z.ZodBoolean>;
|
|
21
|
+
captureMessages: z.ZodOptional<z.ZodBoolean>;
|
|
18
22
|
}, "strip", z.ZodTypeAny, {
|
|
19
23
|
apiKey?: string | undefined;
|
|
20
24
|
baseUrl?: string | undefined;
|
|
@@ -28,6 +32,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
28
32
|
embeddingDeploymentName?: string | undefined;
|
|
29
33
|
modelFamily?: string | undefined;
|
|
30
34
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
35
|
+
name?: string | undefined;
|
|
36
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
37
|
+
captureTraces?: boolean | undefined;
|
|
38
|
+
captureMessages?: boolean | undefined;
|
|
31
39
|
}, {
|
|
32
40
|
apiKey?: string | undefined;
|
|
33
41
|
baseUrl?: string | undefined;
|
|
@@ -41,6 +49,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
|
|
|
41
49
|
embeddingDeploymentName?: string | undefined;
|
|
42
50
|
modelFamily?: string | undefined;
|
|
43
51
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
52
|
+
name?: string | undefined;
|
|
53
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
54
|
+
captureTraces?: boolean | undefined;
|
|
55
|
+
captureMessages?: boolean | undefined;
|
|
44
56
|
}>;
|
|
45
57
|
declare const StorageConfigSchema: z.ZodObject<{
|
|
46
58
|
type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
|
|
@@ -78,6 +90,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
78
90
|
embeddingDeploymentName: z.ZodOptional<z.ZodString>;
|
|
79
91
|
modelFamily: z.ZodOptional<z.ZodString>;
|
|
80
92
|
underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
|
|
93
|
+
name: z.ZodOptional<z.ZodString>;
|
|
94
|
+
runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
|
|
95
|
+
captureTraces: z.ZodOptional<z.ZodBoolean>;
|
|
96
|
+
captureMessages: z.ZodOptional<z.ZodBoolean>;
|
|
81
97
|
}, "strip", z.ZodTypeAny, {
|
|
82
98
|
apiKey?: string | undefined;
|
|
83
99
|
baseUrl?: string | undefined;
|
|
@@ -91,6 +107,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
91
107
|
embeddingDeploymentName?: string | undefined;
|
|
92
108
|
modelFamily?: string | undefined;
|
|
93
109
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
110
|
+
name?: string | undefined;
|
|
111
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
112
|
+
captureTraces?: boolean | undefined;
|
|
113
|
+
captureMessages?: boolean | undefined;
|
|
94
114
|
}, {
|
|
95
115
|
apiKey?: string | undefined;
|
|
96
116
|
baseUrl?: string | undefined;
|
|
@@ -104,6 +124,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
104
124
|
embeddingDeploymentName?: string | undefined;
|
|
105
125
|
modelFamily?: string | undefined;
|
|
106
126
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
127
|
+
name?: string | undefined;
|
|
128
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
129
|
+
captureTraces?: boolean | undefined;
|
|
130
|
+
captureMessages?: boolean | undefined;
|
|
107
131
|
}>>>;
|
|
108
132
|
storage: z.ZodOptional<z.ZodObject<{
|
|
109
133
|
type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
|
|
@@ -169,6 +193,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
169
193
|
embeddingDeploymentName?: string | undefined;
|
|
170
194
|
modelFamily?: string | undefined;
|
|
171
195
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
196
|
+
name?: string | undefined;
|
|
197
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
198
|
+
captureTraces?: boolean | undefined;
|
|
199
|
+
captureMessages?: boolean | undefined;
|
|
172
200
|
}> | undefined;
|
|
173
201
|
storage?: {
|
|
174
202
|
type: "supabase" | "local";
|
|
@@ -204,6 +232,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
|
|
|
204
232
|
embeddingDeploymentName?: string | undefined;
|
|
205
233
|
modelFamily?: string | undefined;
|
|
206
234
|
underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
|
|
235
|
+
name?: string | undefined;
|
|
236
|
+
runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
|
|
237
|
+
captureTraces?: boolean | undefined;
|
|
238
|
+
captureMessages?: boolean | undefined;
|
|
207
239
|
}> | undefined;
|
|
208
240
|
storage?: {
|
|
209
241
|
type?: "supabase" | "local" | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CAsGrF;AAqgBD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@artemiskit/cli",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -45,11 +45,13 @@
|
|
|
45
45
|
"test": "bun test"
|
|
46
46
|
},
|
|
47
47
|
"dependencies": {
|
|
48
|
-
"@artemiskit/adapter-
|
|
49
|
-
"@artemiskit/adapter-
|
|
50
|
-
"@artemiskit/
|
|
51
|
-
"@artemiskit/
|
|
52
|
-
"@artemiskit/
|
|
48
|
+
"@artemiskit/adapter-deepagents": "0.2.0",
|
|
49
|
+
"@artemiskit/adapter-langchain": "0.2.0",
|
|
50
|
+
"@artemiskit/adapter-openai": "0.1.12",
|
|
51
|
+
"@artemiskit/adapter-vercel-ai": "0.1.12",
|
|
52
|
+
"@artemiskit/core": "0.3.0",
|
|
53
|
+
"@artemiskit/redteam": "0.3.0",
|
|
54
|
+
"@artemiskit/reports": "0.3.0",
|
|
53
55
|
"chalk": "^5.3.0",
|
|
54
56
|
"cli-table3": "^0.6.3",
|
|
55
57
|
"commander": "^12.0.0",
|
|
@@ -62,7 +62,7 @@ describe('UI Components', () => {
|
|
|
62
62
|
expect(panel).toContain('TEST RESULTS');
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
it('should
|
|
65
|
+
it('should render panel with consistent formatting', () => {
|
|
66
66
|
const panel = renderSummaryPanel({
|
|
67
67
|
passed: 5,
|
|
68
68
|
failed: 0,
|
|
@@ -71,11 +71,11 @@ describe('UI Components', () => {
|
|
|
71
71
|
duration: 5000,
|
|
72
72
|
});
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
expect(panel).toContain('
|
|
77
|
-
expect(panel).toContain('
|
|
78
|
-
expect(panel).toContain('
|
|
74
|
+
// Panel should contain key information regardless of formatting style
|
|
75
|
+
// (box-drawing in TTY mode, ASCII fallback in non-TTY)
|
|
76
|
+
expect(panel).toContain('Passed');
|
|
77
|
+
expect(panel).toContain('5');
|
|
78
|
+
expect(panel).toContain('100');
|
|
79
79
|
});
|
|
80
80
|
|
|
81
81
|
it('should support custom title', () => {
|
|
@@ -116,16 +116,17 @@ describe('UI Components', () => {
|
|
|
116
116
|
expect(error).toContain('Suggestions');
|
|
117
117
|
});
|
|
118
118
|
|
|
119
|
-
it('should
|
|
119
|
+
it('should render error with consistent formatting', () => {
|
|
120
120
|
const error = renderError({
|
|
121
121
|
title: 'Test Error',
|
|
122
122
|
reason: 'Test reason',
|
|
123
123
|
});
|
|
124
124
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
expect(error).toContain('
|
|
128
|
-
expect(error).toContain('
|
|
125
|
+
// Error should contain key information regardless of formatting style
|
|
126
|
+
// (box-drawing in TTY mode, ASCII fallback in non-TTY)
|
|
127
|
+
expect(error).toContain('ERROR');
|
|
128
|
+
expect(error).toContain('Test Error');
|
|
129
|
+
expect(error).toContain('Test reason');
|
|
129
130
|
});
|
|
130
131
|
});
|
|
131
132
|
|
|
@@ -138,14 +139,13 @@ describe('UI Components', () => {
|
|
|
138
139
|
expect(box).toContain('Line 2');
|
|
139
140
|
});
|
|
140
141
|
|
|
141
|
-
it('should
|
|
142
|
+
it('should render info box with consistent formatting', () => {
|
|
142
143
|
const box = renderInfoBox('Test', ['content']);
|
|
143
144
|
|
|
144
|
-
//
|
|
145
|
-
|
|
146
|
-
expect(box).toContain('
|
|
147
|
-
expect(box).toContain('
|
|
148
|
-
expect(box).toContain('┘');
|
|
145
|
+
// Info box should contain key information regardless of formatting style
|
|
146
|
+
// (box-drawing in TTY mode, ASCII fallback in non-TTY)
|
|
147
|
+
expect(box).toContain('Test');
|
|
148
|
+
expect(box).toContain('content');
|
|
149
149
|
});
|
|
150
150
|
});
|
|
151
151
|
|
package/src/adapters.ts
CHANGED
|
@@ -23,6 +23,36 @@ export async function registerAdapters(): Promise<void> {
|
|
|
23
23
|
return new VercelAIAdapter(config);
|
|
24
24
|
});
|
|
25
25
|
|
|
26
|
+
// LangChain adapter - requires runnable via metadata
|
|
27
|
+
adapterRegistry.register('langchain', async (config: AdapterConfig): Promise<ModelClient> => {
|
|
28
|
+
// Dynamic import to avoid bundling LangChain dependencies
|
|
29
|
+
// biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid runnable
|
|
30
|
+
const { LangChainAdapter } = (await import('@artemiskit/adapter-langchain')) as any;
|
|
31
|
+
const runnable = (config as { metadata?: { runnable?: unknown } }).metadata?.runnable;
|
|
32
|
+
if (!runnable) {
|
|
33
|
+
throw new Error(
|
|
34
|
+
'LangChain adapter requires a runnable instance. ' +
|
|
35
|
+
'Pass it via config.metadata.runnable or use createLangChainAdapter() directly.'
|
|
36
|
+
);
|
|
37
|
+
}
|
|
38
|
+
return new LangChainAdapter(config, runnable);
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
// DeepAgents adapter - requires system via metadata
|
|
42
|
+
adapterRegistry.register('deepagents', async (config: AdapterConfig): Promise<ModelClient> => {
|
|
43
|
+
// Dynamic import to avoid bundling DeepAgents dependencies
|
|
44
|
+
// biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid system
|
|
45
|
+
const { DeepAgentsAdapter } = (await import('@artemiskit/adapter-deepagents')) as any;
|
|
46
|
+
const system = (config as { metadata?: { system?: unknown } }).metadata?.system;
|
|
47
|
+
if (!system) {
|
|
48
|
+
throw new Error(
|
|
49
|
+
'DeepAgents adapter requires a system instance. ' +
|
|
50
|
+
'Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.'
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
return new DeepAgentsAdapter(config, system);
|
|
54
|
+
});
|
|
55
|
+
|
|
26
56
|
// Mark post-MVP adapters as unavailable
|
|
27
57
|
adapterRegistry.markUnavailable('anthropic', 'Anthropic adapter coming in v0.2.0');
|
|
28
58
|
adapterRegistry.markUnavailable('google', 'Google adapter coming in v0.3.0');
|
package/src/commands/redteam.ts
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Redteam command - Run red-team adversarial tests
|
|
3
|
+
*
|
|
4
|
+
* Supports OWASP LLM Top 10 2025 security testing with new flags:
|
|
5
|
+
* --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
|
|
6
|
+
* --owasp-full: Full OWASP compliance scan
|
|
7
|
+
* --min-severity: Filter attacks by minimum severity level
|
|
3
8
|
*/
|
|
4
9
|
|
|
5
10
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
@@ -19,17 +24,26 @@ import {
|
|
|
19
24
|
parseScenarioFile,
|
|
20
25
|
} from '@artemiskit/core';
|
|
21
26
|
import {
|
|
27
|
+
BadLikertJudgeMutation,
|
|
22
28
|
type ConversationTurn,
|
|
23
29
|
CotInjectionMutation,
|
|
30
|
+
CrescendoMutation,
|
|
31
|
+
DeceptiveDelightMutation,
|
|
24
32
|
EncodingMutation,
|
|
33
|
+
ExcessiveAgencyMutation,
|
|
34
|
+
HallucinationTrapMutation,
|
|
25
35
|
InstructionFlipMutation,
|
|
26
36
|
MultiTurnMutation,
|
|
27
37
|
type Mutation,
|
|
38
|
+
OWASP_CATEGORIES,
|
|
39
|
+
OutputInjectionMutation,
|
|
28
40
|
RedTeamGenerator,
|
|
29
41
|
RoleSpoofMutation,
|
|
30
42
|
SeverityMapper,
|
|
43
|
+
SystemExtractionMutation,
|
|
31
44
|
TypoMutation,
|
|
32
45
|
UnsafeResponseDetector,
|
|
46
|
+
getMutationsForCategory,
|
|
33
47
|
loadCustomAttacks,
|
|
34
48
|
} from '@artemiskit/redteam';
|
|
35
49
|
import {
|
|
@@ -73,6 +87,10 @@ interface RedteamOptions {
|
|
|
73
87
|
redactPatterns?: string[];
|
|
74
88
|
export?: 'markdown' | 'junit';
|
|
75
89
|
exportOutput?: string;
|
|
90
|
+
// OWASP options
|
|
91
|
+
owasp?: string[];
|
|
92
|
+
owaspFull?: boolean;
|
|
93
|
+
minSeverity?: 'low' | 'medium' | 'high' | 'critical';
|
|
76
94
|
}
|
|
77
95
|
|
|
78
96
|
export function redteamCommand(): Command {
|
|
@@ -85,7 +103,7 @@ export function redteamCommand(): Command {
|
|
|
85
103
|
.option('-m, --model <model>', 'Model to use')
|
|
86
104
|
.option(
|
|
87
105
|
'--mutations <mutations...>',
|
|
88
|
-
'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn)'
|
|
106
|
+
'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn, bad-likert-judge, crescendo, deceptive-delight, output-injection, excessive-agency, system-extraction, hallucination-trap)'
|
|
89
107
|
)
|
|
90
108
|
.option('-c, --count <number>', 'Number of mutated prompts per case', '5')
|
|
91
109
|
.option('--custom-attacks <path>', 'Path to custom attacks YAML file')
|
|
@@ -100,6 +118,16 @@ export function redteamCommand(): Command {
|
|
|
100
118
|
)
|
|
101
119
|
.option('--export <format>', 'Export results to format (markdown or junit)')
|
|
102
120
|
.option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
|
|
121
|
+
// OWASP options
|
|
122
|
+
.option(
|
|
123
|
+
'--owasp <categories...>',
|
|
124
|
+
'Test specific OWASP LLM Top 10 categories (e.g., LLM01, LLM05, LLM06)'
|
|
125
|
+
)
|
|
126
|
+
.option('--owasp-full', 'Run full OWASP LLM Top 10 compliance scan (all applicable categories)')
|
|
127
|
+
.option(
|
|
128
|
+
'--min-severity <level>',
|
|
129
|
+
'Minimum severity level for attacks (low, medium, high, critical)'
|
|
130
|
+
)
|
|
103
131
|
.action(async (scenarioPath: string, options: RedteamOptions) => {
|
|
104
132
|
const spinner = createSpinner('Loading configuration...');
|
|
105
133
|
spinner.start();
|
|
@@ -145,8 +173,15 @@ export function redteamCommand(): Command {
|
|
|
145
173
|
const client = await createAdapter(adapterConfig);
|
|
146
174
|
spinner.succeed(`Connected to ${provider}`);
|
|
147
175
|
|
|
148
|
-
// Set up mutations
|
|
149
|
-
const mutations = selectMutations(
|
|
176
|
+
// Set up mutations - check for OWASP flags first
|
|
177
|
+
const mutations = selectMutations({
|
|
178
|
+
names: options.mutations,
|
|
179
|
+
customAttacksPath: options.customAttacks,
|
|
180
|
+
owaspCategories: options.owasp,
|
|
181
|
+
owaspFull: options.owaspFull,
|
|
182
|
+
minSeverity: options.minSeverity,
|
|
183
|
+
});
|
|
184
|
+
|
|
150
185
|
const generator = new RedTeamGenerator(mutations);
|
|
151
186
|
const detector = new UnsafeResponseDetector();
|
|
152
187
|
const count = Number.parseInt(String(options.count)) || 5;
|
|
@@ -158,6 +193,14 @@ export function redteamCommand(): Command {
|
|
|
158
193
|
`Prompts per case: ${count}`,
|
|
159
194
|
`Total cases: ${scenario.cases.length}`,
|
|
160
195
|
];
|
|
196
|
+
if (options.owasp || options.owaspFull) {
|
|
197
|
+
configLines.push(
|
|
198
|
+
`OWASP Mode: ${options.owaspFull ? 'Full Compliance Scan' : options.owasp?.join(', ')}`
|
|
199
|
+
);
|
|
200
|
+
}
|
|
201
|
+
if (options.minSeverity) {
|
|
202
|
+
configLines.push(`Min Severity: ${options.minSeverity}`);
|
|
203
|
+
}
|
|
161
204
|
if (options.redact) {
|
|
162
205
|
configLines.push(
|
|
163
206
|
`Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ''}`
|
|
@@ -417,6 +460,10 @@ export function redteamCommand(): Command {
|
|
|
417
460
|
model: resolvedConfig.model,
|
|
418
461
|
mutations: mutations.map((m) => m.name),
|
|
419
462
|
count_per_case: count,
|
|
463
|
+
// Include OWASP info in config
|
|
464
|
+
...(options.owaspFull && { owasp_mode: 'full' }),
|
|
465
|
+
...(options.owasp && { owasp_categories: options.owasp }),
|
|
466
|
+
...(options.minSeverity && { min_severity: options.minSeverity }),
|
|
420
467
|
},
|
|
421
468
|
resolved_config: resolvedConfig,
|
|
422
469
|
metrics,
|
|
@@ -542,22 +589,123 @@ export function redteamCommand(): Command {
|
|
|
542
589
|
return cmd;
|
|
543
590
|
}
|
|
544
591
|
|
|
545
|
-
|
|
546
|
-
|
|
592
|
+
/**
|
|
593
|
+
* All available mutations registry
|
|
594
|
+
*/
|
|
595
|
+
function getAllMutations(): Record<string, Mutation> {
|
|
596
|
+
return {
|
|
597
|
+
// Core mutations (v0.1.x - v0.2.x)
|
|
547
598
|
typo: new TypoMutation(),
|
|
548
599
|
'role-spoof': new RoleSpoofMutation(),
|
|
549
600
|
'instruction-flip': new InstructionFlipMutation(),
|
|
550
601
|
'cot-injection': new CotInjectionMutation(),
|
|
551
602
|
encoding: new EncodingMutation(),
|
|
552
603
|
'multi-turn': new MultiTurnMutation(),
|
|
604
|
+
|
|
605
|
+
// OWASP LLM Top 10 2025 mutations (v0.3.0)
|
|
606
|
+
// LLM01 - Prompt Injection
|
|
607
|
+
'bad-likert-judge': new BadLikertJudgeMutation(),
|
|
608
|
+
crescendo: new CrescendoMutation(),
|
|
609
|
+
'deceptive-delight': new DeceptiveDelightMutation(),
|
|
610
|
+
|
|
611
|
+
// LLM05 - Insecure Output Handling
|
|
612
|
+
'output-injection': new OutputInjectionMutation(),
|
|
613
|
+
|
|
614
|
+
// LLM06 - Excessive Agency
|
|
615
|
+
'excessive-agency': new ExcessiveAgencyMutation(),
|
|
616
|
+
|
|
617
|
+
// LLM07 - System Prompt Leakage
|
|
618
|
+
'system-extraction': new SystemExtractionMutation(),
|
|
619
|
+
|
|
620
|
+
// LLM09 - Misinformation
|
|
621
|
+
'hallucination-trap': new HallucinationTrapMutation(),
|
|
553
622
|
};
|
|
623
|
+
}
|
|
554
624
|
|
|
555
|
-
|
|
625
|
+
/**
|
|
626
|
+
* Get OWASP mutations for specific categories
|
|
627
|
+
*/
|
|
628
|
+
function getOwaspMutations(categories: string[]): string[] {
|
|
629
|
+
const mutationNames = new Set<string>();
|
|
630
|
+
|
|
631
|
+
for (const category of categories) {
|
|
632
|
+
const upperCategory = category.toUpperCase();
|
|
633
|
+
if (upperCategory in OWASP_CATEGORIES) {
|
|
634
|
+
const mutations = getMutationsForCategory(upperCategory as keyof typeof OWASP_CATEGORIES);
|
|
635
|
+
for (const mutation of mutations) {
|
|
636
|
+
mutationNames.add(mutation);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
return Array.from(mutationNames);
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
/**
|
|
645
|
+
* Get all OWASP mutations
|
|
646
|
+
*/
|
|
647
|
+
function getAllOwaspMutations(): string[] {
|
|
648
|
+
return [
|
|
649
|
+
'bad-likert-judge',
|
|
650
|
+
'crescendo',
|
|
651
|
+
'deceptive-delight',
|
|
652
|
+
'output-injection',
|
|
653
|
+
'excessive-agency',
|
|
654
|
+
'system-extraction',
|
|
655
|
+
'hallucination-trap',
|
|
656
|
+
];
|
|
657
|
+
}
|
|
556
658
|
|
|
557
|
-
|
|
558
|
-
|
|
659
|
+
interface SelectMutationsOptions {
|
|
660
|
+
names?: string[];
|
|
661
|
+
customAttacksPath?: string;
|
|
662
|
+
owaspCategories?: string[];
|
|
663
|
+
owaspFull?: boolean;
|
|
664
|
+
minSeverity?: 'low' | 'medium' | 'high' | 'critical';
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
function selectMutations(options: SelectMutationsOptions): Mutation[] {
|
|
668
|
+
const { names, customAttacksPath, owaspCategories, owaspFull, minSeverity } = options;
|
|
669
|
+
|
|
670
|
+
const allMutations = getAllMutations();
|
|
671
|
+
let selectedNames: string[] = [];
|
|
672
|
+
|
|
673
|
+
// Determine which mutations to use based on options
|
|
674
|
+
if (owaspFull) {
|
|
675
|
+
// Full OWASP scan - use all OWASP mutations
|
|
676
|
+
selectedNames = getAllOwaspMutations();
|
|
677
|
+
} else if (owaspCategories && owaspCategories.length > 0) {
|
|
678
|
+
// Specific OWASP categories
|
|
679
|
+
selectedNames = getOwaspMutations(owaspCategories);
|
|
680
|
+
} else if (names && names.length > 0) {
|
|
681
|
+
// Explicit mutation names
|
|
682
|
+
selectedNames = names;
|
|
559
683
|
} else {
|
|
560
|
-
mutations
|
|
684
|
+
// Default: use core mutations (not OWASP ones to maintain backward compatibility)
|
|
685
|
+
selectedNames = [
|
|
686
|
+
'typo',
|
|
687
|
+
'role-spoof',
|
|
688
|
+
'instruction-flip',
|
|
689
|
+
'cot-injection',
|
|
690
|
+
'encoding',
|
|
691
|
+
'multi-turn',
|
|
692
|
+
];
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
// Filter to valid mutation names
|
|
696
|
+
let mutations = selectedNames
|
|
697
|
+
.filter((name) => name in allMutations)
|
|
698
|
+
.map((name) => allMutations[name]);
|
|
699
|
+
|
|
700
|
+
// Apply severity filter if specified
|
|
701
|
+
if (minSeverity) {
|
|
702
|
+
const severityOrder = ['low', 'medium', 'high', 'critical'];
|
|
703
|
+
const minIndex = severityOrder.indexOf(minSeverity);
|
|
704
|
+
|
|
705
|
+
mutations = mutations.filter((m) => {
|
|
706
|
+
const mutationIndex = severityOrder.indexOf(m.severity);
|
|
707
|
+
return mutationIndex >= minIndex;
|
|
708
|
+
});
|
|
561
709
|
}
|
|
562
710
|
|
|
563
711
|
// Load custom attacks if path provided
|
package/src/commands/run.ts
CHANGED
|
@@ -554,7 +554,8 @@ async function runScenariosInParallel(
|
|
|
554
554
|
while (queue.length > 0 || inProgress.size > 0) {
|
|
555
555
|
// Start new tasks up to the limit
|
|
556
556
|
while (queue.length > 0 && inProgress.size < parallelLimit) {
|
|
557
|
-
const path = queue.shift()
|
|
557
|
+
const path = queue.shift();
|
|
558
|
+
if (!path) break;
|
|
558
559
|
const promise = processScenario(path).then(() => {
|
|
559
560
|
inProgress.delete(promise);
|
|
560
561
|
});
|
package/src/config/schema.ts
CHANGED
|
@@ -22,6 +22,12 @@ const ProviderConfigSchema = z.object({
|
|
|
22
22
|
modelFamily: z.string().optional(),
|
|
23
23
|
// Vercel AI specific
|
|
24
24
|
underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
|
|
25
|
+
// LangChain specific
|
|
26
|
+
name: z.string().optional(),
|
|
27
|
+
runnableType: z.enum(['chain', 'agent', 'llm', 'runnable']).optional(),
|
|
28
|
+
// DeepAgents specific
|
|
29
|
+
captureTraces: z.boolean().optional(),
|
|
30
|
+
captureMessages: z.boolean().optional(),
|
|
25
31
|
});
|
|
26
32
|
|
|
27
33
|
const StorageConfigSchema = z.object({
|