@artemiskit/cli 0.2.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAuBtD"}
1
+ {"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAqDtD"}
@@ -1,5 +1,10 @@
1
1
  /**
2
2
  * Redteam command - Run red-team adversarial tests
3
+ *
4
+ * Supports OWASP LLM Top 10 2025 security testing with new flags:
5
+ * --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
6
+ * --owasp-full: Full OWASP compliance scan
7
+ * --min-severity: Filter attacks by minimum severity level
3
8
  */
4
9
  import { Command } from 'commander';
5
10
  export declare function redteamCommand(): Command;
@@ -1 +1 @@
1
- {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAuCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAoCpC,wBAAgB,cAAc,IAAI,OAAO,CAidxC"}
1
+ {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAgDH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAwCpC,wBAAgB,cAAc,IAAI,OAAO,CA8exC"}
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAyiBpC,wBAAgB,UAAU,IAAI,OAAO,CAwgBpC"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA0iBpC,wBAAgB,UAAU,IAAI,OAAO,CAwgBpC"}
@@ -15,6 +15,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
15
15
  embeddingDeploymentName: z.ZodOptional<z.ZodString>;
16
16
  modelFamily: z.ZodOptional<z.ZodString>;
17
17
  underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
18
+ name: z.ZodOptional<z.ZodString>;
19
+ runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
20
+ captureTraces: z.ZodOptional<z.ZodBoolean>;
21
+ captureMessages: z.ZodOptional<z.ZodBoolean>;
18
22
  }, "strip", z.ZodTypeAny, {
19
23
  apiKey?: string | undefined;
20
24
  baseUrl?: string | undefined;
@@ -28,6 +32,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
28
32
  embeddingDeploymentName?: string | undefined;
29
33
  modelFamily?: string | undefined;
30
34
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
35
+ name?: string | undefined;
36
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
37
+ captureTraces?: boolean | undefined;
38
+ captureMessages?: boolean | undefined;
31
39
  }, {
32
40
  apiKey?: string | undefined;
33
41
  baseUrl?: string | undefined;
@@ -41,6 +49,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
41
49
  embeddingDeploymentName?: string | undefined;
42
50
  modelFamily?: string | undefined;
43
51
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
52
+ name?: string | undefined;
53
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
54
+ captureTraces?: boolean | undefined;
55
+ captureMessages?: boolean | undefined;
44
56
  }>;
45
57
  declare const StorageConfigSchema: z.ZodObject<{
46
58
  type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
@@ -78,6 +90,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
78
90
  embeddingDeploymentName: z.ZodOptional<z.ZodString>;
79
91
  modelFamily: z.ZodOptional<z.ZodString>;
80
92
  underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
93
+ name: z.ZodOptional<z.ZodString>;
94
+ runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
95
+ captureTraces: z.ZodOptional<z.ZodBoolean>;
96
+ captureMessages: z.ZodOptional<z.ZodBoolean>;
81
97
  }, "strip", z.ZodTypeAny, {
82
98
  apiKey?: string | undefined;
83
99
  baseUrl?: string | undefined;
@@ -91,6 +107,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
91
107
  embeddingDeploymentName?: string | undefined;
92
108
  modelFamily?: string | undefined;
93
109
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
110
+ name?: string | undefined;
111
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
112
+ captureTraces?: boolean | undefined;
113
+ captureMessages?: boolean | undefined;
94
114
  }, {
95
115
  apiKey?: string | undefined;
96
116
  baseUrl?: string | undefined;
@@ -104,6 +124,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
104
124
  embeddingDeploymentName?: string | undefined;
105
125
  modelFamily?: string | undefined;
106
126
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
127
+ name?: string | undefined;
128
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
129
+ captureTraces?: boolean | undefined;
130
+ captureMessages?: boolean | undefined;
107
131
  }>>>;
108
132
  storage: z.ZodOptional<z.ZodObject<{
109
133
  type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
@@ -169,6 +193,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
169
193
  embeddingDeploymentName?: string | undefined;
170
194
  modelFamily?: string | undefined;
171
195
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
196
+ name?: string | undefined;
197
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
198
+ captureTraces?: boolean | undefined;
199
+ captureMessages?: boolean | undefined;
172
200
  }> | undefined;
173
201
  storage?: {
174
202
  type: "supabase" | "local";
@@ -204,6 +232,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
204
232
  embeddingDeploymentName?: string | undefined;
205
233
  modelFamily?: string | undefined;
206
234
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
235
+ name?: string | undefined;
236
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
237
+ captureTraces?: boolean | undefined;
238
+ captureMessages?: boolean | undefined;
207
239
  }> | undefined;
208
240
  storage?: {
209
241
  type?: "supabase" | "local" | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA8ErF;AAsXD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CAsGrF;AAqgBD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/cli",
3
- "version": "0.2.4",
3
+ "version": "0.3.1",
4
4
  "description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -45,11 +45,13 @@
45
45
  "test": "bun test"
46
46
  },
47
47
  "dependencies": {
48
- "@artemiskit/adapter-openai": "0.1.11",
49
- "@artemiskit/adapter-vercel-ai": "0.1.11",
50
- "@artemiskit/core": "0.2.4",
51
- "@artemiskit/redteam": "0.2.4",
52
- "@artemiskit/reports": "0.2.4",
48
+ "@artemiskit/adapter-deepagents": "0.2.0",
49
+ "@artemiskit/adapter-langchain": "0.2.0",
50
+ "@artemiskit/adapter-openai": "0.1.12",
51
+ "@artemiskit/adapter-vercel-ai": "0.1.12",
52
+ "@artemiskit/core": "0.3.0",
53
+ "@artemiskit/redteam": "0.3.0",
54
+ "@artemiskit/reports": "0.3.0",
53
55
  "chalk": "^5.3.0",
54
56
  "cli-table3": "^0.6.3",
55
57
  "commander": "^12.0.0",
@@ -62,7 +62,7 @@ describe('UI Components', () => {
62
62
  expect(panel).toContain('TEST RESULTS');
63
63
  });
64
64
 
65
- it('should use box drawing characters', () => {
65
+ it('should render panel with consistent formatting', () => {
66
66
  const panel = renderSummaryPanel({
67
67
  passed: 5,
68
68
  failed: 0,
@@ -71,11 +71,11 @@ describe('UI Components', () => {
71
71
  duration: 5000,
72
72
  });
73
73
 
74
- expect(panel).toContain('╔');
75
- expect(panel).toContain('╗');
76
- expect(panel).toContain('');
77
- expect(panel).toContain('');
78
- expect(panel).toContain('');
74
+ // Panel should contain key information regardless of formatting style
75
+ // (box-drawing in TTY mode, ASCII fallback in non-TTY)
76
+ expect(panel).toContain('Passed');
77
+ expect(panel).toContain('5');
78
+ expect(panel).toContain('100');
79
79
  });
80
80
 
81
81
  it('should support custom title', () => {
@@ -116,16 +116,17 @@ describe('UI Components', () => {
116
116
  expect(error).toContain('Suggestions');
117
117
  });
118
118
 
119
- it('should use box drawing characters', () => {
119
+ it('should render error with consistent formatting', () => {
120
120
  const error = renderError({
121
121
  title: 'Test Error',
122
122
  reason: 'Test reason',
123
123
  });
124
124
 
125
- expect(error).toContain('┌');
126
- expect(error).toContain('┐');
127
- expect(error).toContain('');
128
- expect(error).toContain('');
125
+ // Error should contain key information regardless of formatting style
126
+ // (box-drawing in TTY mode, ASCII fallback in non-TTY)
127
+ expect(error).toContain('ERROR');
128
+ expect(error).toContain('Test Error');
129
+ expect(error).toContain('Test reason');
129
130
  });
130
131
  });
131
132
 
@@ -138,14 +139,13 @@ describe('UI Components', () => {
138
139
  expect(box).toContain('Line 2');
139
140
  });
140
141
 
141
- it('should use box drawing characters', () => {
142
+ it('should render info box with consistent formatting', () => {
142
143
  const box = renderInfoBox('Test', ['content']);
143
144
 
144
- // Uses standard box drawing (may be rounded or square depending on implementation)
145
- expect(box).toContain('┌');
146
- expect(box).toContain('');
147
- expect(box).toContain('');
148
- expect(box).toContain('┘');
145
+ // Info box should contain key information regardless of formatting style
146
+ // (box-drawing in TTY mode, ASCII fallback in non-TTY)
147
+ expect(box).toContain('Test');
148
+ expect(box).toContain('content');
149
149
  });
150
150
  });
151
151
 
package/src/adapters.ts CHANGED
@@ -23,6 +23,36 @@ export async function registerAdapters(): Promise<void> {
23
23
  return new VercelAIAdapter(config);
24
24
  });
25
25
 
26
+ // LangChain adapter - requires runnable via metadata
27
+ adapterRegistry.register('langchain', async (config: AdapterConfig): Promise<ModelClient> => {
28
+ // Dynamic import to avoid bundling LangChain dependencies
29
+ // biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid runnable
30
+ const { LangChainAdapter } = (await import('@artemiskit/adapter-langchain')) as any;
31
+ const runnable = (config as { metadata?: { runnable?: unknown } }).metadata?.runnable;
32
+ if (!runnable) {
33
+ throw new Error(
34
+ 'LangChain adapter requires a runnable instance. ' +
35
+ 'Pass it via config.metadata.runnable or use createLangChainAdapter() directly.'
36
+ );
37
+ }
38
+ return new LangChainAdapter(config, runnable);
39
+ });
40
+
41
+ // DeepAgents adapter - requires system via metadata
42
+ adapterRegistry.register('deepagents', async (config: AdapterConfig): Promise<ModelClient> => {
43
+ // Dynamic import to avoid bundling DeepAgents dependencies
44
+ // biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid system
45
+ const { DeepAgentsAdapter } = (await import('@artemiskit/adapter-deepagents')) as any;
46
+ const system = (config as { metadata?: { system?: unknown } }).metadata?.system;
47
+ if (!system) {
48
+ throw new Error(
49
+ 'DeepAgents adapter requires a system instance. ' +
50
+ 'Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.'
51
+ );
52
+ }
53
+ return new DeepAgentsAdapter(config, system);
54
+ });
55
+
26
56
  // Mark post-MVP adapters as unavailable
27
57
  adapterRegistry.markUnavailable('anthropic', 'Anthropic adapter coming in v0.2.0');
28
58
  adapterRegistry.markUnavailable('google', 'Google adapter coming in v0.3.0');
@@ -1,5 +1,10 @@
1
1
  /**
2
2
  * Redteam command - Run red-team adversarial tests
3
+ *
4
+ * Supports OWASP LLM Top 10 2025 security testing with new flags:
5
+ * --owasp: Test specific OWASP categories (variadic, e.g., --owasp LLM01 LLM05)
6
+ * --owasp-full: Full OWASP compliance scan
7
+ * --min-severity: Filter attacks by minimum severity level
3
8
  */
4
9
 
5
10
  import { mkdir, writeFile } from 'node:fs/promises';
@@ -19,17 +24,26 @@ import {
19
24
  parseScenarioFile,
20
25
  } from '@artemiskit/core';
21
26
  import {
27
+ BadLikertJudgeMutation,
22
28
  type ConversationTurn,
23
29
  CotInjectionMutation,
30
+ CrescendoMutation,
31
+ DeceptiveDelightMutation,
24
32
  EncodingMutation,
33
+ ExcessiveAgencyMutation,
34
+ HallucinationTrapMutation,
25
35
  InstructionFlipMutation,
26
36
  MultiTurnMutation,
27
37
  type Mutation,
38
+ OWASP_CATEGORIES,
39
+ OutputInjectionMutation,
28
40
  RedTeamGenerator,
29
41
  RoleSpoofMutation,
30
42
  SeverityMapper,
43
+ SystemExtractionMutation,
31
44
  TypoMutation,
32
45
  UnsafeResponseDetector,
46
+ getMutationsForCategory,
33
47
  loadCustomAttacks,
34
48
  } from '@artemiskit/redteam';
35
49
  import {
@@ -73,6 +87,10 @@ interface RedteamOptions {
73
87
  redactPatterns?: string[];
74
88
  export?: 'markdown' | 'junit';
75
89
  exportOutput?: string;
90
+ // OWASP options
91
+ owasp?: string[];
92
+ owaspFull?: boolean;
93
+ minSeverity?: 'low' | 'medium' | 'high' | 'critical';
76
94
  }
77
95
 
78
96
  export function redteamCommand(): Command {
@@ -85,7 +103,7 @@ export function redteamCommand(): Command {
85
103
  .option('-m, --model <model>', 'Model to use')
86
104
  .option(
87
105
  '--mutations <mutations...>',
88
- 'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn)'
106
+ 'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn, bad-likert-judge, crescendo, deceptive-delight, output-injection, excessive-agency, system-extraction, hallucination-trap)'
89
107
  )
90
108
  .option('-c, --count <number>', 'Number of mutated prompts per case', '5')
91
109
  .option('--custom-attacks <path>', 'Path to custom attacks YAML file')
@@ -100,6 +118,16 @@ export function redteamCommand(): Command {
100
118
  )
101
119
  .option('--export <format>', 'Export results to format (markdown or junit)')
102
120
  .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
121
+ // OWASP options
122
+ .option(
123
+ '--owasp <categories...>',
124
+ 'Test specific OWASP LLM Top 10 categories (e.g., LLM01, LLM05, LLM06)'
125
+ )
126
+ .option('--owasp-full', 'Run full OWASP LLM Top 10 compliance scan (all applicable categories)')
127
+ .option(
128
+ '--min-severity <level>',
129
+ 'Minimum severity level for attacks (low, medium, high, critical)'
130
+ )
103
131
  .action(async (scenarioPath: string, options: RedteamOptions) => {
104
132
  const spinner = createSpinner('Loading configuration...');
105
133
  spinner.start();
@@ -145,8 +173,15 @@ export function redteamCommand(): Command {
145
173
  const client = await createAdapter(adapterConfig);
146
174
  spinner.succeed(`Connected to ${provider}`);
147
175
 
148
- // Set up mutations
149
- const mutations = selectMutations(options.mutations, options.customAttacks);
176
+ // Set up mutations - check for OWASP flags first
177
+ const mutations = selectMutations({
178
+ names: options.mutations,
179
+ customAttacksPath: options.customAttacks,
180
+ owaspCategories: options.owasp,
181
+ owaspFull: options.owaspFull,
182
+ minSeverity: options.minSeverity,
183
+ });
184
+
150
185
  const generator = new RedTeamGenerator(mutations);
151
186
  const detector = new UnsafeResponseDetector();
152
187
  const count = Number.parseInt(String(options.count)) || 5;
@@ -158,6 +193,14 @@ export function redteamCommand(): Command {
158
193
  `Prompts per case: ${count}`,
159
194
  `Total cases: ${scenario.cases.length}`,
160
195
  ];
196
+ if (options.owasp || options.owaspFull) {
197
+ configLines.push(
198
+ `OWASP Mode: ${options.owaspFull ? 'Full Compliance Scan' : options.owasp?.join(', ')}`
199
+ );
200
+ }
201
+ if (options.minSeverity) {
202
+ configLines.push(`Min Severity: ${options.minSeverity}`);
203
+ }
161
204
  if (options.redact) {
162
205
  configLines.push(
163
206
  `Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ''}`
@@ -417,6 +460,10 @@ export function redteamCommand(): Command {
417
460
  model: resolvedConfig.model,
418
461
  mutations: mutations.map((m) => m.name),
419
462
  count_per_case: count,
463
+ // Include OWASP info in config
464
+ ...(options.owaspFull && { owasp_mode: 'full' }),
465
+ ...(options.owasp && { owasp_categories: options.owasp }),
466
+ ...(options.minSeverity && { min_severity: options.minSeverity }),
420
467
  },
421
468
  resolved_config: resolvedConfig,
422
469
  metrics,
@@ -542,22 +589,123 @@ export function redteamCommand(): Command {
542
589
  return cmd;
543
590
  }
544
591
 
545
- function selectMutations(names?: string[], customAttacksPath?: string): Mutation[] {
546
- const allMutations: Record<string, Mutation> = {
592
/**
 * Build the registry of built-in mutations.
 *
 * Keys are the mutation names that `selectMutations` looks up, i.e. the values
 * accepted by `--mutations`. Every call constructs a fresh instance of each mutation.
 *
 * @returns Map from mutation name to mutation instance, core mutations first,
 *   followed by the OWASP LLM Top 10 2025 mutations.
 */
function getAllMutations(): Record<string, Mutation> {
  // Core mutations (v0.1.x - v0.2.x)
  const coreMutations: Record<string, Mutation> = {
    typo: new TypoMutation(),
    'role-spoof': new RoleSpoofMutation(),
    'instruction-flip': new InstructionFlipMutation(),
    'cot-injection': new CotInjectionMutation(),
    encoding: new EncodingMutation(),
    'multi-turn': new MultiTurnMutation(),
  };

  // OWASP LLM Top 10 2025 mutations (v0.3.0)
  const owaspMutations: Record<string, Mutation> = {
    // LLM01 - Prompt Injection
    'bad-likert-judge': new BadLikertJudgeMutation(),
    crescendo: new CrescendoMutation(),
    'deceptive-delight': new DeceptiveDelightMutation(),
    // LLM05 - Insecure Output Handling
    'output-injection': new OutputInjectionMutation(),
    // LLM06 - Excessive Agency
    'excessive-agency': new ExcessiveAgencyMutation(),
    // LLM07 - System Prompt Leakage
    'system-extraction': new SystemExtractionMutation(),
    // LLM09 - Misinformation
    'hallucination-trap': new HallucinationTrapMutation(),
  };

  // Spread order keeps the same key order as a single literal would produce.
  return { ...coreMutations, ...owaspMutations };
}
554
624
 
555
- let mutations: Mutation[];
625
/**
 * Resolve OWASP LLM Top 10 category IDs to the names of the mutations that cover them.
 *
 * Each entry may hold a single ID (`LLM01`) or several comma-separated IDs
 * (`LLM01,LLM05`). `--owasp` is a variadic option, so the shell splits values on
 * whitespace only: `--owasp LLM01,LLM05` arrives as one element and
 * `--owasp LLM01, LLM05` arrives with a trailing comma. Entries are therefore split
 * on commas, trimmed and upper-cased before lookup, so all of these spellings
 * select the same categories.
 *
 * @param categories - Raw category values as received from the CLI.
 * @returns De-duplicated mutation names in first-seen order. IDs that are not keys
 *   of `OWASP_CATEGORIES` contribute nothing.
 */
function getOwaspMutations(categories: string[]): string[] {
  const mutationNames = new Set<string>();

  // Normalize: one ID per element, no surrounding whitespace, no empty leftovers
  // from trailing or doubled commas.
  const categoryIds = categories
    .flatMap((entry) => entry.split(','))
    .map((id) => id.trim().toUpperCase())
    .filter((id) => id.length > 0);

  for (const categoryId of categoryIds) {
    if (categoryId in OWASP_CATEGORIES) {
      const mutations = getMutationsForCategory(categoryId as keyof typeof OWASP_CATEGORIES);
      for (const mutation of mutations) {
        mutationNames.add(mutation);
      }
    }
  }

  return Array.from(mutationNames);
}
643
+
644
/**
 * List the names of every OWASP-oriented mutation, used for the full compliance scan.
 *
 * @returns Mutation names ordered by OWASP category: LLM01, LLM05, LLM06, LLM07, LLM09.
 */
function getAllOwaspMutations(): string[] {
  // LLM01 - Prompt Injection
  const promptInjection = ['bad-likert-judge', 'crescendo', 'deceptive-delight'];
  // LLM05 - Insecure Output Handling
  const insecureOutput = ['output-injection'];
  // LLM06 - Excessive Agency
  const excessiveAgency = ['excessive-agency'];
  // LLM07 - System Prompt Leakage
  const promptLeakage = ['system-extraction'];
  // LLM09 - Misinformation
  const misinformation = ['hallucination-trap'];

  return promptInjection.concat(insecureOutput, excessiveAgency, promptLeakage, misinformation);
}
556
658
 
557
- if (!names || names.length === 0) {
558
- mutations = Object.values(allMutations);
659
/**
 * Inputs that decide which mutations a red-team run uses.
 */
interface SelectMutationsOptions {
  /** Explicit mutation names (from `--mutations`). */
  names?: string[];
  /** Path to a custom attacks YAML file (from `--custom-attacks`). */
  customAttacksPath?: string;
  /** OWASP LLM Top 10 category IDs (from `--owasp`). */
  owaspCategories?: string[];
  /** When true, run every OWASP mutation (from `--owasp-full`). */
  owaspFull?: boolean;
  /** Lowest severity a mutation must have to be kept (from `--min-severity`). */
  minSeverity?: 'low' | 'medium' | 'high' | 'critical';
}
666
+
667
+ function selectMutations(options: SelectMutationsOptions): Mutation[] {
668
+ const { names, customAttacksPath, owaspCategories, owaspFull, minSeverity } = options;
669
+
670
+ const allMutations = getAllMutations();
671
+ let selectedNames: string[] = [];
672
+
673
+ // Determine which mutations to use based on options
674
+ if (owaspFull) {
675
+ // Full OWASP scan - use all OWASP mutations
676
+ selectedNames = getAllOwaspMutations();
677
+ } else if (owaspCategories && owaspCategories.length > 0) {
678
+ // Specific OWASP categories
679
+ selectedNames = getOwaspMutations(owaspCategories);
680
+ } else if (names && names.length > 0) {
681
+ // Explicit mutation names
682
+ selectedNames = names;
559
683
  } else {
560
- mutations = names.filter((name) => name in allMutations).map((name) => allMutations[name]);
684
+ // Default: use core mutations (not OWASP ones to maintain backward compatibility)
685
+ selectedNames = [
686
+ 'typo',
687
+ 'role-spoof',
688
+ 'instruction-flip',
689
+ 'cot-injection',
690
+ 'encoding',
691
+ 'multi-turn',
692
+ ];
693
+ }
694
+
695
+ // Filter to valid mutation names
696
+ let mutations = selectedNames
697
+ .filter((name) => name in allMutations)
698
+ .map((name) => allMutations[name]);
699
+
700
+ // Apply severity filter if specified
701
+ if (minSeverity) {
702
+ const severityOrder = ['low', 'medium', 'high', 'critical'];
703
+ const minIndex = severityOrder.indexOf(minSeverity);
704
+
705
+ mutations = mutations.filter((m) => {
706
+ const mutationIndex = severityOrder.indexOf(m.severity);
707
+ return mutationIndex >= minIndex;
708
+ });
561
709
  }
562
710
 
563
711
  // Load custom attacks if path provided
@@ -554,7 +554,8 @@ async function runScenariosInParallel(
554
554
  while (queue.length > 0 || inProgress.size > 0) {
555
555
  // Start new tasks up to the limit
556
556
  while (queue.length > 0 && inProgress.size < parallelLimit) {
557
- const path = queue.shift()!;
557
+ const path = queue.shift();
558
+ if (!path) break;
558
559
  const promise = processScenario(path).then(() => {
559
560
  inProgress.delete(promise);
560
561
  });
@@ -22,6 +22,12 @@ const ProviderConfigSchema = z.object({
22
22
  modelFamily: z.string().optional(),
23
23
  // Vercel AI specific
24
24
  underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
25
+ // LangChain specific
26
+ name: z.string().optional(),
27
+ runnableType: z.enum(['chain', 'agent', 'llm', 'runnable']).optional(),
28
+ // DeepAgents specific
29
+ captureTraces: z.boolean().optional(),
30
+ captureMessages: z.boolean().optional(),
25
31
  });
26
32
 
27
33
  const StorageConfigSchema = z.object({