@artemiskit/cli 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAuBtD"}
1
+ {"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../src/adapters.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAqDtD"}
@@ -1 +1 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAYpC,wBAAgB,SAAS,IAAI,OAAO,CAwCnC"}
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAapC,wBAAgB,SAAS,IAAI,OAAO,CAyCnC"}
@@ -1,5 +1,10 @@
1
1
  /**
2
2
  * Redteam command - Run red-team adversarial tests
3
+ *
4
+ * Supports OWASP LLM Top 10 2025 security testing with new flags:
5
+ * --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
6
+ * --owasp-full: Full OWASP compliance scan
7
+ * --min-severity: Filter attacks by minimum severity level
3
8
  */
4
9
  import { Command } from 'commander';
5
10
  export declare function redteamCommand(): Command;
@@ -1 +1 @@
1
- {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;GAEG;AAsCH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAoCpC,wBAAgB,cAAc,IAAI,OAAO,CAycxC"}
1
+ {"version":3,"file":"redteam.d.ts","sourceRoot":"","sources":["../../../src/commands/redteam.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAgDH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAwCpC,wBAAgB,cAAc,IAAI,OAAO,CA8exC"}
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAyiBpC,wBAAgB,UAAU,IAAI,OAAO,CAggBpC"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/commands/run.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA0iBpC,wBAAgB,UAAU,IAAI,OAAO,CAwgBpC"}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Validate command - Validate scenarios without running them
3
+ */
4
+ import { Command } from 'commander';
5
+ export declare function validateCommand(): Command;
6
+ //# sourceMappingURL=validate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validate.d.ts","sourceRoot":"","sources":["../../../src/commands/validate.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAWpC,wBAAgB,eAAe,IAAI,OAAO,CAkHzC"}
@@ -15,6 +15,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
15
15
  embeddingDeploymentName: z.ZodOptional<z.ZodString>;
16
16
  modelFamily: z.ZodOptional<z.ZodString>;
17
17
  underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
18
+ name: z.ZodOptional<z.ZodString>;
19
+ runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
20
+ captureTraces: z.ZodOptional<z.ZodBoolean>;
21
+ captureMessages: z.ZodOptional<z.ZodBoolean>;
18
22
  }, "strip", z.ZodTypeAny, {
19
23
  apiKey?: string | undefined;
20
24
  baseUrl?: string | undefined;
@@ -28,6 +32,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
28
32
  embeddingDeploymentName?: string | undefined;
29
33
  modelFamily?: string | undefined;
30
34
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
35
+ name?: string | undefined;
36
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
37
+ captureTraces?: boolean | undefined;
38
+ captureMessages?: boolean | undefined;
31
39
  }, {
32
40
  apiKey?: string | undefined;
33
41
  baseUrl?: string | undefined;
@@ -41,6 +49,10 @@ declare const ProviderConfigSchema: z.ZodObject<{
41
49
  embeddingDeploymentName?: string | undefined;
42
50
  modelFamily?: string | undefined;
43
51
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
52
+ name?: string | undefined;
53
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
54
+ captureTraces?: boolean | undefined;
55
+ captureMessages?: boolean | undefined;
44
56
  }>;
45
57
  declare const StorageConfigSchema: z.ZodObject<{
46
58
  type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
@@ -78,6 +90,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
78
90
  embeddingDeploymentName: z.ZodOptional<z.ZodString>;
79
91
  modelFamily: z.ZodOptional<z.ZodString>;
80
92
  underlyingProvider: z.ZodOptional<z.ZodEnum<["openai", "azure", "anthropic", "google", "mistral"]>>;
93
+ name: z.ZodOptional<z.ZodString>;
94
+ runnableType: z.ZodOptional<z.ZodEnum<["chain", "agent", "llm", "runnable"]>>;
95
+ captureTraces: z.ZodOptional<z.ZodBoolean>;
96
+ captureMessages: z.ZodOptional<z.ZodBoolean>;
81
97
  }, "strip", z.ZodTypeAny, {
82
98
  apiKey?: string | undefined;
83
99
  baseUrl?: string | undefined;
@@ -91,6 +107,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
91
107
  embeddingDeploymentName?: string | undefined;
92
108
  modelFamily?: string | undefined;
93
109
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
110
+ name?: string | undefined;
111
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
112
+ captureTraces?: boolean | undefined;
113
+ captureMessages?: boolean | undefined;
94
114
  }, {
95
115
  apiKey?: string | undefined;
96
116
  baseUrl?: string | undefined;
@@ -104,6 +124,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
104
124
  embeddingDeploymentName?: string | undefined;
105
125
  modelFamily?: string | undefined;
106
126
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
127
+ name?: string | undefined;
128
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
129
+ captureTraces?: boolean | undefined;
130
+ captureMessages?: boolean | undefined;
107
131
  }>>>;
108
132
  storage: z.ZodOptional<z.ZodObject<{
109
133
  type: z.ZodDefault<z.ZodEnum<["supabase", "local"]>>;
@@ -169,6 +193,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
169
193
  embeddingDeploymentName?: string | undefined;
170
194
  modelFamily?: string | undefined;
171
195
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
196
+ name?: string | undefined;
197
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
198
+ captureTraces?: boolean | undefined;
199
+ captureMessages?: boolean | undefined;
172
200
  }> | undefined;
173
201
  storage?: {
174
202
  type: "supabase" | "local";
@@ -204,6 +232,10 @@ export declare const ArtemisConfigSchema: z.ZodObject<{
204
232
  embeddingDeploymentName?: string | undefined;
205
233
  modelFamily?: string | undefined;
206
234
  underlyingProvider?: "openai" | "anthropic" | "google" | "mistral" | "azure" | undefined;
235
+ name?: string | undefined;
236
+ runnableType?: "chain" | "agent" | "llm" | "runnable" | undefined;
237
+ captureTraces?: boolean | undefined;
238
+ captureMessages?: boolean | undefined;
207
239
  }> | undefined;
208
240
  storage?: {
209
241
  type?: "supabase" | "local" | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/config/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,QAAA,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwBxB,CAAC;AAEH,QAAA,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAMvB,CAAC;AAcH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAS9B,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CA8ErF;AAsXD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../src/utils/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACrE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,MAAM,WAAW,oBAAoB;IACnC,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yCAAyC;IACzC,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,sCAAsC;IACtC,UAAU,CAAC,EAAE,aAAa,GAAG,IAAI,CAAC;IAClC,wCAAwC;IACxC,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,qCAAqC;IACrC,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AAOD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,oDAAoD;IACpD,aAAa,EAAE,aAAa,CAAC;IAC7B,+DAA+D;IAC/D,cAAc,EAAE,cAAc,CAAC;CAChC;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,oBAAoB,GAAG,mBAAmB,CAsGrF;AAqgBD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,YAAY,CAAA;CAAE,CAK5C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB;IAAE,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAAC,MAAM,EAAE,YAAY,GAAG,SAAS,CAAA;CAAE,CAKjE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EACzB,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,CAAC,EAAE,MAAM,EACjB,aAAa,CAAC,EAAE,MAAM,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,GAAG,SAAS,CAEpB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/cli",
3
- "version": "0.2.3",
3
+ "version": "0.3.0",
4
4
  "description": "Command-line interface for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -45,11 +45,13 @@
45
45
  "test": "bun test"
46
46
  },
47
47
  "dependencies": {
48
- "@artemiskit/adapter-openai": "0.1.10",
49
- "@artemiskit/adapter-vercel-ai": "0.1.10",
50
- "@artemiskit/core": "0.2.3",
51
- "@artemiskit/redteam": "0.2.3",
52
- "@artemiskit/reports": "0.2.3",
48
+ "@artemiskit/adapter-deepagents": "workspace:*",
49
+ "@artemiskit/adapter-langchain": "workspace:*",
50
+ "@artemiskit/adapter-openai": "0.1.12",
51
+ "@artemiskit/adapter-vercel-ai": "0.1.12",
52
+ "@artemiskit/core": "0.3.0",
53
+ "@artemiskit/redteam": "0.3.0",
54
+ "@artemiskit/reports": "0.3.0",
53
55
  "chalk": "^5.3.0",
54
56
  "cli-table3": "^0.6.3",
55
57
  "commander": "^12.0.0",
@@ -62,7 +62,7 @@ describe('UI Components', () => {
62
62
  expect(panel).toContain('TEST RESULTS');
63
63
  });
64
64
 
65
- it('should use box drawing characters', () => {
65
+ it('should render panel with consistent formatting', () => {
66
66
  const panel = renderSummaryPanel({
67
67
  passed: 5,
68
68
  failed: 0,
@@ -71,11 +71,11 @@ describe('UI Components', () => {
71
71
  duration: 5000,
72
72
  });
73
73
 
74
- expect(panel).toContain('╔');
75
- expect(panel).toContain('╗');
76
- expect(panel).toContain('');
77
- expect(panel).toContain('');
78
- expect(panel).toContain('');
74
+ // Panel should contain key information regardless of formatting style
75
+ // (box-drawing in TTY mode, ASCII fallback in non-TTY)
76
+ expect(panel).toContain('Passed');
77
+ expect(panel).toContain('5');
78
+ expect(panel).toContain('100');
79
79
  });
80
80
 
81
81
  it('should support custom title', () => {
@@ -116,16 +116,17 @@ describe('UI Components', () => {
116
116
  expect(error).toContain('Suggestions');
117
117
  });
118
118
 
119
- it('should use box drawing characters', () => {
119
+ it('should render error with consistent formatting', () => {
120
120
  const error = renderError({
121
121
  title: 'Test Error',
122
122
  reason: 'Test reason',
123
123
  });
124
124
 
125
- expect(error).toContain('┌');
126
- expect(error).toContain('┐');
127
- expect(error).toContain('');
128
- expect(error).toContain('');
125
+ // Error should contain key information regardless of formatting style
126
+ // (box-drawing in TTY mode, ASCII fallback in non-TTY)
127
+ expect(error).toContain('ERROR');
128
+ expect(error).toContain('Test Error');
129
+ expect(error).toContain('Test reason');
129
130
  });
130
131
  });
131
132
 
@@ -138,14 +139,13 @@ describe('UI Components', () => {
138
139
  expect(box).toContain('Line 2');
139
140
  });
140
141
 
141
- it('should use box drawing characters', () => {
142
+ it('should render info box with consistent formatting', () => {
142
143
  const box = renderInfoBox('Test', ['content']);
143
144
 
144
- // Uses standard box drawing (may be rounded or square depending on implementation)
145
- expect(box).toContain('┌');
146
- expect(box).toContain('');
147
- expect(box).toContain('');
148
- expect(box).toContain('┘');
145
+ // Info box should contain key information regardless of formatting style
146
+ // (box-drawing in TTY mode, ASCII fallback in non-TTY)
147
+ expect(box).toContain('Test');
148
+ expect(box).toContain('content');
149
149
  });
150
150
  });
151
151
 
package/src/adapters.ts CHANGED
@@ -23,6 +23,36 @@ export async function registerAdapters(): Promise<void> {
23
23
  return new VercelAIAdapter(config);
24
24
  });
25
25
 
26
+ // LangChain adapter - requires runnable via metadata
27
+ adapterRegistry.register('langchain', async (config: AdapterConfig): Promise<ModelClient> => {
28
+ // Dynamic import to avoid bundling LangChain dependencies
29
+ // biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid runnable
30
+ const { LangChainAdapter } = (await import('@artemiskit/adapter-langchain')) as any;
31
+ const runnable = (config as { metadata?: { runnable?: unknown } }).metadata?.runnable;
32
+ if (!runnable) {
33
+ throw new Error(
34
+ 'LangChain adapter requires a runnable instance. ' +
35
+ 'Pass it via config.metadata.runnable or use createLangChainAdapter() directly.'
36
+ );
37
+ }
38
+ return new LangChainAdapter(config, runnable);
39
+ });
40
+
41
+ // DeepAgents adapter - requires system via metadata
42
+ adapterRegistry.register('deepagents', async (config: AdapterConfig): Promise<ModelClient> => {
43
+ // Dynamic import to avoid bundling DeepAgents dependencies
44
+ // biome-ignore lint/suspicious/noExplicitAny: Runtime validation ensures valid system
45
+ const { DeepAgentsAdapter } = (await import('@artemiskit/adapter-deepagents')) as any;
46
+ const system = (config as { metadata?: { system?: unknown } }).metadata?.system;
47
+ if (!system) {
48
+ throw new Error(
49
+ 'DeepAgents adapter requires a system instance. ' +
50
+ 'Pass it via config.metadata.system or use createDeepAgentsAdapter() directly.'
51
+ );
52
+ }
53
+ return new DeepAgentsAdapter(config, system);
54
+ });
55
+
26
56
  // Mark post-MVP adapters as unavailable
27
57
  adapterRegistry.markUnavailable('anthropic', 'Anthropic adapter coming in v0.2.0');
28
58
  adapterRegistry.markUnavailable('google', 'Google adapter coming in v0.3.0');
package/src/cli.ts CHANGED
@@ -12,6 +12,7 @@ import { redteamCommand } from './commands/redteam';
12
12
  import { reportCommand } from './commands/report';
13
13
  import { runCommand } from './commands/run';
14
14
  import { stressCommand } from './commands/stress';
15
+ import { validateCommand } from './commands/validate';
15
16
  import { checkForUpdate, formatUpdateMessage, formatVersionDisplay } from './utils/update-checker';
16
17
 
17
18
  export function createCLI(): Command {
@@ -46,6 +47,7 @@ export function createCLI(): Command {
46
47
 
47
48
  program.addCommand(initCommand());
48
49
  program.addCommand(runCommand());
50
+ program.addCommand(validateCommand());
49
51
  program.addCommand(baselineCommand());
50
52
  program.addCommand(compareCommand());
51
53
  program.addCommand(historyCommand());
@@ -1,5 +1,10 @@
1
1
  /**
2
2
  * Redteam command - Run red-team adversarial tests
3
+ *
4
+ * Supports OWASP LLM Top 10 2025 security testing with new flags:
5
+ * --owasp: Test specific OWASP categories (e.g., --owasp LLM01,LLM05)
6
+ * --owasp-full: Full OWASP compliance scan
7
+ * --min-severity: Filter attacks by minimum severity level
3
8
  */
4
9
 
5
10
  import { mkdir, writeFile } from 'node:fs/promises';
@@ -19,22 +24,32 @@ import {
19
24
  parseScenarioFile,
20
25
  } from '@artemiskit/core';
21
26
  import {
27
+ BadLikertJudgeMutation,
22
28
  type ConversationTurn,
23
29
  CotInjectionMutation,
30
+ CrescendoMutation,
31
+ DeceptiveDelightMutation,
24
32
  EncodingMutation,
33
+ ExcessiveAgencyMutation,
34
+ HallucinationTrapMutation,
25
35
  InstructionFlipMutation,
26
36
  MultiTurnMutation,
27
37
  type Mutation,
38
+ OWASP_CATEGORIES,
39
+ OutputInjectionMutation,
28
40
  RedTeamGenerator,
29
41
  RoleSpoofMutation,
30
42
  SeverityMapper,
43
+ SystemExtractionMutation,
31
44
  TypoMutation,
32
45
  UnsafeResponseDetector,
46
+ getMutationsForCategory,
33
47
  loadCustomAttacks,
34
48
  } from '@artemiskit/redteam';
35
49
  import {
36
50
  generateJSONReport,
37
51
  generateRedTeamHTMLReport,
52
+ generateRedTeamJUnitReport,
38
53
  generateRedTeamMarkdownReport,
39
54
  } from '@artemiskit/reports';
40
55
  import chalk from 'chalk';
@@ -70,8 +85,12 @@ interface RedteamOptions {
70
85
  config?: string;
71
86
  redact?: boolean;
72
87
  redactPatterns?: string[];
73
- export?: 'markdown';
88
+ export?: 'markdown' | 'junit';
74
89
  exportOutput?: string;
90
+ // OWASP options
91
+ owasp?: string[];
92
+ owaspFull?: boolean;
93
+ minSeverity?: 'low' | 'medium' | 'high' | 'critical';
75
94
  }
76
95
 
77
96
  export function redteamCommand(): Command {
@@ -84,7 +103,7 @@ export function redteamCommand(): Command {
84
103
  .option('-m, --model <model>', 'Model to use')
85
104
  .option(
86
105
  '--mutations <mutations...>',
87
- 'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn)'
106
+ 'Mutations to apply (typo, role-spoof, instruction-flip, cot-injection, encoding, multi-turn, bad-likert-judge, crescendo, deceptive-delight, output-injection, excessive-agency, system-extraction, hallucination-trap)'
88
107
  )
89
108
  .option('-c, --count <number>', 'Number of mutated prompts per case', '5')
90
109
  .option('--custom-attacks <path>', 'Path to custom attacks YAML file')
@@ -97,8 +116,18 @@ export function redteamCommand(): Command {
97
116
  '--redact-patterns <patterns...>',
98
117
  'Custom redaction patterns (regex or built-in: email, phone, credit_card, ssn, api_key)'
99
118
  )
100
- .option('--export <format>', 'Export results to format (markdown)')
119
+ .option('--export <format>', 'Export results to format (markdown or junit)')
101
120
  .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
121
+ // OWASP options
122
+ .option(
123
+ '--owasp <categories...>',
124
+ 'Test specific OWASP LLM Top 10 categories (e.g., LLM01, LLM05, LLM06)'
125
+ )
126
+ .option('--owasp-full', 'Run full OWASP LLM Top 10 compliance scan (all applicable categories)')
127
+ .option(
128
+ '--min-severity <level>',
129
+ 'Minimum severity level for attacks (low, medium, high, critical)'
130
+ )
102
131
  .action(async (scenarioPath: string, options: RedteamOptions) => {
103
132
  const spinner = createSpinner('Loading configuration...');
104
133
  spinner.start();
@@ -144,8 +173,15 @@ export function redteamCommand(): Command {
144
173
  const client = await createAdapter(adapterConfig);
145
174
  spinner.succeed(`Connected to ${provider}`);
146
175
 
147
- // Set up mutations
148
- const mutations = selectMutations(options.mutations, options.customAttacks);
176
+ // Set up mutations - check for OWASP flags first
177
+ const mutations = selectMutations({
178
+ names: options.mutations,
179
+ customAttacksPath: options.customAttacks,
180
+ owaspCategories: options.owasp,
181
+ owaspFull: options.owaspFull,
182
+ minSeverity: options.minSeverity,
183
+ });
184
+
149
185
  const generator = new RedTeamGenerator(mutations);
150
186
  const detector = new UnsafeResponseDetector();
151
187
  const count = Number.parseInt(String(options.count)) || 5;
@@ -157,6 +193,14 @@ export function redteamCommand(): Command {
157
193
  `Prompts per case: ${count}`,
158
194
  `Total cases: ${scenario.cases.length}`,
159
195
  ];
196
+ if (options.owasp || options.owaspFull) {
197
+ configLines.push(
198
+ `OWASP Mode: ${options.owaspFull ? 'Full Compliance Scan' : options.owasp?.join(', ')}`
199
+ );
200
+ }
201
+ if (options.minSeverity) {
202
+ configLines.push(`Min Severity: ${options.minSeverity}`);
203
+ }
160
204
  if (options.redact) {
161
205
  configLines.push(
162
206
  `Redaction: enabled${options.redactPatterns ? ` (${options.redactPatterns.join(', ')})` : ''}`
@@ -416,6 +460,10 @@ export function redteamCommand(): Command {
416
460
  model: resolvedConfig.model,
417
461
  mutations: mutations.map((m) => m.name),
418
462
  count_per_case: count,
463
+ // Include OWASP info in config
464
+ ...(options.owaspFull && { owasp_mode: 'full' }),
465
+ ...(options.owasp && { owasp_categories: options.owasp }),
466
+ ...(options.minSeverity && { min_severity: options.minSeverity }),
419
467
  },
420
468
  resolved_config: resolvedConfig,
421
469
  metrics,
@@ -503,14 +551,22 @@ export function redteamCommand(): Command {
503
551
  console.log(chalk.dim(` JSON: ${jsonPath}`));
504
552
  }
505
553
 
506
- // Export to markdown if requested
507
- if (options.export === 'markdown') {
554
+ // Export if requested
555
+ if (options.export) {
508
556
  const exportDir = options.exportOutput || './artemis-exports';
509
557
  await mkdir(exportDir, { recursive: true });
510
- const markdown = generateRedTeamMarkdownReport(manifest);
511
- const mdPath = join(exportDir, `${runId}.md`);
512
- await writeFile(mdPath, markdown);
513
- console.log(chalk.dim(`Exported: ${mdPath}`));
558
+
559
+ if (options.export === 'markdown') {
560
+ const markdown = generateRedTeamMarkdownReport(manifest);
561
+ const mdPath = join(exportDir, `${runId}.md`);
562
+ await writeFile(mdPath, markdown);
563
+ console.log(chalk.dim(`Exported: ${mdPath}`));
564
+ } else if (options.export === 'junit') {
565
+ const junit = generateRedTeamJUnitReport(manifest);
566
+ const junitPath = join(exportDir, `${runId}.xml`);
567
+ await writeFile(junitPath, junit);
568
+ console.log(chalk.dim(`Exported: ${junitPath}`));
569
+ }
514
570
  }
515
571
 
516
572
  // Exit with error if there were unsafe responses
@@ -533,22 +589,123 @@ export function redteamCommand(): Command {
533
589
  return cmd;
534
590
  }
535
591
 
536
- function selectMutations(names?: string[], customAttacksPath?: string): Mutation[] {
537
- const allMutations: Record<string, Mutation> = {
592
+ /**
593
+ * All available mutations registry
594
+ */
595
+ function getAllMutations(): Record<string, Mutation> {
596
+ return {
597
+ // Core mutations (v0.1.x - v0.2.x)
538
598
  typo: new TypoMutation(),
539
599
  'role-spoof': new RoleSpoofMutation(),
540
600
  'instruction-flip': new InstructionFlipMutation(),
541
601
  'cot-injection': new CotInjectionMutation(),
542
602
  encoding: new EncodingMutation(),
543
603
  'multi-turn': new MultiTurnMutation(),
604
+
605
+ // OWASP LLM Top 10 2025 mutations (v0.3.0)
606
+ // LLM01 - Prompt Injection
607
+ 'bad-likert-judge': new BadLikertJudgeMutation(),
608
+ crescendo: new CrescendoMutation(),
609
+ 'deceptive-delight': new DeceptiveDelightMutation(),
610
+
611
+ // LLM05 - Insecure Output Handling
612
+ 'output-injection': new OutputInjectionMutation(),
613
+
614
+ // LLM06 - Excessive Agency
615
+ 'excessive-agency': new ExcessiveAgencyMutation(),
616
+
617
+ // LLM07 - System Prompt Leakage
618
+ 'system-extraction': new SystemExtractionMutation(),
619
+
620
+ // LLM09 - Misinformation
621
+ 'hallucination-trap': new HallucinationTrapMutation(),
544
622
  };
623
+ }
624
+
625
+ /**
626
+ * Get the names of the mutations mapped to the given OWASP categories (matched case-insensitively; unknown categories are skipped)
627
+ */
628
+ function getOwaspMutations(categories: string[]): string[] {
629
+ const mutationNames = new Set<string>();
630
+
631
+ for (const category of categories) {
632
+ const upperCategory = category.toUpperCase();
633
+ if (upperCategory in OWASP_CATEGORIES) {
634
+ const mutations = getMutationsForCategory(upperCategory as keyof typeof OWASP_CATEGORIES);
635
+ for (const mutation of mutations) {
636
+ mutationNames.add(mutation);
637
+ }
638
+ }
639
+ }
545
640
 
546
- let mutations: Mutation[];
641
+ return Array.from(mutationNames);
642
+ }
547
643
 
548
- if (!names || names.length === 0) {
549
- mutations = Object.values(allMutations);
644
+ /**
645
+ * Get the names of all OWASP mutations (used for the full --owasp-full scan)
646
+ */
647
+ function getAllOwaspMutations(): string[] {
648
+ return [
649
+ 'bad-likert-judge',
650
+ 'crescendo',
651
+ 'deceptive-delight',
652
+ 'output-injection',
653
+ 'excessive-agency',
654
+ 'system-extraction',
655
+ 'hallucination-trap',
656
+ ];
657
+ }
658
+
659
+ interface SelectMutationsOptions {
660
+ names?: string[];
661
+ customAttacksPath?: string;
662
+ owaspCategories?: string[];
663
+ owaspFull?: boolean;
664
+ minSeverity?: 'low' | 'medium' | 'high' | 'critical';
665
+ }
666
+
667
+ function selectMutations(options: SelectMutationsOptions): Mutation[] {
668
+ const { names, customAttacksPath, owaspCategories, owaspFull, minSeverity } = options;
669
+
670
+ const allMutations = getAllMutations();
671
+ let selectedNames: string[] = [];
672
+
673
+ // Determine which mutations to use based on options
674
+ if (owaspFull) {
675
+ // Full OWASP scan - use all OWASP mutations
676
+ selectedNames = getAllOwaspMutations();
677
+ } else if (owaspCategories && owaspCategories.length > 0) {
678
+ // Specific OWASP categories
679
+ selectedNames = getOwaspMutations(owaspCategories);
680
+ } else if (names && names.length > 0) {
681
+ // Explicit mutation names
682
+ selectedNames = names;
550
683
  } else {
551
- mutations = names.filter((name) => name in allMutations).map((name) => allMutations[name]);
684
+ // Default: use only the core mutations (OWASP ones are excluded to maintain backward compatibility)
685
+ selectedNames = [
686
+ 'typo',
687
+ 'role-spoof',
688
+ 'instruction-flip',
689
+ 'cot-injection',
690
+ 'encoding',
691
+ 'multi-turn',
692
+ ];
693
+ }
694
+
695
+ // Filter to valid mutation names
696
+ let mutations = selectedNames
697
+ .filter((name) => name in allMutations)
698
+ .map((name) => allMutations[name]);
699
+
700
+ // Apply severity filter if specified
701
+ if (minSeverity) {
702
+ const severityOrder = ['low', 'medium', 'high', 'critical'];
703
+ const minIndex = severityOrder.indexOf(minSeverity);
704
+
705
+ mutations = mutations.filter((m) => {
706
+ const mutationIndex = severityOrder.indexOf(m.severity);
707
+ return mutationIndex >= minIndex;
708
+ });
552
709
  }
553
710
 
554
711
  // Load custom attacks if path provided
@@ -15,7 +15,7 @@ import {
15
15
  resolveScenarioPaths,
16
16
  runScenario,
17
17
  } from '@artemiskit/core';
18
- import { generateMarkdownReport } from '@artemiskit/reports';
18
+ import { generateJUnitReport, generateMarkdownReport } from '@artemiskit/reports';
19
19
  import chalk from 'chalk';
20
20
  import { Command } from 'commander';
21
21
  import { loadConfig } from '../config/loader.js';
@@ -68,8 +68,8 @@ interface RunOptions {
68
68
  threshold?: number;
69
69
  /** Budget limit in USD - fail if cost exceeds this */
70
70
  budget?: number;
71
- /** Export format: markdown */
72
- export?: 'markdown';
71
+ /** Export format: markdown or junit */
72
+ export?: 'markdown' | 'junit';
73
73
  /** Output directory for exports */
74
74
  exportOutput?: string;
75
75
  }
@@ -554,7 +554,8 @@ async function runScenariosInParallel(
554
554
  while (queue.length > 0 || inProgress.size > 0) {
555
555
  // Start new tasks up to the limit
556
556
  while (queue.length > 0 && inProgress.size < parallelLimit) {
557
- const path = queue.shift()!;
557
+ const path = queue.shift();
558
+ if (!path) break;
558
559
  const promise = processScenario(path).then(() => {
559
560
  inProgress.delete(promise);
560
561
  });
@@ -607,7 +608,7 @@ export function runCommand(): Command {
607
608
  .option('--baseline', 'Compare against baseline and detect regression')
608
609
  .option('--threshold <number>', 'Regression threshold (0-1), e.g., 0.05 for 5%', '0.05')
609
610
  .option('--budget <amount>', 'Maximum budget in USD - fail if estimated cost exceeds this')
610
- .option('--export <format>', 'Export format: markdown')
611
+ .option('--export <format>', 'Export format: markdown or junit (for CI integration)')
611
612
  .option('--export-output <dir>', 'Output directory for exports (default: ./artemis-exports)')
612
613
  .action(async (scenarioPath: string | undefined, options: RunOptions) => {
613
614
  // Determine CI mode: explicit flag, environment variable, or summary format that implies CI
@@ -819,14 +820,22 @@ export function runCommand(): Command {
819
820
  console.log(chalk.dim(`Saved: ${savedPath}`));
820
821
  }
821
822
 
822
- // Export to markdown if requested
823
- if (options.export === 'markdown') {
823
+ // Export if requested
824
+ if (options.export) {
824
825
  const exportDir = options.exportOutput || './artemis-exports';
825
826
  await mkdir(exportDir, { recursive: true });
826
- const markdown = generateMarkdownReport(result.manifest);
827
- const mdPath = join(exportDir, `${result.manifest.run_id}.md`);
828
- await writeFile(mdPath, markdown);
829
- console.log(chalk.dim(`Exported: ${mdPath}`));
827
+
828
+ if (options.export === 'markdown') {
829
+ const markdown = generateMarkdownReport(result.manifest);
830
+ const mdPath = join(exportDir, `${result.manifest.run_id}.md`);
831
+ await writeFile(mdPath, markdown);
832
+ console.log(chalk.dim(`Exported: ${mdPath}`));
833
+ } else if (options.export === 'junit') {
834
+ const junit = generateJUnitReport(result.manifest);
835
+ const junitPath = join(exportDir, `${result.manifest.run_id}.xml`);
836
+ await writeFile(junitPath, junit);
837
+ console.log(chalk.dim(`Exported: ${junitPath}`));
838
+ }
830
839
  }
831
840
  } catch (error) {
832
841
  // Record failed scenario