@artemiskit/reports 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +77 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +457 -0
- package/dist/junit/generator.d.ts +44 -0
- package/dist/junit/generator.d.ts.map +1 -0
- package/dist/markdown/generator.d.ts +21 -0
- package/dist/markdown/generator.d.ts.map +1 -0
- package/package.json +2 -2
- package/src/index.ts +15 -0
- package/src/junit/generator.ts +350 -0
- package/src/markdown/generator.ts +360 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JUnit XML Report Generator
|
|
3
|
+
*
|
|
4
|
+
* Generates JUnit-compatible XML reports for CI/CD integration.
|
|
5
|
+
* Follows the JUnit XML format specification for compatibility with
|
|
6
|
+
* Jenkins, GitHub Actions, GitLab CI, and other CI systems.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
|
|
10
|
+
|
|
11
|
+
/**
 * Options accepted by the JUnit report generators in this module.
 * Every field is optional; each generator supplies its own defaults.
 */
export interface JUnitReportOptions {
  /** Name written to the `<testsuite>` element and to each test case's `classname`. */
  suiteName?: string;
  /** Whether the response text is emitted inside a `<system-out>` element. */
  includeSystemOut?: boolean;
  /** Whether failure context is emitted inside a `<system-err>` element. */
  includeSystemErr?: boolean;
  /** Character budget for emitted output; longer text is truncated. */
  maxOutputLength?: number;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Escape a string for use in XML character data or a quoted attribute value.
 *
 * The five XML metacharacters are replaced with their predefined entities and
 * control characters that XML 1.0 forbids are removed. Tab, line feed and
 * carriage return are legal and are kept.
 *
 * @param str - Raw text to embed in the document.
 * @returns Text that is safe between tags or inside a quoted attribute.
 */
function escapeXml(str: string): string {
  // Remove invalid XML control characters (0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F)
  // These are not allowed in XML 1.0 and would cause parsing errors
  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;

  return (
    str
      // '&' must go first, otherwise the ampersands introduced by the
      // replacements below would themselves be escaped a second time.
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;')
      .replace(invalidXmlChars, '')
  );
}
|
|
39
|
+
|
|
40
|
+
/**
 * Truncate text to a maximum length, marking the cut with `...(truncated)`.
 *
 * Text that already fits is returned unchanged. Lengths are counted in UTF-16
 * code units, but the cut never lands between the two halves of a surrogate
 * pair, so the result cannot end in a lone surrogate.
 *
 * @param text - Text to shorten.
 * @param maxLength - Maximum number of code units to keep; negative values
 *   are treated as 0.
 * @returns The original text, or its leading part followed by the marker.
 */
function truncate(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text;

  // Clamp so a negative budget cannot trigger slice()'s count-from-the-end behaviour.
  let end = Math.max(0, Math.floor(maxLength));

  // Back off by one unit if the cut would separate a high surrogate from its low half.
  if (end > 0 && end < text.length) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) end -= 1;
  }

  return `${text.slice(0, end)}...(truncated)`;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Normalise a date string to its ISO 8601 UTC representation.
 *
 * @param dateStr - Any string the `Date` constructor can parse.
 * @returns The instant formatted by `Date.prototype.toISOString`.
 * @throws RangeError if `dateStr` does not parse to a valid date.
 */
function formatTimestamp(dateStr: string): string {
  const parsed = new Date(dateStr);
  return parsed.toISOString();
}
|
|
54
|
+
|
|
55
|
+
/**
 * Generate a JUnit XML report for a standard run.
 *
 * The document holds a single `<testsuite>` with run-level `<properties>` and
 * one `<testcase>` per entry in `manifest.cases`. A case whose `ok` flag is
 * false gets a `<failure>` element; this generator never reports `<error>` or
 * skipped cases.
 *
 * @param manifest - Run manifest to render.
 * @param options - Output options. `suiteName` defaults to the scenario name,
 *   both system streams default to enabled and `maxOutputLength` to 2000.
 * @returns The XML document as a newline-joined string.
 */
export function generateJUnitReport(
  manifest: RunManifest,
  options: JUnitReportOptions = {}
): string {
  const {
    suiteName = manifest.config.scenario,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000,
  } = options;

  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;

  // Wrap free text in a CDATA section. Model output and prompts are untrusted:
  // a literal "]]>" would close the section early, so it is split across two
  // sections, and control characters that XML 1.0 forbids are dropped.
  const cdata = (text: string): string =>
    `<![CDATA[${text.replace(invalidXmlChars, '').replace(/\]\]>/g, ']]]]><![CDATA[>')}]]>`;

  const lines: string[] = [];

  // XML declaration
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');

  // Calculate totals
  const tests = manifest.metrics.total_cases;
  const failures = manifest.metrics.failed_cases;
  const errors = 0; // We treat all failures as failures, not errors
  const skipped = 0;
  const time = manifest.duration_ms / 1000; // JUnit uses seconds

  // Root testsuite element
  lines.push(
    `<testsuite name="${escapeXml(suiteName)}" ` +
      `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` +
      `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`
  );

  // Properties
  lines.push(' <properties>');
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(
    ` <property name="artemis.provider" value="${escapeXml(manifest.config.provider)}" />`
  );
  if (manifest.config.model) {
    lines.push(` <property name="artemis.model" value="${escapeXml(manifest.config.model)}" />`);
  }
  lines.push(
    ` <property name="artemis.success_rate" value="${(manifest.metrics.success_rate * 100).toFixed(1)}%" />`
  );
  lines.push(
    ` <property name="artemis.total_tokens" value="${manifest.metrics.total_tokens}" />`
  );
  if (manifest.metrics.cost) {
    lines.push(
      ` <property name="artemis.cost_usd" value="${manifest.metrics.cost.total_usd.toFixed(6)}" />`
    );
  }
  lines.push(' </properties>');

  // The class name is identical for every case, so escape it once.
  const className = escapeXml(suiteName);

  // Test cases
  for (const testCase of manifest.cases) {
    const testName = escapeXml(testCase.id);
    const testTime = testCase.latencyMs / 1000;

    lines.push(
      ` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`
    );

    if (!testCase.ok) {
      // Failed test
      const failureMessage = escapeXml(testCase.reason || 'Test failed');
      const failureType = escapeXml(testCase.matcherType);

      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);

      // Include details in failure content
      const details: string[] = [];
      details.push(`Matcher Type: ${testCase.matcherType}`);
      details.push(`Expected: ${JSON.stringify(testCase.expected, null, 2)}`);
      details.push(`Score: ${(testCase.score * 100).toFixed(1)}%`);
      if (testCase.reason) {
        details.push(`Reason: ${testCase.reason}`);
      }

      lines.push(escapeXml(details.join('\n')));
      lines.push(' </failure>');
    }

    // System out (response)
    if (includeSystemOut && testCase.response) {
      lines.push(' <system-out>');
      lines.push(cdata(truncate(testCase.response, maxOutputLength)));
      lines.push(' </system-out>');
    }

    // System err (error details for failed tests)
    if (includeSystemErr && !testCase.ok && testCase.reason) {
      lines.push(' <system-err>');
      const errorDetails: string[] = [];
      errorDetails.push(`Error: ${testCase.reason}`);
      const promptStr =
        typeof testCase.prompt === 'string' ? testCase.prompt : JSON.stringify(testCase.prompt);
      // The prompt gets half of the output budget.
      errorDetails.push(`Prompt: ${truncate(promptStr, maxOutputLength / 2)}`);
      lines.push(cdata(errorDetails.join('\n')));
      lines.push(' </system-err>');
    }

    lines.push(' </testcase>');
  }

  // Close testsuite
  lines.push('</testsuite>');

  return lines.join('\n');
}
|
|
168
|
+
|
|
169
|
+
/**
 * Generate a JUnit XML report for red team results.
 *
 * Each entry in `manifest.results` becomes a `<testcase>`. A result with
 * status `unsafe` is reported as a `<failure>` and one with status `error` as
 * an `<error>`; every other status produces a passing test case.
 *
 * @param manifest - Red team manifest to render.
 * @param options - Output options. `suiteName` defaults to
 *   `RedTeam: <scenario>`, both system streams default to enabled and
 *   `maxOutputLength` to 2000.
 * @returns The XML document as a newline-joined string.
 */
export function generateRedTeamJUnitReport(
  manifest: RedTeamManifest,
  options: JUnitReportOptions = {}
): string {
  const {
    suiteName = `RedTeam: ${manifest.config.scenario}`,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000,
  } = options;

  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;

  // Wrap free text in a CDATA section. Attack prompts and model responses are
  // untrusted: a literal "]]>" would close the section early, so it is split
  // across two sections, and control characters that XML 1.0 forbids are dropped.
  const cdata = (text: string): string =>
    `<![CDATA[${text.replace(invalidXmlChars, '').replace(/\]\]>/g, ']]]]><![CDATA[>')}]]>`;

  const lines: string[] = [];

  // XML declaration
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');

  // Calculate totals - unsafe responses are failures
  const tests = manifest.metrics.total_tests;
  const failures = manifest.metrics.unsafe_responses;
  const errors = manifest.metrics.error_responses;
  const skipped = 0;
  const time = manifest.duration_ms / 1000;

  // Root testsuite element
  lines.push(
    `<testsuite name="${escapeXml(suiteName)}" ` +
      `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` +
      `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`
  );

  // Properties
  lines.push(' <properties>');
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(` <property name="artemis.test_type" value="redteam" />`);
  lines.push(
    ` <property name="artemis.defense_rate" value="${(manifest.metrics.defense_rate * 100).toFixed(1)}%" />`
  );
  lines.push(
    ` <property name="artemis.safe_responses" value="${manifest.metrics.safe_responses}" />`
  );
  lines.push(
    ` <property name="artemis.blocked_responses" value="${manifest.metrics.blocked_responses}" />`
  );
  lines.push(
    ` <property name="artemis.unsafe_responses" value="${manifest.metrics.unsafe_responses}" />`
  );
  lines.push(' </properties>');

  // The class name is identical for every result, so escape it once.
  const className = escapeXml(suiteName);

  // Test cases
  for (const result of manifest.results) {
    const testName = escapeXml(`${result.caseId} (${result.mutation})`);
    // A missing latency is reported as zero seconds.
    const testTime = (result.latencyMs || 0) / 1000;

    lines.push(
      ` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`
    );

    if (result.status === 'unsafe') {
      // Security vulnerability found - this is a failure
      const failureMessage = escapeXml(`Security vulnerability: ${result.mutation}`);
      const failureType = escapeXml(`severity:${result.severity}`);

      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);

      const details: string[] = [];
      details.push(`Severity: ${result.severity.toUpperCase()}`);
      details.push(`Mutation: ${result.mutation}`);
      if (result.reasons.length > 0) {
        details.push(`Reasons: ${result.reasons.join(', ')}`);
      }

      lines.push(escapeXml(details.join('\n')));
      lines.push(' </failure>');
    } else if (result.status === 'error') {
      // Error during test
      lines.push(
        ` <error message="${escapeXml(result.response || 'Error during test')}" type="error">`
      );
      lines.push(escapeXml(`Attack: ${result.mutation}\nCase: ${result.caseId}`));
      lines.push(' </error>');
    }

    // System out (response)
    if (includeSystemOut && result.response) {
      lines.push(' <system-out>');
      lines.push(cdata(truncate(result.response, maxOutputLength)));
      lines.push(' </system-out>');
    }

    // System err (attack prompt for reference)
    if (includeSystemErr && result.status === 'unsafe') {
      lines.push(' <system-err>');
      const errDetails: string[] = [];
      // The prompt gets half of the output budget.
      errDetails.push(`Attack Prompt: ${truncate(result.prompt, maxOutputLength / 2)}`);
      errDetails.push(`Severity: ${result.severity.toUpperCase()}`);
      lines.push(cdata(errDetails.join('\n')));
      lines.push(' </system-err>');
    }

    lines.push(' </testcase>');
  }

  // Close testsuite
  lines.push('</testsuite>');

  return lines.join('\n');
}
|
|
281
|
+
|
|
282
|
+
/**
 * Generate JUnit XML for validation results.
 *
 * Each validated file becomes a `<testcase>`. A file with `valid === false`
 * gets a `<failure>` listing its errors, and any warnings are attached as
 * `<system-err>`. The suite time is always reported as 0 and the timestamp is
 * the moment of generation.
 *
 * @param results - Per-file validation outcomes.
 * @param options - Only `suiteName` is read; it defaults to
 *   `ArtemisKit Validation`.
 * @returns The XML document as a newline-joined string.
 */
export function generateValidationJUnitReport(
  results: Array<{
    file: string;
    valid: boolean;
    errors: Array<{ line: number; message: string; rule: string }>;
    warnings: Array<{ line: number; message: string; rule: string }>;
  }>,
  options: JUnitReportOptions = {}
): string {
  const { suiteName = 'ArtemisKit Validation' } = options;

  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;

  // Wrap free text in a CDATA section. A literal "]]>" in a message would
  // close the section early, so it is split across two sections, and control
  // characters that XML 1.0 forbids are dropped.
  const cdata = (text: string): string =>
    `<![CDATA[${text.replace(invalidXmlChars, '').replace(/\]\]>/g, ']]]]><![CDATA[>')}]]>`;

  const lines: string[] = [];

  // XML declaration
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');

  // Calculate totals
  const tests = results.length;
  const failures = results.filter((r) => !r.valid).length;
  const errors = 0;
  const skipped = 0;

  // Root testsuite element
  lines.push(
    `<testsuite name="${escapeXml(suiteName)}" ` +
      `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` +
      `time="0" timestamp="${new Date().toISOString()}">`
  );

  // The class name is identical for every file, so escape it once.
  const className = escapeXml(suiteName);

  // Test cases
  for (const result of results) {
    const testName = escapeXml(result.file);

    lines.push(` <testcase classname="${className}" name="${testName}" time="0">`);

    if (!result.valid) {
      // One-line summary for the attribute, one entry per line for the body.
      const errorMessages = result.errors.map((e) => `Line ${e.line}: ${e.message}`).join('; ');
      lines.push(` <failure message="${escapeXml(errorMessages)}" type="validation">`);

      const details = result.errors.map(
        (error) => `[${error.rule}] Line ${error.line}: ${error.message}`
      );
      lines.push(escapeXml(details.join('\n')));
      lines.push(' </failure>');
    }

    // Warnings as system-err
    if (result.warnings.length > 0) {
      lines.push(' <system-err>');
      const warningDetails = result.warnings
        .map((w) => `[${w.rule}] Line ${w.line}: ${w.message}`)
        .join('\n');
      lines.push(cdata(`Warnings:\n${warningDetails}`));
      lines.push(' </system-err>');
    }

    lines.push(' </testcase>');
  }

  // Close testsuite
  lines.push('</testsuite>');

  return lines.join('\n');
}
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown Report Generator
|
|
3
|
+
*
|
|
4
|
+
* Generates documentation-friendly markdown reports for compliance and audit trails.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
|
|
8
|
+
|
|
9
|
+
/**
 * Options accepted by the markdown report generators in this module.
 * Every field is optional; the generators supply the defaults.
 */
export interface MarkdownReportOptions {
  /** Whether prompt and response bodies are rendered for reported cases. */
  includeDetails?: boolean;
  /** Character limit applied to rendered prompts and responses. */
  truncateAt?: number;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Shorten text to at most `maxLength` characters, appending `...` when cut.
 *
 * @param text - Text to shorten.
 * @param maxLength - Number of leading characters to keep.
 * @returns `text` unchanged when it fits, otherwise its prefix plus `...`.
 */
function truncate(text: string, maxLength: number): string {
  const fits = text.length <= maxLength;
  return fits ? text : text.slice(0, maxLength) + '...';
}
|
|
23
|
+
|
|
24
|
+
/**
 * Format a USD cost for display.
 *
 * Amounts under one cent are shown in cents with four decimals, amounts under
 * one dollar in dollars with four decimals, and everything else in dollars
 * with two decimals.
 *
 * @param costUsd - Cost in US dollars.
 * @returns A human-readable cost string.
 */
function formatCostMd(costUsd: number): string {
  if (costUsd < 0.01) {
    // The value here is already in cents, so it must not carry a dollar sign:
    // "$0.5000 cents" reads as fifty cents rather than half a cent.
    return `${(costUsd * 100).toFixed(4)} cents`;
  }
  if (costUsd < 1) {
    return `$${costUsd.toFixed(4)}`;
  }
  return `$${costUsd.toFixed(2)}`;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Format a duration in milliseconds as a human-readable string.
 *
 * Values under one second are shown in milliseconds, values under one minute
 * in seconds with one decimal, and longer values as whole minutes and seconds.
 *
 * @param ms - Duration in milliseconds.
 * @returns For example `500ms`, `1.5s` or `2m 5s`.
 */
function formatDuration(ms: number): string {
  if (ms < 1000) return `${ms}ms`;
  if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;

  // Round the total before splitting it. Rounding the seconds part on its own
  // can produce "60s" (119600 ms would read "1m 60s" instead of "2m 0s").
  const totalSeconds = Math.round(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}m ${seconds}s`;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Generate a markdown report for a standard run.
 *
 * Sections, in order: header, summary table, passed cases (collapsed in a
 * `<details>` element), failed cases (expanded), and, when present on the
 * manifest, resolved configuration and redaction info, then a footer.
 *
 * @param manifest - Run manifest to render.
 * @param options - `includeDetails` (default true) controls whether prompt,
 *   expectation and response are shown for failed cases; `truncateAt`
 *   (default 500) limits the length of each of those bodies.
 * @returns The markdown document as a newline-joined string.
 */
export function generateMarkdownReport(
  manifest: RunManifest,
  options: MarkdownReportOptions = {}
): string {
  const { includeDetails = true, truncateAt = 500 } = options;
  const lines: string[] = [];

  // Emit a fenced code block. Prompts and responses often contain ``` of their
  // own, which would close a fixed three-backtick fence early, so the fence is
  // made one backtick longer than the longest run in the content (minimum 3).
  const pushFenced = (content: string, info = ''): void => {
    let longestRun = 0;
    for (const run of content.match(/`+/g) ?? []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = '`'.repeat(Math.max(3, longestRun + 1));
    lines.push(`${fence}${info}`);
    lines.push(content);
    lines.push(fence);
  };

  // Header
  lines.push('# ArtemisKit Test Results');
  lines.push('');
  lines.push(`**Scenario:** ${manifest.config.scenario}`);
  lines.push(`**Run ID:** ${manifest.run_id}`);
  lines.push(`**Date:** ${new Date(manifest.start_time).toISOString()}`);
  lines.push(
    `**Provider:** ${manifest.config.provider}${manifest.config.model ? ` (${manifest.config.model})` : ''}`
  );
  lines.push('');
  lines.push('---');
  lines.push('');

  // Summary table
  lines.push('## Summary');
  lines.push('');
  lines.push('| Metric | Value |');
  lines.push('|--------|-------|');
  lines.push(`| Total Cases | ${manifest.metrics.total_cases} |`);
  lines.push(
    `| Passed | ${manifest.metrics.passed_cases} (${(manifest.metrics.success_rate * 100).toFixed(1)}%) |`
  );
  lines.push(`| Failed | ${manifest.metrics.failed_cases} |`);
  lines.push(`| Duration | ${formatDuration(manifest.duration_ms)} |`);
  lines.push(`| Median Latency | ${manifest.metrics.median_latency_ms}ms |`);
  lines.push(`| P95 Latency | ${manifest.metrics.p95_latency_ms}ms |`);
  lines.push(`| Total Tokens | ${manifest.metrics.total_tokens.toLocaleString()} |`);

  if (manifest.metrics.cost) {
    lines.push(`| Estimated Cost | ${formatCostMd(manifest.metrics.cost.total_usd)} |`);
  }

  lines.push('');
  lines.push('---');
  lines.push('');

  // Results by case
  lines.push('## Results by Case');
  lines.push('');

  // Passed cases (collapsed)
  const passed = manifest.cases.filter((c) => c.ok);
  lines.push(`### Passed (${passed.length})`);
  lines.push('');

  if (passed.length > 0) {
    lines.push('<details>');
    lines.push('<summary>Click to expand passed cases</summary>');
    lines.push('');
    lines.push('| Case ID | Latency | Tokens | Score |');
    lines.push('|---------|---------|--------|-------|');
    for (const c of passed) {
      lines.push(
        `| ${c.id} | ${formatDuration(c.latencyMs)} | ${c.tokens?.total || '-'} | ${(c.score * 100).toFixed(0)}% |`
      );
    }
    lines.push('');
    lines.push('</details>');
  } else {
    lines.push('_No passed cases_');
  }

  lines.push('');

  // Failed cases (expanded with details)
  const failed = manifest.cases.filter((c) => !c.ok);
  lines.push(`### Failed (${failed.length})`);
  lines.push('');

  if (failed.length > 0) {
    for (const c of failed) {
      lines.push(`#### \`${c.id}\``);
      lines.push('');

      if (includeDetails) {
        // Prompt
        const promptStr =
          typeof c.prompt === 'string' ? c.prompt : JSON.stringify(c.prompt, null, 2);
        lines.push('**Prompt:**');
        pushFenced(truncate(promptStr, truncateAt));
        lines.push('');

        // Expected
        lines.push('**Expected:**');
        lines.push(`- Type: \`${c.matcherType}\``);
        pushFenced(truncate(JSON.stringify(c.expected, null, 2), truncateAt), 'json');
        lines.push('');

        // Actual response
        lines.push('**Actual Response:**');
        pushFenced(truncate(c.response || '(empty)', truncateAt));
        lines.push('');
      }

      // Reason
      lines.push(`**Reason:** ${c.reason || 'Unknown'}`);
      lines.push('');
      lines.push('---');
      lines.push('');
    }
  } else {
    lines.push('_No failed cases_');
    lines.push('');
  }

  // Configuration section
  if (manifest.resolved_config) {
    lines.push('## Configuration');
    lines.push('');
    lines.push('```yaml');
    lines.push(`provider: ${manifest.resolved_config.provider}`);
    if (manifest.resolved_config.model) {
      lines.push(`model: ${manifest.resolved_config.model}`);
    }
    if (manifest.resolved_config.temperature !== undefined) {
      lines.push(`temperature: ${manifest.resolved_config.temperature}`);
    }
    if (manifest.resolved_config.max_tokens !== undefined) {
      lines.push(`max_tokens: ${manifest.resolved_config.max_tokens}`);
    }
    lines.push('```');
    lines.push('');
  }

  // Redaction info
  if (manifest.redaction?.enabled) {
    lines.push('## Redaction');
    lines.push('');
    lines.push(`- **Patterns Used:** ${manifest.redaction.patternsUsed.join(', ')}`);
    lines.push(`- **Prompts Redacted:** ${manifest.redaction.summary.promptsRedacted}`);
    lines.push(`- **Responses Redacted:** ${manifest.redaction.summary.responsesRedacted}`);
    lines.push(`- **Total Redactions:** ${manifest.redaction.summary.totalRedactions}`);
    lines.push('');
  }

  // Footer
  lines.push('---');
  lines.push('');
  lines.push(`*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`);

  return lines.join('\n');
}
|
|
206
|
+
|
|
207
|
+
/**
 * Generate a markdown report for red team results.
 *
 * Sections, in order: header, security summary with a risk level derived from
 * the defense rate (>= 95% LOW, >= 80% MEDIUM, >= 50% HIGH, otherwise
 * CRITICAL), severity breakdown and per-vulnerability details when unsafe
 * responses exist, recommendations, and a footer.
 *
 * @param manifest - Red team manifest to render.
 * @param options - `includeDetails` (default true) controls whether the attack
 *   prompt and model response are shown; `truncateAt` (default 500) limits the
 *   length of each of those bodies.
 * @returns The markdown document as a newline-joined string.
 */
export function generateRedTeamMarkdownReport(
  manifest: RedTeamManifest,
  options: MarkdownReportOptions = {}
): string {
  const { includeDetails = true, truncateAt = 500 } = options;
  const lines: string[] = [];

  // Emit a fenced code block. Attack prompts and responses often contain ```
  // of their own, which would close a fixed three-backtick fence early, so the
  // fence is made one backtick longer than the longest run in the content
  // (minimum 3).
  const pushFenced = (content: string): void => {
    let longestRun = 0;
    for (const run of content.match(/`+/g) ?? []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = '`'.repeat(Math.max(3, longestRun + 1));
    lines.push(fence);
    lines.push(content);
    lines.push(fence);
  };

  // Header
  lines.push('# ArtemisKit Security Report');
  lines.push('');
  lines.push(`**Scenario:** ${manifest.config.scenario}`);
  lines.push(`**Run ID:** ${manifest.run_id}`);
  lines.push(`**Date:** ${new Date(manifest.start_time).toISOString()}`);
  lines.push('**Test Type:** Red Team Security Scan');
  lines.push('');
  lines.push('---');
  lines.push('');

  // Security Summary
  const testable = manifest.metrics.total_tests - manifest.metrics.error_responses;
  const defenseRate = manifest.metrics.defense_rate * 100;

  // Determine risk level
  let riskLevel: string;
  if (defenseRate >= 95) {
    riskLevel = 'LOW';
  } else if (defenseRate >= 80) {
    riskLevel = 'MEDIUM';
  } else if (defenseRate >= 50) {
    riskLevel = 'HIGH';
  } else {
    riskLevel = 'CRITICAL';
  }

  lines.push('## Security Summary');
  lines.push('');
  lines.push('| Metric | Value |');
  lines.push('|--------|-------|');
  lines.push(`| Defense Rate | ${defenseRate.toFixed(1)}% |`);
  lines.push(`| Total Attacks | ${manifest.metrics.total_tests} |`);
  lines.push(`| Defended | ${manifest.metrics.defended} |`);
  lines.push(`| Safe Responses | ${manifest.metrics.safe_responses} |`);
  lines.push(`| Blocked | ${manifest.metrics.blocked_responses} |`);
  lines.push(`| Vulnerabilities | ${manifest.metrics.unsafe_responses} |`);
  lines.push(`| Errors | ${manifest.metrics.error_responses} |`);
  // No whitespace directly inside the ** markers, otherwise the text does not render bold.
  lines.push(`| Risk Level | **${riskLevel}** |`);
  lines.push('');
  lines.push('---');
  lines.push('');

  // Severity Breakdown
  const { by_severity } = manifest.metrics;
  if (manifest.metrics.unsafe_responses > 0) {
    lines.push('## Severity Breakdown');
    lines.push('');
    lines.push('| Severity | Count | Description |');
    lines.push('|----------|-------|-------------|');
    if (by_severity.critical > 0) {
      lines.push(`| Critical | ${by_severity.critical} | Severe security vulnerability |`);
    }
    if (by_severity.high > 0) {
      lines.push(`| High | ${by_severity.high} | Significant security concern |`);
    }
    if (by_severity.medium > 0) {
      lines.push(`| Medium | ${by_severity.medium} | Moderate security issue |`);
    }
    if (by_severity.low > 0) {
      lines.push(`| Low | ${by_severity.low} | Minor security concern |`);
    }
    lines.push('');
    lines.push('---');
    lines.push('');
  }

  // Vulnerabilities Found
  const unsafeResults = manifest.results.filter((r) => r.status === 'unsafe');
  if (unsafeResults.length > 0) {
    lines.push('## Vulnerabilities Found');
    lines.push('');

    unsafeResults.forEach((result, index) => {
      lines.push(`### ${index + 1}. ${result.mutation} (${result.severity.toUpperCase()})`);
      lines.push('');
      lines.push(`**Case ID:** ${result.caseId}`);
      lines.push(`**Severity:** ${result.severity.toUpperCase()}`);
      lines.push(`**Mutation:** ${result.mutation}`);
      lines.push('');

      if (includeDetails) {
        lines.push('**Attack Prompt:**');
        pushFenced(truncate(result.prompt, truncateAt));
        lines.push('');

        lines.push('**Model Response:**');
        pushFenced(truncate(result.response || '(empty)', truncateAt));
        lines.push('');
      }

      if (result.reasons.length > 0) {
        lines.push('**Reasons:**');
        for (const reason of result.reasons) {
          lines.push(`- ${reason}`);
        }
        lines.push('');
      }

      lines.push('---');
      lines.push('');
    });
  }

  // Recommendations
  lines.push('## Recommendations');
  lines.push('');

  if (manifest.metrics.unsafe_responses === 0) {
    lines.push('- All attacks were successfully defended');
    lines.push('- Continue monitoring and testing regularly');
  } else {
    if (by_severity.critical > 0 || by_severity.high > 0) {
      lines.push(
        '- **High Priority:** Review and address critical/high severity vulnerabilities immediately'
      );
    }
    // Suggested once unsafe responses exceed 10% of the tests that did not error.
    if (manifest.metrics.unsafe_responses > testable * 0.1) {
      lines.push('- Consider implementing additional input validation and output filtering');
    }
    lines.push('- Review system prompts for potential information leakage');
    lines.push('- Implement role-play and persona detection');
    lines.push('- Add output filtering for dangerous content patterns');
  }

  lines.push('');

  // Footer
  lines.push('---');
  lines.push('');
  lines.push(`*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`);

  return lines.join('\n');
}
|