@artemiskit/reports 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,45 @@
1
1
  # @artemiskit/reports
2
2
 
3
+ ## 0.2.4
4
+
5
+ ### Patch Changes
6
+
7
+ - 16604a6: ## New Features
8
+
9
+ ### Validate Command
10
+
11
+ New `artemiskit validate` command for validating scenario files without running them:
12
+
13
+ - **YAML syntax validation** - Catches formatting errors
14
+ - **Schema validation** - Validates against ArtemisKit schema using Zod
15
+ - **Semantic validation** - Detects duplicate case IDs, undefined variables
16
+ - **Warnings** - Identifies deprecated fields, missing descriptions, performance hints
17
+
18
+ Options:
19
+
20
+ - `--json` - Output results as JSON
21
+ - `--strict` - Treat warnings as errors
22
+ - `--quiet` - Only show errors
23
+ - `--export junit` - Export to JUnit XML for CI integration
24
+
25
+ ### JUnit XML Export
26
+
27
+ Added JUnit XML export support for CI/CD integration with Jenkins, GitHub Actions, GitLab CI, and other systems:
28
+
29
+ - `akit run scenarios/ --export junit` - Export run results
30
+ - `akit redteam scenarios/chatbot.yaml --export junit` - Export security test results
31
+ - `akit validate scenarios/ --export junit` - Export validation results
32
+
33
+ JUnit reports include:
34
+
35
+ - Test suite metadata (run ID, provider, model, success rate)
36
+ - Individual test cases with pass/fail status
37
+ - Failure details with matcher type and expected values
38
+ - Timing information for each test
39
+
40
+ - Updated dependencies [16604a6]
41
+ - @artemiskit/core@0.2.4
42
+
3
43
  ## 0.2.3
4
44
 
5
45
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -8,4 +8,5 @@ export { generateRedTeamHTMLReport } from './html/redteam-generator';
8
8
  export { generateStressHTMLReport } from './html/stress-generator';
9
9
  export { generateCompareHTMLReport, buildComparisonData, type ComparisonData, type CaseComparison, } from './html/compare-generator';
10
10
  export { generateMarkdownReport, generateRedTeamMarkdownReport, type MarkdownReportOptions, } from './markdown/generator';
11
+ export { generateJUnitReport, generateRedTeamJUnitReport, generateValidationJUnitReport, type JUnitReportOptions, } from './junit/generator';
11
12
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAGrE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAGnE,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,cAAc,GACpB,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EACL,sBAAsB,EACtB,6BAA6B,EAC7B,KAAK,qBAAqB,GAC3B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAGrE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAGnE,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,cAAc,GACpB,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EACL,sBAAsB,EACtB,6BAA6B,EAC7B,KAAK,qBAAqB,GAC3B,MAAM,sBAAsB,CAAC;AAG9B,OAAO,EACL,mBAAmB,EACnB,0BAA0B,EAC1B,6BAA6B,EAC7B,KAAK,kBAAkB,GACxB,MAAM,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -8165,11 +8165,202 @@ function generateRedTeamMarkdownReport(manifest, options = {}) {
8165
8165
  return lines.join(`
8166
8166
  `);
8167
8167
  }
8168
+ // src/junit/generator.ts
8169
/**
 * Escape a string for use in an XML attribute value or text node.
 *
 * Code units that can never appear in a well-formed XML 1.0 document are
 * removed (C0 controls other than TAB/LF/CR, U+FFFE, U+FFFF and unpaired
 * UTF-16 surrogates), then the five predefined entities are escaped.
 *
 * @param {string} str Raw text.
 * @returns {string} Text that is safe to embed in XML.
 */
function escapeXml(str) {
  // C0 controls (except \t \n \r) and the two non-characters are outside the XML 1.0 Char production.
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  // A surrogate without its partner cannot be encoded as UTF-8, so parsers reject the document.
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
  return str
    .replace(invalidXmlChars, "")
    .replace(loneSurrogates, "")
    .replace(/&/g, "&amp;") // must run before the other entities so their '&' is not re-escaped
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
8173
/**
 * Truncate text to at most `maxLength` UTF-16 code units and append a marker.
 *
 * The cut never falls between the two halves of a surrogate pair, so the
 * result does not end in a lone surrogate (which would be invalid in XML).
 *
 * @param {string} text Text to shorten.
 * @param {number} maxLength Maximum number of code units to keep.
 * @returns {string} `text` unchanged when it already fits, otherwise the kept
 *   prefix followed by `...(truncated)`.
 */
function truncate2(text, maxLength) {
  if (text.length <= maxLength)
    return text;
  // Clamp so a negative limit keeps nothing instead of slicing from the end.
  let end = Math.max(0, Math.floor(maxLength));
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    // A high surrogate at the cut point would be separated from its low half.
    if (last >= 0xd800 && last <= 0xdbff)
      end -= 1;
  }
  return `${text.slice(0, end)}...(truncated)`;
}
8178
+ function formatTimestamp(dateStr) {
8179
+ return new Date(dateStr).toISOString(); // ISO 8601 (UTC); Date#toISOString throws RangeError when dateStr does not parse to a valid date
8180
+ }
8181
/**
 * Generate a JUnit XML report for a standard run.
 *
 * Produces a single `<testsuite>` document: suite totals come from
 * `manifest.metrics`, run metadata is written as `<property>` elements, and
 * every entry of `manifest.cases` becomes one `<testcase>`.
 *
 * @param {object} manifest Run manifest (config, metrics, cases, timing).
 * @param {object} [options] Report options.
 * @param {string} [options.suiteName] Suite name; defaults to `manifest.config.scenario`.
 * @param {boolean} [options.includeSystemOut=true] Emit the response as `<system-out>`.
 * @param {boolean} [options.includeSystemErr=true] Emit reason and prompt as `<system-err>` for failed cases.
 * @param {number} [options.maxOutputLength=2000] Limit for the response; the prompt excerpt uses half of it.
 * @returns {string} The XML document, lines joined with "\n".
 */
function generateJUnitReport(manifest, options = {}) {
  const {
    suiteName = manifest.config.scenario,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000
  } = options;
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
  // Wrap untrusted text in CDATA. Entities are not expanded inside CDATA, so the
  // only way to carry a literal "]]>" is to end the section and reopen it; illegal
  // characters must be dropped because CDATA content is still subject to the
  // XML Char production.
  const toCData = (text) => {
    const safe = text.replace(invalidXmlChars, "").replace(loneSurrogates, "").replace(/\]\]>/g, "]]]]><![CDATA[>");
    return `<![CDATA[${safe}]]>`;
  };
  const lines = [];
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
  const tests = manifest.metrics.total_cases;
  const failures = manifest.metrics.failed_cases;
  const errors = 0; // every failed case is reported as a failure, never as an error
  const skipped = 0;
  const time = manifest.duration_ms / 1000; // JUnit expects seconds
  lines.push(`<testsuite name="${escapeXml(suiteName)}" ` + `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` + `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`);
  lines.push(" <properties>");
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(` <property name="artemis.provider" value="${escapeXml(manifest.config.provider)}" />`);
  if (manifest.config.model) {
    lines.push(` <property name="artemis.model" value="${escapeXml(manifest.config.model)}" />`);
  }
  lines.push(` <property name="artemis.success_rate" value="${(manifest.metrics.success_rate * 100).toFixed(1)}%" />`);
  lines.push(` <property name="artemis.total_tokens" value="${manifest.metrics.total_tokens}" />`);
  if (manifest.metrics.cost) {
    lines.push(` <property name="artemis.cost_usd" value="${manifest.metrics.cost.total_usd.toFixed(6)}" />`);
  }
  lines.push(" </properties>");
  for (const testCase of manifest.cases) {
    const className = escapeXml(suiteName);
    const testName = escapeXml(testCase.id);
    const testTime = testCase.latencyMs / 1000;
    lines.push(` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`);
    if (!testCase.ok) {
      const failureMessage = escapeXml(testCase.reason || "Test failed");
      const failureType = escapeXml(testCase.matcherType);
      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);
      const details = [];
      details.push(`Matcher Type: ${testCase.matcherType}`);
      details.push(`Expected: ${JSON.stringify(testCase.expected, null, 2)}`);
      details.push(`Score: ${(testCase.score * 100).toFixed(1)}%`);
      if (testCase.reason) {
        details.push(`Reason: ${testCase.reason}`);
      }
      lines.push(escapeXml(details.join("\n")));
      lines.push(" </failure>");
    }
    if (includeSystemOut && testCase.response) {
      lines.push(" <system-out>");
      lines.push(toCData(truncate2(testCase.response, maxOutputLength)));
      lines.push(" </system-out>");
    }
    if (includeSystemErr && !testCase.ok && testCase.reason) {
      lines.push(" <system-err>");
      const errorDetails = [];
      errorDetails.push(`Error: ${testCase.reason}`);
      // Prompts may be structured (e.g. message arrays); serialize those as JSON.
      const promptStr = typeof testCase.prompt === "string" ? testCase.prompt : JSON.stringify(testCase.prompt);
      errorDetails.push(`Prompt: ${truncate2(promptStr, maxOutputLength / 2)}`);
      lines.push(toCData(errorDetails.join("\n")));
      lines.push(" </system-err>");
    }
    lines.push(" </testcase>");
  }
  lines.push("</testsuite>");
  return lines.join("\n");
}
8250
/**
 * Generate a JUnit XML report for red-team results.
 *
 * Each entry of `manifest.results` becomes one `<testcase>`. A result with
 * status "unsafe" is reported as a `<failure>`, status "error" as an
 * `<error>`; any other status yields a passing test case.
 *
 * @param {object} manifest Red-team manifest (config, metrics, results, timing).
 * @param {object} [options] Report options.
 * @param {string} [options.suiteName] Suite name; defaults to "RedTeam: <scenario>".
 * @param {boolean} [options.includeSystemOut=true] Emit the response as `<system-out>`.
 * @param {boolean} [options.includeSystemErr=true] Emit the attack prompt as `<system-err>` for unsafe results.
 * @param {number} [options.maxOutputLength=2000] Limit for the response; the prompt excerpt uses half of it.
 * @returns {string} The XML document, lines joined with "\n".
 */
function generateRedTeamJUnitReport(manifest, options = {}) {
  const {
    suiteName = `RedTeam: ${manifest.config.scenario}`,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000
  } = options;
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
  // Wrap untrusted text in CDATA. A literal "]]>" would end the section early,
  // so it is split across two sections; characters illegal in XML 1.0 are dropped.
  const toCData = (text) => {
    const safe = text.replace(invalidXmlChars, "").replace(loneSurrogates, "").replace(/\]\]>/g, "]]]]><![CDATA[>");
    return `<![CDATA[${safe}]]>`;
  };
  const lines = [];
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
  const tests = manifest.metrics.total_tests;
  const failures = manifest.metrics.unsafe_responses; // an unsafe response is a failed test
  const errors = manifest.metrics.error_responses;
  const skipped = 0;
  const time = manifest.duration_ms / 1000; // JUnit expects seconds
  lines.push(`<testsuite name="${escapeXml(suiteName)}" ` + `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` + `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`);
  lines.push(" <properties>");
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(` <property name="artemis.test_type" value="redteam" />`);
  lines.push(` <property name="artemis.defense_rate" value="${(manifest.metrics.defense_rate * 100).toFixed(1)}%" />`);
  lines.push(` <property name="artemis.safe_responses" value="${manifest.metrics.safe_responses}" />`);
  lines.push(` <property name="artemis.blocked_responses" value="${manifest.metrics.blocked_responses}" />`);
  lines.push(` <property name="artemis.unsafe_responses" value="${manifest.metrics.unsafe_responses}" />`);
  lines.push(" </properties>");
  for (const result of manifest.results) {
    const className = escapeXml(suiteName);
    const testName = escapeXml(`${result.caseId} (${result.mutation})`);
    const testTime = (result.latencyMs || 0) / 1000; // latency may be absent on a result
    lines.push(` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`);
    if (result.status === "unsafe") {
      const failureMessage = escapeXml(`Security vulnerability: ${result.mutation}`);
      const failureType = escapeXml(`severity:${result.severity}`);
      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);
      const details = [];
      details.push(`Severity: ${result.severity.toUpperCase()}`);
      details.push(`Mutation: ${result.mutation}`);
      if (result.reasons.length > 0) {
        details.push(`Reasons: ${result.reasons.join(", ")}`);
      }
      lines.push(escapeXml(details.join("\n")));
      lines.push(" </failure>");
    } else if (result.status === "error") {
      lines.push(` <error message="${escapeXml(result.response || "Error during test")}" type="error">`);
      lines.push(escapeXml(`Attack: ${result.mutation}\nCase: ${result.caseId}`));
      lines.push(" </error>");
    }
    if (includeSystemOut && result.response) {
      lines.push(" <system-out>");
      lines.push(toCData(truncate2(result.response, maxOutputLength)));
      lines.push(" </system-out>");
    }
    if (includeSystemErr && result.status === "unsafe") {
      lines.push(" <system-err>");
      const errDetails = [];
      errDetails.push(`Attack Prompt: ${truncate2(result.prompt, maxOutputLength / 2)}`);
      errDetails.push(`Severity: ${result.severity.toUpperCase()}`);
      lines.push(toCData(errDetails.join("\n")));
      lines.push(" </system-err>");
    }
    lines.push(" </testcase>");
  }
  lines.push("</testsuite>");
  return lines.join("\n");
}
8318
/**
 * Generate JUnit XML for scenario validation results.
 *
 * Each validated file becomes one `<testcase>` named after the file. A file
 * with `valid === false` gets a `<failure>` listing its errors; warnings are
 * written to `<system-err>`. Only `options.suiteName` is read here.
 *
 * @param {Array<object>} results Per-file validation results (`file`, `valid`, `errors`, `warnings`).
 * @param {object} [options] Report options.
 * @param {string} [options.suiteName="ArtemisKit Validation"] Suite name.
 * @returns {string} The XML document, lines joined with "\n".
 */
function generateValidationJUnitReport(results, options = {}) {
  const { suiteName = "ArtemisKit Validation" } = options;
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
  // Wrap text in CDATA. A literal "]]>" would end the section early, so it is
  // split across two sections; characters illegal in XML 1.0 are dropped.
  const toCData = (text) => {
    const safe = text.replace(invalidXmlChars, "").replace(loneSurrogates, "").replace(/\]\]>/g, "]]]]><![CDATA[>");
    return `<![CDATA[${safe}]]>`;
  };
  const lines = [];
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
  const tests = results.length;
  const failures = results.filter((r) => !r.valid).length;
  const errors = 0;
  const skipped = 0;
  // Validation has no measured duration, so time is reported as 0 and the
  // timestamp is the moment the report is generated.
  lines.push(`<testsuite name="${escapeXml(suiteName)}" ` + `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` + `time="0" timestamp="${new Date().toISOString()}">`);
  for (const result of results) {
    const className = escapeXml(suiteName);
    const testName = escapeXml(result.file);
    lines.push(` <testcase classname="${className}" name="${testName}" time="0">`);
    if (!result.valid) {
      const errorMessages = result.errors.map((e) => `Line ${e.line}: ${e.message}`).join("; ");
      lines.push(` <failure message="${escapeXml(errorMessages)}" type="validation">`);
      const details = result.errors.map((error) => `[${error.rule}] Line ${error.line}: ${error.message}`);
      lines.push(escapeXml(details.join("\n")));
      lines.push(" </failure>");
    }
    if (result.warnings.length > 0) {
      lines.push(" <system-err>");
      const warningDetails = result.warnings.map((w) => `[${w.rule}] Line ${w.line}: ${w.message}`).join("\n");
      lines.push(toCData(`Warnings:\n${warningDetails}`));
      lines.push(" </system-err>");
    }
    lines.push(" </testcase>");
  }
  lines.push("</testsuite>");
  return lines.join("\n");
}
8168
8356
  export {
8357
+ generateValidationJUnitReport,
8169
8358
  generateStressHTMLReport,
8170
8359
  generateRedTeamMarkdownReport,
8360
+ generateRedTeamJUnitReport,
8171
8361
  generateRedTeamHTMLReport,
8172
8362
  generateMarkdownReport,
8363
+ generateJUnitReport,
8173
8364
  generateJSONReport,
8174
8365
  generateHTMLReport,
8175
8366
  generateCompareHTMLReport,
@@ -0,0 +1,44 @@
1
+ /**
2
+ * JUnit XML Report Generator
3
+ *
4
+ * Generates JUnit-compatible XML reports for CI/CD integration.
5
+ * Follows the JUnit XML format specification for compatibility with
6
+ * Jenkins, GitHub Actions, GitLab CI, and other CI systems.
7
+ */
8
+ import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
9
+ export interface JUnitReportOptions {
10
+ /** Test suite name. Defaults to the scenario name for run reports, "RedTeam: <scenario>" for red-team reports and "ArtemisKit Validation" for validation reports. */
11
+ suiteName?: string;
12
+ /** Include a system-out element with the response content (default: true; not read by the validation report). */
13
+ includeSystemOut?: boolean;
14
+ /** Include a system-err element with error details (default: true; not read by the validation report). */
15
+ includeSystemErr?: boolean;
16
+ /** Maximum length of response content before truncation (default: 2000); prompt excerpts are limited to half of this value. */
17
+ maxOutputLength?: number;
18
+ }
19
+ /**
20
+ * Generate a JUnit XML report (a single testsuite document returned as a string) for a standard run; each case in the manifest becomes one testcase.
21
+ */
22
+ export declare function generateJUnitReport(manifest: RunManifest, options?: JUnitReportOptions): string;
23
+ /**
24
+ * Generate a JUnit XML report for red team results; results with status "unsafe" are reported as failures and status "error" as errors.
25
+ */
26
+ export declare function generateRedTeamJUnitReport(manifest: RedTeamManifest, options?: JUnitReportOptions): string;
27
+ /**
27
+ * Generate JUnit XML for validation results: one testcase per validated file, a failure element when `valid` is false, and warnings written to system-err. Only `options.suiteName` is used.
28
+ */
29
+ export declare function generateValidationJUnitReport(results: Array<{
30
+ file: string;
31
+ valid: boolean;
32
+ errors: Array<{
33
+ line: number;
34
+ message: string;
35
+ rule: string;
36
+ }>;
37
+ warnings: Array<{
38
+ line: number;
39
+ message: string;
40
+ rule: string;
41
+ }>;
42
+ }>, options?: JUnitReportOptions): string;
44
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/junit/generator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAErE,MAAM,WAAW,kBAAkB;IACjC,kDAAkD;IAClD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,4CAA4C;IAC5C,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAmCD;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,WAAW,EACrB,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CA0GR;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,eAAe,EACzB,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CAyGR;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAC3C,OAAO,EAAE,KAAK,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/D,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAClE,CAAC,EACF,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CAyDR"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/reports",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "HTML report generation for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -30,7 +30,7 @@
30
30
  "test": "bun test"
31
31
  },
32
32
  "dependencies": {
33
- "@artemiskit/core": "0.2.3",
33
+ "@artemiskit/core": "0.2.4",
34
34
  "handlebars": "^4.7.8"
35
35
  },
36
36
  "devDependencies": {
package/src/index.ts CHANGED
@@ -27,3 +27,11 @@ export {
27
27
  generateRedTeamMarkdownReport,
28
28
  type MarkdownReportOptions,
29
29
  } from './markdown/generator';
30
+
31
+ // JUnit XML reports (CI integration)
32
+ export {
33
+ generateJUnitReport,
34
+ generateRedTeamJUnitReport,
35
+ generateValidationJUnitReport,
36
+ type JUnitReportOptions,
37
+ } from './junit/generator';
@@ -0,0 +1,350 @@
1
+ /**
2
+ * JUnit XML Report Generator
3
+ *
4
+ * Generates JUnit-compatible XML reports for CI/CD integration.
5
+ * Follows the JUnit XML format specification for compatibility with
6
+ * Jenkins, GitHub Actions, GitLab CI, and other CI systems.
7
+ */
8
+
9
+ import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
10
+
11
+ export interface JUnitReportOptions {
12
+ /** Test suite name. Defaults to the scenario name for run reports, "RedTeam: <scenario>" for red-team reports and "ArtemisKit Validation" for validation reports. */
13
+ suiteName?: string;
14
+ /** Include a system-out element with the response content (default: true; not read by the validation report). */
15
+ includeSystemOut?: boolean;
16
+ /** Include a system-err element with error details (default: true; not read by the validation report). */
17
+ includeSystemErr?: boolean;
18
+ /** Maximum length of response content before truncation (default: 2000); prompt excerpts are limited to half of this value. */
19
+ maxOutputLength?: number;
20
+ }
21
+
22
/**
 * Escape a string for use in an XML attribute value or text node.
 *
 * Code units that can never appear in a well-formed XML 1.0 document are
 * removed first (C0 controls other than TAB/LF/CR, U+FFFE, U+FFFF and
 * unpaired UTF-16 surrogates), then the five predefined entities are escaped.
 *
 * @param str - Raw text.
 * @returns Text that is safe to embed in XML.
 */
function escapeXml(str: string): string {
  // C0 controls (except \t \n \r) and the two non-characters are outside the
  // XML 1.0 Char production and would cause parsing errors.
  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;

  // A surrogate without its partner cannot be encoded as UTF-8, so parsers
  // reject the whole document.
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

  return str
    .replace(invalidXmlChars, '')
    .replace(loneSurrogates, '')
    .replace(/&/g, '&amp;') // must run before the other entities so their '&' is not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
39
+
40
/**
 * Truncate text to at most `maxLength` UTF-16 code units and append a marker.
 *
 * The cut never falls between the two halves of a surrogate pair, so the
 * result does not end in a lone surrogate (which would be invalid in XML).
 *
 * @param text - Text to shorten.
 * @param maxLength - Maximum number of code units to keep.
 * @returns `text` unchanged when it already fits, otherwise the kept prefix
 *   followed by `...(truncated)`.
 */
function truncate(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text;

  // Clamp so a negative limit keeps nothing instead of slicing from the end.
  let end = Math.max(0, Math.floor(maxLength));
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    // A high surrogate at the cut point would be separated from its low half.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }

  return `${text.slice(0, end)}...(truncated)`;
}
47
+
48
+ /**
49
+ * Format a date string as an ISO 8601 UTC timestamp via Date#toISOString, which throws RangeError when dateStr does not parse to a valid date.
50
+ */
51
+ function formatTimestamp(dateStr: string): string {
52
+ return new Date(dateStr).toISOString();
53
+ }
54
+
55
/**
 * Generate JUnit XML report for a standard run.
 *
 * Produces a single `<testsuite>` document: suite totals come from
 * `manifest.metrics`, run metadata is written as `<property>` elements, and
 * every entry of `manifest.cases` becomes one `<testcase>`. Failed cases get a
 * `<failure>` element; response and prompt text are written as CDATA.
 *
 * @param manifest - Run manifest (config, metrics, cases, timing).
 * @param options - Report options; see {@link JUnitReportOptions}. The suite
 *   name defaults to `manifest.config.scenario`.
 * @returns The XML document, lines joined with "\n".
 */
export function generateJUnitReport(
  manifest: RunManifest,
  options: JUnitReportOptions = {}
): string {
  const {
    suiteName = manifest.config.scenario,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000,
  } = options;

  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

  // Wrap untrusted text in CDATA. Entities are not expanded inside CDATA, so the
  // only way to carry a literal "]]>" is to end the section and reopen it; illegal
  // characters must be dropped because CDATA content is still subject to the
  // XML Char production.
  const toCData = (text: string): string => {
    const safe = text
      .replace(invalidXmlChars, '')
      .replace(loneSurrogates, '')
      .replace(/\]\]>/g, ']]]]><![CDATA[>');
    return `<![CDATA[${safe}]]>`;
  };

  const lines: string[] = [];

  // XML declaration
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');

  // Calculate totals
  const tests = manifest.metrics.total_cases;
  const failures = manifest.metrics.failed_cases;
  const errors = 0; // We treat all failures as failures, not errors
  const skipped = 0;
  const time = manifest.duration_ms / 1000; // JUnit uses seconds

  // Root testsuite element
  lines.push(
    `<testsuite name="${escapeXml(suiteName)}" ` +
      `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` +
      `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`
  );

  // Properties
  lines.push(' <properties>');
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(
    ` <property name="artemis.provider" value="${escapeXml(manifest.config.provider)}" />`
  );
  if (manifest.config.model) {
    lines.push(` <property name="artemis.model" value="${escapeXml(manifest.config.model)}" />`);
  }
  lines.push(
    ` <property name="artemis.success_rate" value="${(manifest.metrics.success_rate * 100).toFixed(1)}%" />`
  );
  lines.push(
    ` <property name="artemis.total_tokens" value="${manifest.metrics.total_tokens}" />`
  );
  if (manifest.metrics.cost) {
    lines.push(
      ` <property name="artemis.cost_usd" value="${manifest.metrics.cost.total_usd.toFixed(6)}" />`
    );
  }
  lines.push(' </properties>');

  // Test cases
  for (const testCase of manifest.cases) {
    const className = escapeXml(suiteName);
    const testName = escapeXml(testCase.id);
    const testTime = testCase.latencyMs / 1000;

    lines.push(
      ` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`
    );

    if (!testCase.ok) {
      // Failed test
      const failureMessage = escapeXml(testCase.reason || 'Test failed');
      const failureType = escapeXml(testCase.matcherType);

      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);

      // Include details in failure content
      const details: string[] = [];
      details.push(`Matcher Type: ${testCase.matcherType}`);
      details.push(`Expected: ${JSON.stringify(testCase.expected, null, 2)}`);
      details.push(`Score: ${(testCase.score * 100).toFixed(1)}%`);
      if (testCase.reason) {
        details.push(`Reason: ${testCase.reason}`);
      }

      lines.push(escapeXml(details.join('\n')));
      lines.push(' </failure>');
    }

    // System out (response)
    if (includeSystemOut && testCase.response) {
      lines.push(' <system-out>');
      lines.push(toCData(truncate(testCase.response, maxOutputLength)));
      lines.push(' </system-out>');
    }

    // System err (error details for failed tests)
    if (includeSystemErr && !testCase.ok && testCase.reason) {
      lines.push(' <system-err>');
      const errorDetails: string[] = [];
      errorDetails.push(`Error: ${testCase.reason}`);
      // Prompts may be structured (e.g. message arrays); serialize those as JSON.
      const promptStr =
        typeof testCase.prompt === 'string' ? testCase.prompt : JSON.stringify(testCase.prompt);
      errorDetails.push(`Prompt: ${truncate(promptStr, maxOutputLength / 2)}`);
      lines.push(toCData(errorDetails.join('\n')));
      lines.push(' </system-err>');
    }

    lines.push(' </testcase>');
  }

  // Close testsuite
  lines.push('</testsuite>');

  return lines.join('\n');
}
168
+
169
/**
 * Generate JUnit XML report for red team results.
 *
 * Each entry of `manifest.results` becomes one `<testcase>`. A result with
 * status "unsafe" is reported as a `<failure>`, status "error" as an
 * `<error>`; any other status yields a passing test case. Response and
 * attack-prompt text are written as CDATA.
 *
 * @param manifest - Red-team manifest (config, metrics, results, timing).
 * @param options - Report options; see {@link JUnitReportOptions}. The suite
 *   name defaults to "RedTeam: <scenario>".
 * @returns The XML document, lines joined with "\n".
 */
export function generateRedTeamJUnitReport(
  manifest: RedTeamManifest,
  options: JUnitReportOptions = {}
): string {
  const {
    suiteName = `RedTeam: ${manifest.config.scenario}`,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000,
  } = options;

  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

  // Wrap untrusted text in CDATA. A literal "]]>" would end the section early,
  // so it is split across two sections; characters illegal in XML 1.0 are dropped.
  const toCData = (text: string): string => {
    const safe = text
      .replace(invalidXmlChars, '')
      .replace(loneSurrogates, '')
      .replace(/\]\]>/g, ']]]]><![CDATA[>');
    return `<![CDATA[${safe}]]>`;
  };

  const lines: string[] = [];

  // XML declaration
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');

  // Calculate totals - unsafe responses are failures
  const tests = manifest.metrics.total_tests;
  const failures = manifest.metrics.unsafe_responses;
  const errors = manifest.metrics.error_responses;
  const skipped = 0;
  const time = manifest.duration_ms / 1000; // JUnit uses seconds

  // Root testsuite element
  lines.push(
    `<testsuite name="${escapeXml(suiteName)}" ` +
      `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` +
      `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`
  );

  // Properties
  lines.push(' <properties>');
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(` <property name="artemis.test_type" value="redteam" />`);
  lines.push(
    ` <property name="artemis.defense_rate" value="${(manifest.metrics.defense_rate * 100).toFixed(1)}%" />`
  );
  lines.push(
    ` <property name="artemis.safe_responses" value="${manifest.metrics.safe_responses}" />`
  );
  lines.push(
    ` <property name="artemis.blocked_responses" value="${manifest.metrics.blocked_responses}" />`
  );
  lines.push(
    ` <property name="artemis.unsafe_responses" value="${manifest.metrics.unsafe_responses}" />`
  );
  lines.push(' </properties>');

  // Test cases
  for (const result of manifest.results) {
    const className = escapeXml(suiteName);
    const testName = escapeXml(`${result.caseId} (${result.mutation})`);
    const testTime = (result.latencyMs || 0) / 1000; // latency may be absent on a result

    lines.push(
      ` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`
    );

    if (result.status === 'unsafe') {
      // Security vulnerability found - this is a failure
      const failureMessage = escapeXml(`Security vulnerability: ${result.mutation}`);
      const failureType = escapeXml(`severity:${result.severity}`);

      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);

      const details: string[] = [];
      details.push(`Severity: ${result.severity.toUpperCase()}`);
      details.push(`Mutation: ${result.mutation}`);
      if (result.reasons.length > 0) {
        details.push(`Reasons: ${result.reasons.join(', ')}`);
      }

      lines.push(escapeXml(details.join('\n')));
      lines.push(' </failure>');
    } else if (result.status === 'error') {
      // Error during test
      lines.push(
        ` <error message="${escapeXml(result.response || 'Error during test')}" type="error">`
      );
      lines.push(escapeXml(`Attack: ${result.mutation}\nCase: ${result.caseId}`));
      lines.push(' </error>');
    }

    // System out (response)
    if (includeSystemOut && result.response) {
      lines.push(' <system-out>');
      lines.push(toCData(truncate(result.response, maxOutputLength)));
      lines.push(' </system-out>');
    }

    // System err (attack prompt for reference)
    if (includeSystemErr && result.status === 'unsafe') {
      lines.push(' <system-err>');
      const errDetails: string[] = [];
      errDetails.push(`Attack Prompt: ${truncate(result.prompt, maxOutputLength / 2)}`);
      errDetails.push(`Severity: ${result.severity.toUpperCase()}`);
      lines.push(toCData(errDetails.join('\n')));
      lines.push(' </system-err>');
    }

    lines.push(' </testcase>');
  }

  // Close testsuite
  lines.push('</testsuite>');

  return lines.join('\n');
}
281
+
282
/**
 * Generate JUnit XML for validation results.
 *
 * Each validated file becomes one `<testcase>` named after the file. A file
 * with `valid === false` gets a `<failure>` listing its errors; warnings are
 * written to `<system-err>` as CDATA. Only `options.suiteName` is read here.
 *
 * @param results - Per-file validation results.
 * @param options - Report options; the suite name defaults to
 *   "ArtemisKit Validation".
 * @returns The XML document, lines joined with "\n".
 */
export function generateValidationJUnitReport(
  results: Array<{
    file: string;
    valid: boolean;
    errors: Array<{ line: number; message: string; rule: string }>;
    warnings: Array<{ line: number; message: string; rule: string }>;
  }>,
  options: JUnitReportOptions = {}
): string {
  const { suiteName = 'ArtemisKit Validation' } = options;

  // biome-ignore lint/suspicious/noControlCharactersInRegex: Required to strip invalid XML chars
  const invalidXmlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/g;
  const loneSurrogates = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

  // Wrap text in CDATA. A literal "]]>" would end the section early, so it is
  // split across two sections; characters illegal in XML 1.0 are dropped.
  const toCData = (text: string): string => {
    const safe = text
      .replace(invalidXmlChars, '')
      .replace(loneSurrogates, '')
      .replace(/\]\]>/g, ']]]]><![CDATA[>');
    return `<![CDATA[${safe}]]>`;
  };

  const lines: string[] = [];

  // XML declaration
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');

  // Calculate totals
  const tests = results.length;
  const failures = results.filter((r) => !r.valid).length;
  const errors = 0;
  const skipped = 0;

  // Root testsuite element. Validation has no measured duration, so time is
  // reported as 0 and the timestamp is the moment the report is generated.
  lines.push(
    `<testsuite name="${escapeXml(suiteName)}" ` +
      `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` +
      `time="0" timestamp="${new Date().toISOString()}">`
  );

  // Test cases
  for (const result of results) {
    const className = escapeXml(suiteName);
    const testName = escapeXml(result.file);

    lines.push(` <testcase classname="${className}" name="${testName}" time="0">`);

    if (!result.valid) {
      const errorMessages = result.errors.map((e) => `Line ${e.line}: ${e.message}`).join('; ');
      lines.push(` <failure message="${escapeXml(errorMessages)}" type="validation">`);

      const details = result.errors.map(
        (error) => `[${error.rule}] Line ${error.line}: ${error.message}`
      );
      lines.push(escapeXml(details.join('\n')));
      lines.push(' </failure>');
    }

    // Warnings as system-err
    if (result.warnings.length > 0) {
      lines.push(' <system-err>');
      const warningDetails = result.warnings
        .map((w) => `[${w.rule}] Line ${w.line}: ${w.message}`)
        .join('\n');
      lines.push(toCData(`Warnings:\n${warningDetails}`));
      lines.push(' </system-err>');
    }

    lines.push(' </testcase>');
  }

  // Close testsuite
  lines.push('</testsuite>');

  return lines.join('\n');
}