@artemiskit/reports 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,82 @@
1
1
  # @artemiskit/reports
2
2
 
3
+ ## 0.2.4
4
+
5
+ ### Patch Changes
6
+
7
+ - 16604a6: ## New Features
8
+
9
+ ### Validate Command
10
+
11
+ New `artemiskit validate` command for validating scenario files without running them:
12
+
13
+ - **YAML syntax validation** - Catches formatting errors
14
+ - **Schema validation** - Validates against ArtemisKit schema using Zod
15
+ - **Semantic validation** - Detects duplicate case IDs, undefined variables
16
+ - **Warnings** - Identifies deprecated fields, missing descriptions, performance hints
17
+
18
+ Options:
19
+
20
+ - `--json` - Output results as JSON
21
+ - `--strict` - Treat warnings as errors
22
+ - `--quiet` - Only show errors
23
+ - `--export junit` - Export to JUnit XML for CI integration
24
+
25
+ ### JUnit XML Export
26
+
27
+ Added JUnit XML export support for CI/CD integration with Jenkins, GitHub Actions, GitLab CI, and other systems:
28
+
29
+ - `akit run scenarios/ --export junit` - Export run results
30
+ - `akit redteam scenarios/chatbot.yaml --export junit` - Export security test results
31
+ - `akit validate scenarios/ --export junit` - Export validation results
32
+
33
+ JUnit reports include:
34
+
35
+ - Test suite metadata (run ID, provider, model, success rate)
36
+ - Individual test cases with pass/fail status
37
+ - Failure details with matcher type and expected values
38
+ - Timing information for each test
39
+
40
+ - Updated dependencies [16604a6]
41
+ - @artemiskit/core@0.2.4
42
+
43
+ ## 0.2.3
44
+
45
+ ### Patch Changes
46
+
47
+ - 37403aa: ## v0.2.3 - Cost Tracking & Compliance Features
48
+
49
+ ### Cost Tracking
50
+
51
+ - **Automatic cost estimation**: Run results now include estimated API costs based on token usage and model pricing data
52
+ - **Cost display in output**: Summary output shows total tokens and estimated cost for each run
53
+ - **`--budget` flag**: Set a maximum budget in USD for `run`, `redteam`, and `stress` commands - the command fails (exit code 1) if the estimated cost exceeds the budget
54
+
55
+ ### History Enhancements
56
+
57
+ - **`--show-cost` flag**: Display cost column and total in `history` command output
58
+ - Cost data is stored with each run for historical tracking
59
+
60
+ ### Markdown Export
61
+
62
+ - **`--export markdown` flag**: Export run and redteam results to compliance-ready markdown format
63
+ - **`--export-output` flag**: Specify custom output directory for exports (default: `./artemis-exports`)
64
+ - Markdown reports include:
65
+ - Summary table with pass/fail rates, latency, token usage, and cost metrics
66
+ - Detailed results for failed test cases (run) or vulnerabilities found (redteam)
67
+ - Configuration used for the run
68
+ - Redaction summary (if enabled)
69
+ - Recommendations for remediation (redteam)
70
+
71
+ ### CI/CD Integration
72
+
73
+ - Budget enforcement in pipelines: `akit run scenarios/ --ci --budget 5.00`
74
+ - Cost tracking in CI summary output with `ARTEMISKIT_COST_USD` variable
75
+ - Automatic markdown report generation for compliance documentation
76
+
77
+ - Updated dependencies [37403aa]
78
+ - @artemiskit/core@0.2.3
79
+
3
80
  ## 0.2.2
4
81
 
5
82
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -7,4 +7,6 @@ export { generateJSONReport, type JSONReportOptions } from './json/generator';
7
7
  export { generateRedTeamHTMLReport } from './html/redteam-generator';
8
8
  export { generateStressHTMLReport } from './html/stress-generator';
9
9
  export { generateCompareHTMLReport, buildComparisonData, type ComparisonData, type CaseComparison, } from './html/compare-generator';
10
+ export { generateMarkdownReport, generateRedTeamMarkdownReport, type MarkdownReportOptions, } from './markdown/generator';
11
+ export { generateJUnitReport, generateRedTeamJUnitReport, generateValidationJUnitReport, type JUnitReportOptions, } from './junit/generator';
10
12
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAGrE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAGnE,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,cAAc,GACpB,MAAM,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAGrE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAGnE,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,cAAc,GACpB,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EACL,sBAAsB,EACtB,6BAA6B,EAC7B,KAAK,qBAAqB,GAC3B,MAAM,sBAAsB,CAAC;AAG9B,OAAO,EACL,mBAAmB,EACnB,0BAA0B,EAC1B,6BAA6B,EAC7B,KAAK,kBAAkB,GACxB,MAAM,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -7901,9 +7901,466 @@ function generateCompareHTMLReport(baseline, current) {
7901
7901
  const template = import_handlebars4.default.compile(COMPARE_HTML_TEMPLATE);
7902
7902
  return template({ data });
7903
7903
  }
7904
+ // src/markdown/generator.ts
7905
/**
 * Shorten `text` for display in a Markdown report.
 *
 * Text no longer than `maxLength` UTF-16 code units is returned unchanged;
 * anything longer is cut and suffixed with "...".
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of code units kept before the suffix.
 * @returns {string} The original text, or its prefix followed by "...".
 */
function truncate(text, maxLength) {
  if (text.length <= maxLength)
    return text;
  let end = maxLength;
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    // Never cut between the two halves of a surrogate pair: a lone high
    // surrogate is not valid text and shows up as U+FFFD once encoded.
    if (last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff) {
      end -= 1;
    }
  }
  return `${text.slice(0, end)}...`;
}
7910
/**
 * Format an estimated cost (in US dollars) for the Markdown summary table.
 *
 * - below one cent: shown in cents with four decimals, e.g. "0.5000 cents"
 * - below one dollar: shown in dollars with four decimals, e.g. "$0.2500"
 * - otherwise: shown in dollars with two decimals, e.g. "$12.50"
 *
 * @param {number} costUsd - Cost in US dollars.
 * @returns {string} Human-readable cost.
 */
function formatCostMd(costUsd) {
  if (costUsd < 0.01) {
    // The value is converted to cents here, so it must not carry a "$" prefix
    // ("$0.5000 cents" reads as dollars and cents at the same time).
    return `${(costUsd * 100).toFixed(4)} cents`;
  }
  if (costUsd < 1) {
    return `$${costUsd.toFixed(4)}`;
  }
  return `$${costUsd.toFixed(2)}`;
}
7919
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * - under one second: "<ms>ms"
 * - under one minute: seconds with one decimal, e.g. "1.5s"
 * - otherwise: whole minutes and seconds, e.g. "2m 5s"
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration.
 */
function formatDuration(ms) {
  if (ms < 1000)
    return `${ms}ms`;
  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // Values just under a minute round up to "60.0"; report those as minutes.
    if (seconds !== "60.0")
      return `${seconds}s`;
  }
  // Round the total once and then split it, so the seconds part can never
  // reach 60 (rounding the remainder separately produced e.g. "1m 60s").
  const totalSeconds = Math.round(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}m ${seconds}s`;
}
7928
/**
 * Build a Markdown report for a standard run.
 *
 * Sections, in order: header, summary table, passed cases (collapsible
 * table), failed cases (optionally with prompt / expected / response),
 * resolved configuration and redaction summary when present, footer.
 *
 * @param {object} manifest - Run manifest (config, metrics, cases, ...).
 * @param {object} [options]
 * @param {boolean} [options.includeDetails=true] - Include prompt, expected
 *   value and response for failed cases.
 * @param {number} [options.truncateAt=500] - Maximum length of each
 *   prompt / expected / response excerpt.
 * @returns {string} The report as a single newline-joined string.
 */
function generateMarkdownReport(manifest, options = {}) {
  const { includeDetails = true, truncateAt = 500 } = options;
  const lines = [];
  // Emit `content` as a fenced code block. The fence is made longer than the
  // longest backtick run inside the content, so model output that itself
  // contains ``` fences cannot terminate the block early.
  const pushFenced = (content, info = "") => {
    let longestRun = 0;
    for (const run of content.match(/`+/g) ?? []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    lines.push(`${fence}${info}`, content, fence);
  };
  // Keep a value on one table row: escape column separators, flatten newlines.
  const tableCell = (value) => String(value).replace(/\r?\n/g, " ").replace(/\|/g, "\\|");

  lines.push("# ArtemisKit Test Results");
  lines.push("");
  lines.push(`**Scenario:** ${manifest.config.scenario}`);
  lines.push(`**Run ID:** ${manifest.run_id}`);
  lines.push(`**Date:** ${new Date(manifest.start_time).toISOString()}`);
  lines.push(`**Provider:** ${manifest.config.provider}${manifest.config.model ? ` (${manifest.config.model})` : ""}`);
  lines.push("");
  lines.push("---");
  lines.push("");

  lines.push("## Summary");
  lines.push("");
  lines.push("| Metric | Value |");
  lines.push("|--------|-------|");
  lines.push(`| Total Cases | ${manifest.metrics.total_cases} |`);
  lines.push(`| Passed | ${manifest.metrics.passed_cases} (${(manifest.metrics.success_rate * 100).toFixed(1)}%) |`);
  lines.push(`| Failed | ${manifest.metrics.failed_cases} |`);
  lines.push(`| Duration | ${formatDuration(manifest.duration_ms)} |`);
  lines.push(`| Median Latency | ${manifest.metrics.median_latency_ms}ms |`);
  lines.push(`| P95 Latency | ${manifest.metrics.p95_latency_ms}ms |`);
  lines.push(`| Total Tokens | ${manifest.metrics.total_tokens.toLocaleString()} |`);
  if (manifest.metrics.cost) {
    lines.push(`| Estimated Cost | ${formatCostMd(manifest.metrics.cost.total_usd)} |`);
  }
  lines.push("");
  lines.push("---");
  lines.push("");

  lines.push("## Results by Case");
  lines.push("");
  const passed = manifest.cases.filter((c) => c.ok);
  lines.push(`### Passed (${passed.length})`);
  lines.push("");
  if (passed.length > 0) {
    lines.push("<details>");
    lines.push("<summary>Click to expand passed cases</summary>");
    lines.push("");
    lines.push("| Case ID | Latency | Tokens | Score |");
    lines.push("|---------|---------|--------|-------|");
    for (const c of passed) {
      lines.push(`| ${tableCell(c.id)} | ${formatDuration(c.latencyMs)} | ${c.tokens?.total || "-"} | ${(c.score * 100).toFixed(0)}% |`);
    }
    lines.push("");
    lines.push("</details>");
  } else {
    lines.push("_No passed cases_");
  }
  lines.push("");

  const failed = manifest.cases.filter((c) => !c.ok);
  lines.push(`### Failed (${failed.length})`);
  lines.push("");
  if (failed.length > 0) {
    for (const c of failed) {
      lines.push(`#### \`${c.id}\``);
      lines.push("");
      if (includeDetails) {
        const promptStr = typeof c.prompt === "string" ? c.prompt : JSON.stringify(c.prompt, null, 2);
        // JSON.stringify returns undefined for an undefined value; fall back
        // to "null" so truncate() always receives a string.
        const expectedStr = JSON.stringify(c.expected, null, 2) ?? "null";
        lines.push("**Prompt:**");
        pushFenced(truncate(promptStr, truncateAt));
        lines.push("");
        lines.push("**Expected:**");
        lines.push(`- Type: \`${c.matcherType}\``);
        pushFenced(truncate(expectedStr, truncateAt), "json");
        lines.push("");
        lines.push("**Actual Response:**");
        pushFenced(truncate(c.response || "(empty)", truncateAt));
        lines.push("");
      }
      lines.push(`**Reason:** ${c.reason || "Unknown"}`);
      lines.push("");
      lines.push("---");
      lines.push("");
    }
  } else {
    lines.push("_No failed cases_");
    lines.push("");
  }

  if (manifest.resolved_config) {
    lines.push("## Configuration");
    lines.push("");
    lines.push("```yaml");
    lines.push(`provider: ${manifest.resolved_config.provider}`);
    if (manifest.resolved_config.model) {
      lines.push(`model: ${manifest.resolved_config.model}`);
    }
    if (manifest.resolved_config.temperature !== undefined) {
      lines.push(`temperature: ${manifest.resolved_config.temperature}`);
    }
    if (manifest.resolved_config.max_tokens !== undefined) {
      lines.push(`max_tokens: ${manifest.resolved_config.max_tokens}`);
    }
    lines.push("```");
    lines.push("");
  }

  if (manifest.redaction?.enabled) {
    lines.push("## Redaction");
    lines.push("");
    lines.push(`- **Patterns Used:** ${manifest.redaction.patternsUsed.join(", ")}`);
    lines.push(`- **Prompts Redacted:** ${manifest.redaction.summary.promptsRedacted}`);
    lines.push(`- **Responses Redacted:** ${manifest.redaction.summary.responsesRedacted}`);
    lines.push(`- **Total Redactions:** ${manifest.redaction.summary.totalRedactions}`);
    lines.push("");
  }

  lines.push("---");
  lines.push("");
  lines.push(`*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`);
  return lines.join("\n");
}
8044
/**
 * Build a Markdown security report for red-team results.
 *
 * Sections, in order: header, security summary (with a risk level derived
 * from the defense rate), severity breakdown and vulnerability details when
 * unsafe responses exist, recommendations, footer.
 *
 * Risk level thresholds on the defense rate (percent):
 * >= 95 LOW, >= 80 MEDIUM, >= 50 HIGH, otherwise CRITICAL.
 *
 * @param {object} manifest - Red-team manifest (config, metrics, results, ...).
 * @param {object} [options]
 * @param {boolean} [options.includeDetails=true] - Include attack prompt and
 *   model response for each vulnerability.
 * @param {number} [options.truncateAt=500] - Maximum length of each
 *   prompt / response excerpt.
 * @returns {string} The report as a single newline-joined string.
 */
function generateRedTeamMarkdownReport(manifest, options = {}) {
  const { includeDetails = true, truncateAt = 500 } = options;
  const lines = [];
  // Emit `content` as a fenced code block whose fence is longer than any
  // backtick run in the content, so attack prompts or model responses that
  // contain ``` cannot close the block early.
  const pushFenced = (content) => {
    let longestRun = 0;
    for (const run of content.match(/`+/g) ?? []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    lines.push(fence, content, fence);
  };

  lines.push("# ArtemisKit Security Report");
  lines.push("");
  lines.push(`**Scenario:** ${manifest.config.scenario}`);
  lines.push(`**Run ID:** ${manifest.run_id}`);
  lines.push(`**Date:** ${new Date(manifest.start_time).toISOString()}`);
  lines.push("**Test Type:** Red Team Security Scan");
  lines.push("");
  lines.push("---");
  lines.push("");

  // Tests that produced a usable response (errors excluded).
  const testable = manifest.metrics.total_tests - manifest.metrics.error_responses;
  const defenseRate = manifest.metrics.defense_rate * 100;
  let riskLevel;
  let riskEmoji;
  if (defenseRate >= 95) {
    riskLevel = "LOW";
    riskEmoji = "";
  } else if (defenseRate >= 80) {
    riskLevel = "MEDIUM";
    riskEmoji = "";
  } else if (defenseRate >= 50) {
    riskLevel = "HIGH";
    riskEmoji = "";
  } else {
    riskLevel = "CRITICAL";
    riskEmoji = "";
  }
  // Only prepend the emoji when there is one: "** LOW**" (space right after
  // the opening delimiter) is not rendered as bold by CommonMark.
  const riskLabel = riskEmoji ? `${riskEmoji} ${riskLevel}` : riskLevel;

  lines.push("## Security Summary");
  lines.push("");
  lines.push("| Metric | Value |");
  lines.push("|--------|-------|");
  lines.push(`| Defense Rate | ${defenseRate.toFixed(1)}% |`);
  lines.push(`| Total Attacks | ${manifest.metrics.total_tests} |`);
  lines.push(`| Defended | ${manifest.metrics.defended} |`);
  lines.push(`| Safe Responses | ${manifest.metrics.safe_responses} |`);
  lines.push(`| Blocked | ${manifest.metrics.blocked_responses} |`);
  lines.push(`| Vulnerabilities | ${manifest.metrics.unsafe_responses} |`);
  lines.push(`| Errors | ${manifest.metrics.error_responses} |`);
  lines.push(`| Risk Level | **${riskLabel}** |`);
  lines.push("");
  lines.push("---");
  lines.push("");

  const { by_severity } = manifest.metrics;
  if (manifest.metrics.unsafe_responses > 0) {
    lines.push("## Severity Breakdown");
    lines.push("");
    lines.push("| Severity | Count | Description |");
    lines.push("|----------|-------|-------------|");
    if (by_severity.critical > 0) {
      lines.push(`| Critical | ${by_severity.critical} | Severe security vulnerability |`);
    }
    if (by_severity.high > 0) {
      lines.push(`| High | ${by_severity.high} | Significant security concern |`);
    }
    if (by_severity.medium > 0) {
      lines.push(`| Medium | ${by_severity.medium} | Moderate security issue |`);
    }
    if (by_severity.low > 0) {
      lines.push(`| Low | ${by_severity.low} | Minor security concern |`);
    }
    lines.push("");
    lines.push("---");
    lines.push("");
  }

  const unsafeResults = manifest.results.filter((r) => r.status === "unsafe");
  if (unsafeResults.length > 0) {
    lines.push("## Vulnerabilities Found");
    lines.push("");
    unsafeResults.forEach((result, index) => {
      const severity = result.severity.toUpperCase();
      lines.push(`### ${index + 1}. ${result.mutation} (${severity})`);
      lines.push("");
      lines.push(`**Case ID:** ${result.caseId}`);
      lines.push(`**Severity:** ${severity}`);
      lines.push(`**Mutation:** ${result.mutation}`);
      lines.push("");
      if (includeDetails) {
        lines.push("**Attack Prompt:**");
        pushFenced(truncate(result.prompt, truncateAt));
        lines.push("");
        lines.push("**Model Response:**");
        pushFenced(truncate(result.response || "(empty)", truncateAt));
        lines.push("");
      }
      if (result.reasons.length > 0) {
        lines.push("**Reasons:**");
        for (const reason of result.reasons) {
          lines.push(`- ${reason}`);
        }
        lines.push("");
      }
      lines.push("---");
      lines.push("");
    });
  }

  lines.push("## Recommendations");
  lines.push("");
  if (manifest.metrics.unsafe_responses === 0) {
    lines.push("- All attacks were successfully defended");
    lines.push("- Continue monitoring and testing regularly");
  } else {
    if (by_severity.critical > 0 || by_severity.high > 0) {
      lines.push("- **High Priority:** Review and address critical/high severity vulnerabilities immediately");
    }
    // More than 10% of the testable attacks succeeded.
    if (manifest.metrics.unsafe_responses > testable * 0.1) {
      lines.push("- Consider implementing additional input validation and output filtering");
    }
    lines.push("- Review system prompts for potential information leakage");
    lines.push("- Implement role-play and persona detection");
    lines.push("- Add output filtering for dangerous content patterns");
  }
  lines.push("");
  lines.push("---");
  lines.push("");
  lines.push(`*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`);
  return lines.join("\n");
}
8168
+ // src/junit/generator.ts
8169
/**
 * Escape a value for use in XML attribute values and element text.
 *
 * Characters that are not allowed in XML 1.0 documents are removed first
 * (C0 controls other than tab/LF/CR, unpaired surrogates, U+FFFE, U+FFFF),
 * then the five predefined entities (& < > " ') are escaped.
 *
 * @param {unknown} str - Value to escape; `null`/`undefined` become "" and
 *   other non-strings are converted with `String()`.
 * @returns {string} XML-safe text.
 */
function escapeXml(str) {
  // Negated form of the XML 1.0 `Char` production. The `u` flag makes the
  // class operate on code points, so a lone surrogate is matched and removed
  // while a valid pair (an astral character) is kept.
  const invalidXmlChars = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu;
  return String(str ?? "")
    .replace(invalidXmlChars, "")
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
8173
/**
 * Shorten `text` for embedding in JUnit XML output.
 *
 * Text no longer than `maxLength` UTF-16 code units is returned unchanged;
 * anything longer is cut and suffixed with "...(truncated)".
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of code units kept before the suffix.
 * @returns {string} The original text, or its prefix followed by the marker.
 */
function truncate2(text, maxLength) {
  if (text.length <= maxLength)
    return text;
  let end = maxLength;
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    // Never cut between the halves of a surrogate pair: an unpaired
    // surrogate is not a legal XML character.
    if (last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff) {
      end -= 1;
    }
  }
  return `${text.slice(0, end)}...(truncated)`;
}
8178
/**
 * Convert a date value into an ISO-8601 timestamp string.
 *
 * @param {string} dateStr - Anything accepted by the `Date` constructor.
 * @returns {string} Result of `Date.prototype.toISOString()` (UTC).
 */
function formatTimestamp(dateStr) {
  const parsed = new Date(dateStr);
  return parsed.toISOString();
}
8181
/**
 * Build a JUnit XML report for a standard run.
 *
 * Output is one `<testsuite>` with run metadata as `<properties>` and one
 * `<testcase>` per case. Failed cases get a `<failure>` element; responses
 * go to `<system-out>` and failure reason plus prompt to `<system-err>`.
 *
 * @param {object} manifest - Run manifest (config, metrics, cases, ...).
 * @param {object} [options]
 * @param {string} [options.suiteName] - Suite name; defaults to the scenario name.
 * @param {boolean} [options.includeSystemOut=true] - Emit `<system-out>` with the response.
 * @param {boolean} [options.includeSystemErr=true] - Emit `<system-err>` for failed cases.
 * @param {number} [options.maxOutputLength=2000] - Response length limit;
 *   prompts are limited to half of this.
 * @returns {string} The XML document as a newline-joined string.
 */
function generateJUnitReport(manifest, options = {}) {
  const {
    suiteName = manifest.config.scenario,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000
  } = options;
  // Negated XML 1.0 `Char` production: matches everything illegal in XML.
  const illegalXmlChars = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu;
  // Wrap free text in CDATA. Illegal characters are dropped and any "]]>" in
  // the payload is split across two sections, so model output cannot end the
  // section early and corrupt the document.
  const toCData = (text) => `<![CDATA[${text.replace(illegalXmlChars, "").split("]]>").join("]]]]><![CDATA[>")}]]>`;

  const lines = [];
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
  const tests = manifest.metrics.total_cases;
  const failures = manifest.metrics.failed_cases;
  const errors = 0;
  const skipped = 0;
  const time = manifest.duration_ms / 1000;
  lines.push(`<testsuite name="${escapeXml(suiteName)}" ` + `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` + `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`);

  lines.push(" <properties>");
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(` <property name="artemis.provider" value="${escapeXml(manifest.config.provider)}" />`);
  if (manifest.config.model) {
    lines.push(` <property name="artemis.model" value="${escapeXml(manifest.config.model)}" />`);
  }
  lines.push(` <property name="artemis.success_rate" value="${(manifest.metrics.success_rate * 100).toFixed(1)}%" />`);
  lines.push(` <property name="artemis.total_tokens" value="${manifest.metrics.total_tokens}" />`);
  if (manifest.metrics.cost) {
    lines.push(` <property name="artemis.cost_usd" value="${manifest.metrics.cost.total_usd.toFixed(6)}" />`);
  }
  lines.push(" </properties>");

  const className = escapeXml(suiteName);
  for (const testCase of manifest.cases) {
    const testName = escapeXml(testCase.id);
    const testTime = testCase.latencyMs / 1000;
    lines.push(` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`);
    if (!testCase.ok) {
      const failureMessage = escapeXml(testCase.reason || "Test failed");
      const failureType = escapeXml(testCase.matcherType);
      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);
      const details = [];
      details.push(`Matcher Type: ${testCase.matcherType}`);
      details.push(`Expected: ${JSON.stringify(testCase.expected, null, 2)}`);
      details.push(`Score: ${(testCase.score * 100).toFixed(1)}%`);
      if (testCase.reason) {
        details.push(`Reason: ${testCase.reason}`);
      }
      lines.push(escapeXml(details.join("\n")));
      lines.push(" </failure>");
    }
    if (includeSystemOut && testCase.response) {
      lines.push(" <system-out>");
      lines.push(toCData(truncate2(testCase.response, maxOutputLength)));
      lines.push(" </system-out>");
    }
    if (includeSystemErr && !testCase.ok && testCase.reason) {
      lines.push(" <system-err>");
      const errorDetails = [];
      errorDetails.push(`Error: ${testCase.reason}`);
      const promptStr = typeof testCase.prompt === "string" ? testCase.prompt : JSON.stringify(testCase.prompt);
      errorDetails.push(`Prompt: ${truncate2(promptStr, maxOutputLength / 2)}`);
      lines.push(toCData(errorDetails.join("\n")));
      lines.push(" </system-err>");
    }
    lines.push(" </testcase>");
  }
  lines.push("</testsuite>");
  return lines.join("\n");
}
8250
/**
 * Build a JUnit XML report for red-team results.
 *
 * Output is one `<testsuite>` with one `<testcase>` per attack result.
 * "unsafe" results are reported as `<failure>` (type `severity:<level>`),
 * "error" results as `<error>`; other statuses count as passing tests.
 *
 * @param {object} manifest - Red-team manifest (config, metrics, results, ...).
 * @param {object} [options]
 * @param {string} [options.suiteName] - Suite name; defaults to
 *   "RedTeam: <scenario>".
 * @param {boolean} [options.includeSystemOut=true] - Emit `<system-out>` with the response.
 * @param {boolean} [options.includeSystemErr=true] - Emit `<system-err>` for unsafe results.
 * @param {number} [options.maxOutputLength=2000] - Response length limit;
 *   attack prompts are limited to half of this.
 * @returns {string} The XML document as a newline-joined string.
 */
function generateRedTeamJUnitReport(manifest, options = {}) {
  const {
    suiteName = `RedTeam: ${manifest.config.scenario}`,
    includeSystemOut = true,
    includeSystemErr = true,
    maxOutputLength = 2000
  } = options;
  // Negated XML 1.0 `Char` production: matches everything illegal in XML.
  const illegalXmlChars = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu;
  // Wrap free text in CDATA. Illegal characters are dropped and any "]]>" is
  // split across two sections; attack prompts and model responses are
  // untrusted and must not be able to terminate the section.
  const toCData = (text) => `<![CDATA[${text.replace(illegalXmlChars, "").split("]]>").join("]]]]><![CDATA[>")}]]>`;

  const lines = [];
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
  const tests = manifest.metrics.total_tests;
  const failures = manifest.metrics.unsafe_responses;
  const errors = manifest.metrics.error_responses;
  const skipped = 0;
  const time = manifest.duration_ms / 1000;
  lines.push(`<testsuite name="${escapeXml(suiteName)}" ` + `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` + `time="${time.toFixed(3)}" timestamp="${formatTimestamp(manifest.start_time)}">`);

  lines.push(" <properties>");
  lines.push(` <property name="artemis.run_id" value="${escapeXml(manifest.run_id)}" />`);
  lines.push(` <property name="artemis.version" value="${escapeXml(manifest.version)}" />`);
  lines.push(` <property name="artemis.test_type" value="redteam" />`);
  lines.push(` <property name="artemis.defense_rate" value="${(manifest.metrics.defense_rate * 100).toFixed(1)}%" />`);
  lines.push(` <property name="artemis.safe_responses" value="${manifest.metrics.safe_responses}" />`);
  lines.push(` <property name="artemis.blocked_responses" value="${manifest.metrics.blocked_responses}" />`);
  lines.push(` <property name="artemis.unsafe_responses" value="${manifest.metrics.unsafe_responses}" />`);
  lines.push(" </properties>");

  const className = escapeXml(suiteName);
  for (const result of manifest.results) {
    const testName = escapeXml(`${result.caseId} (${result.mutation})`);
    // Latency may be missing on a result; report 0 in that case.
    const testTime = (result.latencyMs || 0) / 1000;
    lines.push(` <testcase classname="${className}" name="${testName}" time="${testTime.toFixed(3)}">`);
    if (result.status === "unsafe") {
      const failureMessage = escapeXml(`Security vulnerability: ${result.mutation}`);
      const failureType = escapeXml(`severity:${result.severity}`);
      lines.push(` <failure message="${failureMessage}" type="${failureType}">`);
      const details = [];
      details.push(`Severity: ${result.severity.toUpperCase()}`);
      details.push(`Mutation: ${result.mutation}`);
      if (result.reasons.length > 0) {
        details.push(`Reasons: ${result.reasons.join(", ")}`);
      }
      lines.push(escapeXml(details.join("\n")));
      lines.push(" </failure>");
    } else if (result.status === "error") {
      lines.push(` <error message="${escapeXml(result.response || "Error during test")}" type="error">`);
      lines.push(escapeXml(`Attack: ${result.mutation}\nCase: ${result.caseId}`));
      lines.push(" </error>");
    }
    if (includeSystemOut && result.response) {
      lines.push(" <system-out>");
      lines.push(toCData(truncate2(result.response, maxOutputLength)));
      lines.push(" </system-out>");
    }
    if (includeSystemErr && result.status === "unsafe") {
      lines.push(" <system-err>");
      const errDetails = [];
      errDetails.push(`Attack Prompt: ${truncate2(result.prompt, maxOutputLength / 2)}`);
      errDetails.push(`Severity: ${result.severity.toUpperCase()}`);
      lines.push(toCData(errDetails.join("\n")));
      lines.push(" </system-err>");
    }
    lines.push(" </testcase>");
  }
  lines.push("</testsuite>");
  return lines.join("\n");
}
8318
/**
 * Build a JUnit XML report for scenario-file validation results.
 *
 * Output is one `<testsuite>` with one `<testcase>` per validated file.
 * Invalid files get a `<failure>` listing their errors; warnings are written
 * to `<system-err>`. Durations are reported as 0 and the suite timestamp is
 * the time the report is generated.
 *
 * @param {Array<object>} results - One entry per file, with `file`, `valid`,
 *   `errors` and `warnings` (each error/warning has `line`, `message`, `rule`).
 * @param {object} [options]
 * @param {string} [options.suiteName="ArtemisKit Validation"] - Suite name.
 * @returns {string} The XML document as a newline-joined string.
 */
function generateValidationJUnitReport(results, options = {}) {
  const { suiteName = "ArtemisKit Validation" } = options;
  // Negated XML 1.0 `Char` production: matches everything illegal in XML.
  const illegalXmlChars = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/gu;
  // Wrap free text in CDATA; drop illegal characters and split any "]]>" so
  // a warning message cannot terminate the section early.
  const toCData = (text) => `<![CDATA[${text.replace(illegalXmlChars, "").split("]]>").join("]]]]><![CDATA[>")}]]>`;

  const lines = [];
  lines.push('<?xml version="1.0" encoding="UTF-8"?>');
  const tests = results.length;
  const failures = results.filter((r) => !r.valid).length;
  const errors = 0;
  const skipped = 0;
  lines.push(`<testsuite name="${escapeXml(suiteName)}" ` + `tests="${tests}" failures="${failures}" errors="${errors}" skipped="${skipped}" ` + `time="0" timestamp="${new Date().toISOString()}">`);

  const className = escapeXml(suiteName);
  for (const result of results) {
    const testName = escapeXml(result.file);
    lines.push(` <testcase classname="${className}" name="${testName}" time="0">`);
    if (!result.valid) {
      // Short form for the attribute, rule-tagged form for the element body.
      const errorMessages = result.errors.map((e) => `Line ${e.line}: ${e.message}`).join("; ");
      lines.push(` <failure message="${escapeXml(errorMessages)}" type="validation">`);
      const details = result.errors.map((error) => `[${error.rule}] Line ${error.line}: ${error.message}`);
      lines.push(escapeXml(details.join("\n")));
      lines.push(" </failure>");
    }
    if (result.warnings.length > 0) {
      lines.push(" <system-err>");
      const warningDetails = result.warnings.map((w) => `[${w.rule}] Line ${w.line}: ${w.message}`).join("\n");
      lines.push(toCData(`Warnings:\n${warningDetails}`));
      lines.push(" </system-err>");
    }
    lines.push(" </testcase>");
  }
  lines.push("</testsuite>");
  return lines.join("\n");
}
7904
8356
  export {
8357
+ generateValidationJUnitReport,
7905
8358
  generateStressHTMLReport,
8359
+ generateRedTeamMarkdownReport,
8360
+ generateRedTeamJUnitReport,
7906
8361
  generateRedTeamHTMLReport,
8362
+ generateMarkdownReport,
8363
+ generateJUnitReport,
7907
8364
  generateJSONReport,
7908
8365
  generateHTMLReport,
7909
8366
  generateCompareHTMLReport,
@@ -0,0 +1,44 @@
1
+ /**
2
+ * JUnit XML Report Generator
3
+ *
4
+ * Generates JUnit-compatible XML reports for CI/CD integration.
5
+ * Follows the JUnit XML format specification for compatibility with
6
+ * Jenkins, GitHub Actions, GitLab CI, and other CI systems.
7
+ */
8
+ import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
9
+ export interface JUnitReportOptions {
10
+ /** Test suite name. Default: the scenario name for runs, "RedTeam: <scenario>" for red team reports, "ArtemisKit Validation" for validation reports. */
11
+ suiteName?: string;
12
+ /** Include a system-out element with the response content (default: true; not used by the validation report) */
13
+ includeSystemOut?: boolean;
14
+ /** Include a system-err element with failure details and the prompt (default: true; not used by the validation report) */
15
+ includeSystemErr?: boolean;
16
+ /** Maximum length of response content before truncation (default: 2000); prompts are truncated at half this value */
17
+ maxOutputLength?: number;
18
+ }
19
+ /**
20
+ * Generate a JUnit XML report for a standard run: a single testsuite with one testcase per case, where failed cases carry a failure element (matcher type, expected value, score, reason).
21
+ */
22
+ export declare function generateJUnitReport(manifest: RunManifest, options?: JUnitReportOptions): string;
23
+ /**
24
+ * Generate a JUnit XML report for red team results: one testcase per attack result, with "unsafe" results reported as failures (type "severity:<level>") and "error" results reported as errors.
25
+ */
26
+ export declare function generateRedTeamJUnitReport(manifest: RedTeamManifest, options?: JUnitReportOptions): string;
27
+ /**
28
+ * Generate JUnit XML for validation results: one testcase per validated file, a failure element listing the errors of each invalid file, and warnings written to system-err. Only the `suiteName` option is read.
29
+ */
30
+ export declare function generateValidationJUnitReport(results: Array<{
31
+ file: string;
32
+ valid: boolean;
33
+ errors: Array<{
34
+ line: number;
35
+ message: string;
36
+ rule: string;
37
+ }>;
38
+ warnings: Array<{
39
+ line: number;
40
+ message: string;
41
+ rule: string;
42
+ }>;
43
+ }>, options?: JUnitReportOptions): string;
44
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/junit/generator.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAErE,MAAM,WAAW,kBAAkB;IACjC,kDAAkD;IAClD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,4CAA4C;IAC5C,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,yCAAyC;IACzC,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAmCD;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,WAAW,EACrB,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CA0GR;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,eAAe,EACzB,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CAyGR;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAC3C,OAAO,EAAE,KAAK,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/D,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAClE,CAAC,EACF,OAAO,GAAE,kBAAuB,GAC/B,MAAM,CAyDR"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Markdown Report Generator
3
+ *
4
+ * Generates documentation-friendly markdown reports for compliance and audit trails.
5
+ */
6
+ import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
7
+ export interface MarkdownReportOptions {
8
+ /** Include full prompt/response details for failed cases and for vulnerabilities in red team reports (default: true) */
9
+ includeDetails?: boolean;
10
+ /** Maximum characters to show for each prompt/expected/response excerpt before "..." is appended (default: 500) */
11
+ truncateAt?: number;
12
+ }
13
+ /**
14
+ * Generate a markdown report for a standard run: summary table, passed cases (collapsible table), failed cases with details, plus configuration and redaction sections when the manifest provides them.
15
+ */
16
+ export declare function generateMarkdownReport(manifest: RunManifest, options?: MarkdownReportOptions): string;
17
+ /**
18
+ * Generate a markdown report for red team results: security summary with a risk level derived from the defense rate, severity breakdown, details of each unsafe result, and recommendations.
19
+ */
20
+ export declare function generateRedTeamMarkdownReport(manifest: RedTeamManifest, options?: MarkdownReportOptions): string;
21
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/markdown/generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAErE,MAAM,WAAW,qBAAqB;IACpC,4DAA4D;IAC5D,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAkCD;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,WAAW,EACrB,OAAO,GAAE,qBAA0B,GAClC,MAAM,CAuJR;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAC3C,QAAQ,EAAE,eAAe,EACzB,OAAO,GAAE,qBAA0B,GAClC,MAAM,CAmJR"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/reports",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "HTML report generation for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -30,7 +30,7 @@
30
30
  "test": "bun test"
31
31
  },
32
32
  "dependencies": {
33
- "@artemiskit/core": "workspace:*",
33
+ "@artemiskit/core": "0.2.4",
34
34
  "handlebars": "^4.7.8"
35
35
  },
36
36
  "devDependencies": {
package/src/index.ts CHANGED
@@ -20,3 +20,18 @@ export {
20
20
  type ComparisonData,
21
21
  type CaseComparison,
22
22
  } from './html/compare-generator';
23
+
24
+ // Markdown reports
25
+ export {
26
+ generateMarkdownReport,
27
+ generateRedTeamMarkdownReport,
28
+ type MarkdownReportOptions,
29
+ } from './markdown/generator';
30
+
31
+ // JUnit XML reports (CI integration)
32
+ export {
33
+ generateJUnitReport,
34
+ generateRedTeamJUnitReport,
35
+ generateValidationJUnitReport,
36
+ type JUnitReportOptions,
37
+ } from './junit/generator';