@artemiskit/reports 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,42 @@
1
1
  # @artemiskit/reports
2
2
 
3
+ ## 0.2.3
4
+
5
+ ### Patch Changes
6
+
7
+ - 37403aa: ## v0.2.3 - Cost Tracking & Compliance Features
8
+
9
+ ### Cost Tracking
10
+
11
+ - **Automatic cost estimation**: Run results now include estimated API costs based on token usage and model pricing data
12
+ - **Cost display in output**: Summary output shows total tokens and estimated cost for each run
13
+ - **`--budget` flag**: Set a maximum budget in USD for `run`, `redteam`, and `stress` commands - the command fails (exit code 1) if the estimated cost exceeds the budget
14
+
15
+ ### History Enhancements
16
+
17
+ - **`--show-cost` flag**: Display cost column and total in `history` command output
18
+ - Cost data is stored with each run for historical tracking
19
+
20
+ ### Markdown Export
21
+
22
+ - **`--export markdown` flag**: Export run and redteam results to compliance-ready markdown format
23
+ - **`--export-output` flag**: Specify custom output directory for exports (default: `./artemis-exports`)
24
+ - Markdown reports include:
25
+ - Summary table with pass/fail rates, latency, token usage, and cost metrics
26
+ - Detailed results for failed test cases (run) or vulnerabilities found (redteam)
27
+ - Configuration used for the run
28
+ - Redaction summary (if enabled)
29
+ - Recommendations for remediation (redteam)
30
+
31
+ ### CI/CD Integration
32
+
33
+ - Budget enforcement in pipelines: `akit run scenarios/ --ci --budget 5.00`
34
+ - Cost tracking in CI summary output with `ARTEMISKIT_COST_USD` variable
35
+ - Automatic markdown report generation for compliance documentation
36
+
37
+ - Updated dependencies [37403aa]
38
+ - @artemiskit/core@0.2.3
39
+
3
40
  ## 0.2.2
4
41
 
5
42
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -7,4 +7,5 @@ export { generateJSONReport, type JSONReportOptions } from './json/generator';
7
7
  export { generateRedTeamHTMLReport } from './html/redteam-generator';
8
8
  export { generateStressHTMLReport } from './html/stress-generator';
9
9
  export { generateCompareHTMLReport, buildComparisonData, type ComparisonData, type CaseComparison, } from './html/compare-generator';
10
+ export { generateMarkdownReport, generateRedTeamMarkdownReport, type MarkdownReportOptions, } from './markdown/generator';
10
11
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAGrE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAGnE,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,cAAc,GACpB,MAAM,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AAG9E,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAGrE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAGnE,OAAO,EACL,yBAAyB,EACzB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,cAAc,GACpB,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EACL,sBAAsB,EACtB,6BAA6B,EAC7B,KAAK,qBAAqB,GAC3B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -7901,9 +7901,275 @@ function generateCompareHTMLReport(baseline, current) {
7901
7901
  const template = import_handlebars4.default.compile(COMPARE_HTML_TEMPLATE);
7902
7902
  return template({ data });
7903
7903
  }
7904
+ // src/markdown/generator.ts
7905
/**
 * Shorten text to at most `maxLength` UTF-16 code units, appending "..."
 * whenever something was cut off.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of code units to keep.
 * @returns {string} `text` unchanged when it already fits, otherwise the kept
 *   prefix followed by "...".
 */
function truncate(text, maxLength) {
  if (text.length <= maxLength)
    return text;
  let end = maxLength;
  // If the cut would land between the two halves of a surrogate pair (for
  // example an emoji), drop the dangling high surrogate so the report never
  // contains an unpaired code unit.
  const lastKept = text.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff)
    end -= 1;
  return `${text.slice(0, end)}...`;
}
7910
/**
 * Format a USD amount for display in a report table.
 *
 * - below one cent: shown in cents with four decimals (e.g. "0.5000 cents")
 * - below one dollar: dollars with four decimals (e.g. "$0.2500")
 * - otherwise: dollars with two decimals (e.g. "$12.50")
 *
 * @param {number} costUsd - Cost in US dollars.
 * @returns {string} Human-readable cost.
 */
function formatCostMd(costUsd) {
  if (costUsd < 0.01) {
    // The value is expressed in cents here, so it must not carry a "$" prefix.
    return `${(costUsd * 100).toFixed(4)} cents`;
  }
  if (costUsd < 1) {
    return `$${costUsd.toFixed(4)}`;
  }
  return `$${costUsd.toFixed(2)}`;
}
7919
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * - non-finite input: "-"
 * - under one second: whole milliseconds ("250ms")
 * - under one minute: seconds with one decimal ("1.5s")
 * - otherwise: minutes and whole seconds ("1m 30s")
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration.
 */
function formatDuration(ms) {
  if (!Number.isFinite(ms))
    return "-";
  if (ms < 1000) {
    // Fractional timings are rounded so the cell stays short.
    const wholeMs = Math.round(ms);
    if (wholeMs < 1000)
      return `${wholeMs}ms`;
  }
  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // Values just under a minute round up to "60.0"; report those as a full minute instead.
    if (seconds !== "60.0")
      return `${seconds}s`;
  }
  // Round the total first so the seconds part can never display as 60.
  const totalSeconds = Math.round(ms / 1000);
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}
7928
/**
 * Render a standard run manifest as a Markdown report.
 *
 * Sections, in order: header, summary table, passed cases (collapsed in a
 * <details> element), failed cases, resolved configuration (when present),
 * redaction summary (when enabled) and a footer carrying `manifest.version`.
 *
 * @param {object} manifest - Run manifest; reads `config`, `run_id`,
 *   `start_time`, `duration_ms`, `metrics`, `cases`, `resolved_config`,
 *   `redaction` and `version`.
 * @param {object} [options]
 * @param {boolean} [options.includeDetails=true] - Emit prompt, expectation
 *   and response for every failed case.
 * @param {number} [options.truncateAt=500] - Maximum characters shown for each
 *   prompt, expectation and response.
 * @returns {string} The report, lines joined with "\n".
 * @throws {RangeError} If `manifest.start_time` is not a valid date
 *   (raised by `Date.prototype.toISOString`).
 */
function generateMarkdownReport(manifest, options = {}) {
  const { includeDetails = true, truncateAt = 500 } = options;
  const { config, metrics } = manifest;

  // Wrap text in a fenced code block. The fence is made longer than any run of
  // backticks inside the text, so content containing ``` cannot close the block early.
  const fenced = (text, info = "") => {
    const runs = text.match(/`+/g) ?? [];
    const longestRun = runs.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    return [`${fence}${info}`, text, fence];
  };
  // Keep a value inside a single table cell: escape pipes and flatten newlines.
  const cell = (value) => String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");

  const lines = [
    "# ArtemisKit Test Results",
    "",
    `**Scenario:** ${config.scenario}`,
    `**Run ID:** ${manifest.run_id}`,
    `**Date:** ${new Date(manifest.start_time).toISOString()}`,
    `**Provider:** ${config.provider}${config.model ? ` (${config.model})` : ""}`,
    "",
    "---",
    "",
    "## Summary",
    "",
    "| Metric | Value |",
    "|--------|-------|",
    `| Total Cases | ${metrics.total_cases} |`,
    `| Passed | ${metrics.passed_cases} (${(metrics.success_rate * 100).toFixed(1)}%) |`,
    `| Failed | ${metrics.failed_cases} |`,
    `| Duration | ${formatDuration(manifest.duration_ms)} |`,
    `| Median Latency | ${metrics.median_latency_ms}ms |`,
    `| P95 Latency | ${metrics.p95_latency_ms}ms |`,
    // Fixed locale so the same manifest renders identically on every machine.
    `| Total Tokens | ${metrics.total_tokens.toLocaleString("en-US")} |`,
  ];
  if (metrics.cost) {
    lines.push(`| Estimated Cost | ${formatCostMd(metrics.cost.total_usd)} |`);
  }
  lines.push("", "---", "", "## Results by Case", "");

  // Passed cases: compact table, collapsed by default.
  const passed = manifest.cases.filter((c) => c.ok);
  lines.push(`### Passed (${passed.length})`, "");
  if (passed.length > 0) {
    lines.push(
      "<details>",
      "<summary>Click to expand passed cases</summary>",
      "",
      "| Case ID | Latency | Tokens | Score |",
      "|---------|---------|--------|-------|",
    );
    for (const c of passed) {
      lines.push(
        `| ${cell(c.id)} | ${formatDuration(c.latencyMs)} | ${c.tokens?.total || "-"} | ${(c.score * 100).toFixed(0)}% |`,
      );
    }
    lines.push("", "</details>");
  } else {
    lines.push("_No passed cases_");
  }
  lines.push("");

  // Failed cases: one sub-section each, optionally with full details.
  const failed = manifest.cases.filter((c) => !c.ok);
  lines.push(`### Failed (${failed.length})`, "");
  if (failed.length === 0) {
    lines.push("_No failed cases_", "");
  }
  for (const c of failed) {
    lines.push(`#### \`${c.id}\``, "");
    if (includeDetails) {
      // JSON.stringify returns undefined for an undefined value; fall back to
      // a placeholder so truncate() always receives a string.
      const promptStr = (typeof c.prompt === "string" ? c.prompt : JSON.stringify(c.prompt, null, 2)) ?? "(empty)";
      const expectedStr = JSON.stringify(c.expected, null, 2) ?? "undefined";
      lines.push(
        "**Prompt:**",
        ...fenced(truncate(promptStr, truncateAt)),
        "",
        "**Expected:**",
        `- Type: \`${c.matcherType}\``,
        ...fenced(truncate(expectedStr, truncateAt), "json"),
        "",
        "**Actual Response:**",
        ...fenced(truncate(c.response || "(empty)", truncateAt)),
        "",
      );
    }
    lines.push(`**Reason:** ${c.reason || "Unknown"}`, "", "---", "");
  }

  // Resolved configuration, only the fields that are set.
  const resolved = manifest.resolved_config;
  if (resolved) {
    lines.push("## Configuration", "", "```yaml", `provider: ${resolved.provider}`);
    if (resolved.model) {
      lines.push(`model: ${resolved.model}`);
    }
    if (resolved.temperature !== undefined) {
      lines.push(`temperature: ${resolved.temperature}`);
    }
    if (resolved.max_tokens !== undefined) {
      lines.push(`max_tokens: ${resolved.max_tokens}`);
    }
    lines.push("```", "");
  }

  const { redaction } = manifest;
  if (redaction?.enabled) {
    lines.push(
      "## Redaction",
      "",
      `- **Patterns Used:** ${redaction.patternsUsed.join(", ")}`,
      `- **Prompts Redacted:** ${redaction.summary.promptsRedacted}`,
      `- **Responses Redacted:** ${redaction.summary.responsesRedacted}`,
      `- **Total Redactions:** ${redaction.summary.totalRedactions}`,
      "",
    );
  }

  lines.push(
    "---",
    "",
    `*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`,
  );
  return lines.join("\n");
}
8044
/**
 * Render a red-team manifest as a Markdown security report.
 *
 * Sections, in order: header, security summary (with a risk level derived from
 * the defense rate), severity breakdown and vulnerability details (only when
 * unsafe responses exist), recommendations and a footer carrying
 * `manifest.version`.
 *
 * Risk level thresholds on the defense rate: >= 95% LOW, >= 80% MEDIUM,
 * >= 50% HIGH, otherwise CRITICAL.
 *
 * @param {object} manifest - Red-team manifest; reads `config`, `run_id`,
 *   `start_time`, `metrics`, `results` and `version`.
 * @param {object} [options]
 * @param {boolean} [options.includeDetails=true] - Emit the attack prompt and
 *   model response for every unsafe result.
 * @param {number} [options.truncateAt=500] - Maximum characters shown for each
 *   prompt and response.
 * @returns {string} The report, lines joined with "\n".
 * @throws {RangeError} If `manifest.start_time` is not a valid date
 *   (raised by `Date.prototype.toISOString`).
 */
function generateRedTeamMarkdownReport(manifest, options = {}) {
  const { includeDetails = true, truncateAt = 500 } = options;
  const { metrics } = manifest;
  const { by_severity } = metrics;

  // Wrap text in a fenced code block. Attack prompts and model output are
  // untrusted, so the fence is made longer than any backtick run they contain;
  // otherwise a ``` inside the content would end the block and inject markup.
  const fenced = (text) => {
    const runs = text.match(/`+/g) ?? [];
    const longestRun = runs.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    return [fence, text, fence];
  };

  // Attacks that produced a classifiable response (errors excluded).
  const testable = metrics.total_tests - metrics.error_responses;
  const defenseRate = metrics.defense_rate * 100;

  let riskLevel;
  let riskEmoji;
  if (defenseRate >= 95) {
    riskLevel = "LOW";
    riskEmoji = "";
  } else if (defenseRate >= 80) {
    riskLevel = "MEDIUM";
    riskEmoji = "";
  } else if (defenseRate >= 50) {
    riskLevel = "HIGH";
    riskEmoji = "";
  } else {
    riskLevel = "CRITICAL";
    riskEmoji = "";
  }
  // Join only the non-empty parts: "** LOW**" (space right after the opening
  // "**") is not recognised as bold by Markdown renderers.
  const riskLabel = [riskEmoji, riskLevel].filter(Boolean).join(" ");

  const lines = [
    "# ArtemisKit Security Report",
    "",
    `**Scenario:** ${manifest.config.scenario}`,
    `**Run ID:** ${manifest.run_id}`,
    `**Date:** ${new Date(manifest.start_time).toISOString()}`,
    "**Test Type:** Red Team Security Scan",
    "",
    "---",
    "",
    "## Security Summary",
    "",
    "| Metric | Value |",
    "|--------|-------|",
    `| Defense Rate | ${defenseRate.toFixed(1)}% |`,
    `| Total Attacks | ${metrics.total_tests} |`,
    `| Defended | ${metrics.defended} |`,
    `| Safe Responses | ${metrics.safe_responses} |`,
    `| Blocked | ${metrics.blocked_responses} |`,
    `| Vulnerabilities | ${metrics.unsafe_responses} |`,
    `| Errors | ${metrics.error_responses} |`,
    `| Risk Level | **${riskLabel}** |`,
    "",
    "---",
    "",
  ];

  // Severity breakdown: only severities that actually occurred get a row.
  if (metrics.unsafe_responses > 0) {
    lines.push(
      "## Severity Breakdown",
      "",
      "| Severity | Count | Description |",
      "|----------|-------|-------------|",
    );
    const severityRows = [
      ["Critical", by_severity.critical, "Severe security vulnerability"],
      ["High", by_severity.high, "Significant security concern"],
      ["Medium", by_severity.medium, "Moderate security issue"],
      ["Low", by_severity.low, "Minor security concern"],
    ];
    for (const [label, count, description] of severityRows) {
      if (count > 0) {
        lines.push(`| ${label} | ${count} | ${description} |`);
      }
    }
    lines.push("", "---", "");
  }

  // One numbered sub-section per unsafe result.
  const unsafeResults = manifest.results.filter((r) => r.status === "unsafe");
  if (unsafeResults.length > 0) {
    lines.push("## Vulnerabilities Found", "");
    unsafeResults.forEach((result, index) => {
      const severity = result.severity.toUpperCase();
      lines.push(
        `### ${index + 1}. ${result.mutation} (${severity})`,
        "",
        `**Case ID:** ${result.caseId}`,
        `**Severity:** ${severity}`,
        `**Mutation:** ${result.mutation}`,
        "",
      );
      if (includeDetails) {
        lines.push(
          "**Attack Prompt:**",
          ...fenced(truncate(result.prompt, truncateAt)),
          "",
          "**Model Response:**",
          ...fenced(truncate(result.response || "(empty)", truncateAt)),
          "",
        );
      }
      if (result.reasons.length > 0) {
        lines.push("**Reasons:**", ...result.reasons.map((reason) => `- ${reason}`), "");
      }
      lines.push("---", "");
    });
  }

  lines.push("## Recommendations", "");
  if (metrics.unsafe_responses === 0) {
    lines.push(
      "- All attacks were successfully defended",
      "- Continue monitoring and testing regularly",
    );
  } else {
    if (by_severity.critical > 0 || by_severity.high > 0) {
      lines.push("- **High Priority:** Review and address critical/high severity vulnerabilities immediately");
    }
    // More than 10% of the testable attacks got through.
    if (metrics.unsafe_responses > testable * 0.1) {
      lines.push("- Consider implementing additional input validation and output filtering");
    }
    lines.push(
      "- Review system prompts for potential information leakage",
      "- Implement role-play and persona detection",
      "- Add output filtering for dangerous content patterns",
    );
  }

  lines.push(
    "",
    "---",
    "",
    `*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`,
  );
  return lines.join("\n");
}
7904
8168
  export {
7905
8169
  generateStressHTMLReport,
8170
+ generateRedTeamMarkdownReport,
7906
8171
  generateRedTeamHTMLReport,
8172
+ generateMarkdownReport,
7907
8173
  generateJSONReport,
7908
8174
  generateHTMLReport,
7909
8175
  generateCompareHTMLReport,
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Markdown Report Generator
3
+ *
4
+ * Generates documentation-friendly markdown reports for compliance and audit trails.
5
+ */
6
+ import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
7
+ export interface MarkdownReportOptions {
8
+ /** Include full prompt/response details for failed cases and unsafe red-team results (generators default to true) */
9
+ includeDetails?: boolean;
10
+ /** Maximum characters to show for each prompt/expectation/response before "..." is appended (generators default to 500) */
11
+ truncateAt?: number;
12
+ }
13
+ /**
14
+ * Generate a markdown report for a standard run; returns the whole document as one newline-joined string
15
+ */
16
+ export declare function generateMarkdownReport(manifest: RunManifest, options?: MarkdownReportOptions): string;
17
+ /**
18
+ * Generate a markdown security report for red team results; returns the whole document as one newline-joined string
19
+ */
20
+ export declare function generateRedTeamMarkdownReport(manifest: RedTeamManifest, options?: MarkdownReportOptions): string;
21
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/markdown/generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAErE,MAAM,WAAW,qBAAqB;IACpC,4DAA4D;IAC5D,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAkCD;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,WAAW,EACrB,OAAO,GAAE,qBAA0B,GAClC,MAAM,CAuJR;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAC3C,QAAQ,EAAE,eAAe,EACzB,OAAO,GAAE,qBAA0B,GAClC,MAAM,CAmJR"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@artemiskit/reports",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "HTML, JSON, and Markdown report generation for ArtemisKit LLM evaluation toolkit",
5
5
  "type": "module",
6
6
  "license": "Apache-2.0",
@@ -30,7 +30,7 @@
30
30
  "test": "bun test"
31
31
  },
32
32
  "dependencies": {
33
- "@artemiskit/core": "workspace:*",
33
+ "@artemiskit/core": "0.2.3",
34
34
  "handlebars": "^4.7.8"
35
35
  },
36
36
  "devDependencies": {
package/src/index.ts CHANGED
@@ -20,3 +20,10 @@ export {
20
20
  type ComparisonData,
21
21
  type CaseComparison,
22
22
  } from './html/compare-generator';
23
+
24
+ // Markdown reports
25
+ export {
26
+ generateMarkdownReport,
27
+ generateRedTeamMarkdownReport,
28
+ type MarkdownReportOptions,
29
+ } from './markdown/generator';
@@ -0,0 +1,360 @@
1
+ /**
2
+ * Markdown Report Generator
3
+ *
4
+ * Generates documentation-friendly markdown reports for compliance and audit trails.
5
+ */
6
+
7
+ import type { RedTeamManifest, RunManifest } from '@artemiskit/core';
8
+
9
+ export interface MarkdownReportOptions {
10
+ /** Include full prompt/response details for failed cases and unsafe red-team results (generators default to true) */
11
+ includeDetails?: boolean;
12
+ /** Maximum characters to show for each prompt/expectation/response before "..." is appended (generators default to 500) */
13
+ truncateAt?: number;
14
+ }
15
+
16
/**
 * Shorten text to at most `maxLength` UTF-16 code units, appending "..."
 * whenever something was cut off.
 *
 * @param text - Text to shorten.
 * @param maxLength - Maximum number of code units to keep.
 * @returns `text` unchanged when it already fits, otherwise the kept prefix
 *   followed by "...".
 */
function truncate(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text;
  let end = maxLength;
  // If the cut would land between the two halves of a surrogate pair (for
  // example an emoji), drop the dangling high surrogate so the report never
  // contains an unpaired code unit.
  const lastKept = text.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return `${text.slice(0, end)}...`;
}
23
+
24
/**
 * Format a USD amount for display in a report table.
 *
 * - below one cent: shown in cents with four decimals (e.g. "0.5000 cents")
 * - below one dollar: dollars with four decimals (e.g. "$0.2500")
 * - otherwise: dollars with two decimals (e.g. "$12.50")
 *
 * @param costUsd - Cost in US dollars.
 * @returns Human-readable cost.
 */
function formatCostMd(costUsd: number): string {
  if (costUsd < 0.01) {
    // The value is expressed in cents here, so it must not carry a "$" prefix.
    return `${(costUsd * 100).toFixed(4)} cents`;
  }
  if (costUsd < 1) {
    return `$${costUsd.toFixed(4)}`;
  }
  return `$${costUsd.toFixed(2)}`;
}
36
+
37
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * - non-finite input: "-"
 * - under one second: whole milliseconds ("250ms")
 * - under one minute: seconds with one decimal ("1.5s")
 * - otherwise: minutes and whole seconds ("1m 30s")
 *
 * @param ms - Duration in milliseconds.
 * @returns Formatted duration.
 */
function formatDuration(ms: number): string {
  if (!Number.isFinite(ms)) return '-';
  if (ms < 1000) {
    // Fractional timings are rounded so the cell stays short.
    const wholeMs = Math.round(ms);
    if (wholeMs < 1000) return `${wholeMs}ms`;
  }
  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // Values just under a minute round up to "60.0"; report those as a full minute instead.
    if (seconds !== '60.0') return `${seconds}s`;
  }
  // Round the total first so the seconds part can never display as 60.
  const totalSeconds = Math.round(ms / 1000);
  return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}
47
+
48
/**
 * Render a standard run manifest as a Markdown report.
 *
 * Sections, in order: header, summary table, passed cases (collapsed in a
 * <details> element), failed cases, resolved configuration (when present),
 * redaction summary (when enabled) and a footer carrying `manifest.version`.
 *
 * @param manifest - Run manifest to report on.
 * @param options - `includeDetails` (default true) emits prompt, expectation
 *   and response for every failed case; `truncateAt` (default 500) caps the
 *   characters shown for each of them.
 * @returns The report, lines joined with "\n".
 * @throws RangeError if `manifest.start_time` is not a valid date
 *   (raised by `Date.prototype.toISOString`).
 */
export function generateMarkdownReport(
  manifest: RunManifest,
  options: MarkdownReportOptions = {}
): string {
  const { includeDetails = true, truncateAt = 500 } = options;
  const { config, metrics } = manifest;

  // Wrap text in a fenced code block. The fence is made longer than any run of
  // backticks inside the text, so content containing ``` cannot close the block early.
  const fenced = (text: string, info = ''): string[] => {
    const runs: string[] = text.match(/`+/g) ?? [];
    const longestRun = runs.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = '`'.repeat(Math.max(3, longestRun + 1));
    return [`${fence}${info}`, text, fence];
  };
  // Keep a value inside a single table cell: escape pipes and flatten newlines.
  const cell = (value: unknown): string =>
    String(value).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');

  // Header and summary table
  const lines: string[] = [
    '# ArtemisKit Test Results',
    '',
    `**Scenario:** ${config.scenario}`,
    `**Run ID:** ${manifest.run_id}`,
    `**Date:** ${new Date(manifest.start_time).toISOString()}`,
    `**Provider:** ${config.provider}${config.model ? ` (${config.model})` : ''}`,
    '',
    '---',
    '',
    '## Summary',
    '',
    '| Metric | Value |',
    '|--------|-------|',
    `| Total Cases | ${metrics.total_cases} |`,
    `| Passed | ${metrics.passed_cases} (${(metrics.success_rate * 100).toFixed(1)}%) |`,
    `| Failed | ${metrics.failed_cases} |`,
    `| Duration | ${formatDuration(manifest.duration_ms)} |`,
    `| Median Latency | ${metrics.median_latency_ms}ms |`,
    `| P95 Latency | ${metrics.p95_latency_ms}ms |`,
    // Fixed locale so the same manifest renders identically on every machine.
    `| Total Tokens | ${metrics.total_tokens.toLocaleString('en-US')} |`,
  ];
  if (metrics.cost) {
    lines.push(`| Estimated Cost | ${formatCostMd(metrics.cost.total_usd)} |`);
  }
  lines.push('', '---', '', '## Results by Case', '');

  // Passed cases: compact table, collapsed by default.
  const passed = manifest.cases.filter((c) => c.ok);
  lines.push(`### Passed (${passed.length})`, '');
  if (passed.length > 0) {
    lines.push(
      '<details>',
      '<summary>Click to expand passed cases</summary>',
      '',
      '| Case ID | Latency | Tokens | Score |',
      '|---------|---------|--------|-------|'
    );
    for (const c of passed) {
      lines.push(
        `| ${cell(c.id)} | ${formatDuration(c.latencyMs)} | ${c.tokens?.total || '-'} | ${(c.score * 100).toFixed(0)}% |`
      );
    }
    lines.push('', '</details>');
  } else {
    lines.push('_No passed cases_');
  }
  lines.push('');

  // Failed cases: one sub-section each, optionally with full details.
  const failed = manifest.cases.filter((c) => !c.ok);
  lines.push(`### Failed (${failed.length})`, '');
  if (failed.length === 0) {
    lines.push('_No failed cases_', '');
  }
  for (const c of failed) {
    lines.push(`#### \`${c.id}\``, '');
    if (includeDetails) {
      // JSON.stringify returns undefined for an undefined value at runtime;
      // fall back to a placeholder so truncate() always receives a string.
      const promptStr =
        (typeof c.prompt === 'string' ? c.prompt : JSON.stringify(c.prompt, null, 2)) ?? '(empty)';
      const expectedStr = JSON.stringify(c.expected, null, 2) ?? 'undefined';
      lines.push(
        '**Prompt:**',
        ...fenced(truncate(promptStr, truncateAt)),
        '',
        '**Expected:**',
        `- Type: \`${c.matcherType}\``,
        ...fenced(truncate(expectedStr, truncateAt), 'json'),
        '',
        '**Actual Response:**',
        ...fenced(truncate(c.response || '(empty)', truncateAt)),
        ''
      );
    }
    lines.push(`**Reason:** ${c.reason || 'Unknown'}`, '', '---', '');
  }

  // Resolved configuration, only the fields that are set.
  const resolved = manifest.resolved_config;
  if (resolved) {
    lines.push('## Configuration', '', '```yaml', `provider: ${resolved.provider}`);
    if (resolved.model) {
      lines.push(`model: ${resolved.model}`);
    }
    if (resolved.temperature !== undefined) {
      lines.push(`temperature: ${resolved.temperature}`);
    }
    if (resolved.max_tokens !== undefined) {
      lines.push(`max_tokens: ${resolved.max_tokens}`);
    }
    lines.push('```', '');
  }

  // Redaction info
  const { redaction } = manifest;
  if (redaction?.enabled) {
    lines.push(
      '## Redaction',
      '',
      `- **Patterns Used:** ${redaction.patternsUsed.join(', ')}`,
      `- **Prompts Redacted:** ${redaction.summary.promptsRedacted}`,
      `- **Responses Redacted:** ${redaction.summary.responsesRedacted}`,
      `- **Total Redactions:** ${redaction.summary.totalRedactions}`,
      ''
    );
  }

  // Footer
  lines.push(
    '---',
    '',
    `*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`
  );

  return lines.join('\n');
}
206
+
207
/**
 * Render a red-team manifest as a Markdown security report.
 *
 * Sections, in order: header, security summary (with a risk level derived from
 * the defense rate), severity breakdown and vulnerability details (only when
 * unsafe responses exist), recommendations and a footer carrying
 * `manifest.version`.
 *
 * Risk level thresholds on the defense rate: >= 95% LOW, >= 80% MEDIUM,
 * >= 50% HIGH, otherwise CRITICAL.
 *
 * @param manifest - Red-team manifest to report on.
 * @param options - `includeDetails` (default true) emits the attack prompt and
 *   model response for every unsafe result; `truncateAt` (default 500) caps the
 *   characters shown for each of them.
 * @returns The report, lines joined with "\n".
 * @throws RangeError if `manifest.start_time` is not a valid date
 *   (raised by `Date.prototype.toISOString`).
 */
export function generateRedTeamMarkdownReport(
  manifest: RedTeamManifest,
  options: MarkdownReportOptions = {}
): string {
  const { includeDetails = true, truncateAt = 500 } = options;
  const { metrics } = manifest;
  const { by_severity } = metrics;

  // Wrap text in a fenced code block. Attack prompts and model output are
  // untrusted, so the fence is made longer than any backtick run they contain;
  // otherwise a ``` inside the content would end the block and inject markup.
  const fenced = (text: string): string[] => {
    const runs: string[] = text.match(/`+/g) ?? [];
    const longestRun = runs.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = '`'.repeat(Math.max(3, longestRun + 1));
    return [fence, text, fence];
  };

  // Attacks that produced a classifiable response (errors excluded).
  const testable = metrics.total_tests - metrics.error_responses;
  const defenseRate = metrics.defense_rate * 100;

  // Determine risk level
  let riskLevel: string;
  let riskEmoji: string;
  if (defenseRate >= 95) {
    riskLevel = 'LOW';
    riskEmoji = '';
  } else if (defenseRate >= 80) {
    riskLevel = 'MEDIUM';
    riskEmoji = '';
  } else if (defenseRate >= 50) {
    riskLevel = 'HIGH';
    riskEmoji = '';
  } else {
    riskLevel = 'CRITICAL';
    riskEmoji = '';
  }
  // Join only the non-empty parts: "** LOW**" (space right after the opening
  // "**") is not recognised as bold by Markdown renderers.
  const riskLabel = [riskEmoji, riskLevel].filter(Boolean).join(' ');

  // Header and security summary
  const lines: string[] = [
    '# ArtemisKit Security Report',
    '',
    `**Scenario:** ${manifest.config.scenario}`,
    `**Run ID:** ${manifest.run_id}`,
    `**Date:** ${new Date(manifest.start_time).toISOString()}`,
    '**Test Type:** Red Team Security Scan',
    '',
    '---',
    '',
    '## Security Summary',
    '',
    '| Metric | Value |',
    '|--------|-------|',
    `| Defense Rate | ${defenseRate.toFixed(1)}% |`,
    `| Total Attacks | ${metrics.total_tests} |`,
    `| Defended | ${metrics.defended} |`,
    `| Safe Responses | ${metrics.safe_responses} |`,
    `| Blocked | ${metrics.blocked_responses} |`,
    `| Vulnerabilities | ${metrics.unsafe_responses} |`,
    `| Errors | ${metrics.error_responses} |`,
    `| Risk Level | **${riskLabel}** |`,
    '',
    '---',
    '',
  ];

  // Severity breakdown: only severities that actually occurred get a row.
  if (metrics.unsafe_responses > 0) {
    lines.push(
      '## Severity Breakdown',
      '',
      '| Severity | Count | Description |',
      '|----------|-------|-------------|'
    );
    const severityRows: Array<[string, number, string]> = [
      ['Critical', by_severity.critical, 'Severe security vulnerability'],
      ['High', by_severity.high, 'Significant security concern'],
      ['Medium', by_severity.medium, 'Moderate security issue'],
      ['Low', by_severity.low, 'Minor security concern'],
    ];
    for (const [label, count, description] of severityRows) {
      if (count > 0) {
        lines.push(`| ${label} | ${count} | ${description} |`);
      }
    }
    lines.push('', '---', '');
  }

  // Vulnerabilities found: one numbered sub-section per unsafe result.
  const unsafeResults = manifest.results.filter((r) => r.status === 'unsafe');
  if (unsafeResults.length > 0) {
    lines.push('## Vulnerabilities Found', '');
    unsafeResults.forEach((result, index) => {
      const severity = result.severity.toUpperCase();
      lines.push(
        `### ${index + 1}. ${result.mutation} (${severity})`,
        '',
        `**Case ID:** ${result.caseId}`,
        `**Severity:** ${severity}`,
        `**Mutation:** ${result.mutation}`,
        ''
      );
      if (includeDetails) {
        lines.push(
          '**Attack Prompt:**',
          ...fenced(truncate(result.prompt, truncateAt)),
          '',
          '**Model Response:**',
          ...fenced(truncate(result.response || '(empty)', truncateAt)),
          ''
        );
      }
      if (result.reasons.length > 0) {
        lines.push('**Reasons:**', ...result.reasons.map((reason) => `- ${reason}`), '');
      }
      lines.push('---', '');
    });
  }

  // Recommendations
  lines.push('## Recommendations', '');
  if (metrics.unsafe_responses === 0) {
    lines.push(
      '- All attacks were successfully defended',
      '- Continue monitoring and testing regularly'
    );
  } else {
    if (by_severity.critical > 0 || by_severity.high > 0) {
      lines.push(
        '- **High Priority:** Review and address critical/high severity vulnerabilities immediately'
      );
    }
    // More than 10% of the testable attacks got through.
    if (metrics.unsafe_responses > testable * 0.1) {
      lines.push('- Consider implementing additional input validation and output filtering');
    }
    lines.push(
      '- Review system prompts for potential information leakage',
      '- Implement role-play and persona detection',
      '- Add output filtering for dangerous content patterns'
    );
  }

  // Footer
  lines.push(
    '',
    '---',
    '',
    `*Generated by [ArtemisKit](https://artemiskit.vercel.app) v${manifest.version}*`
  );

  return lines.join('\n');
}