@artemiskit/reports 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ /**
2
+ * JSON Report Generator
3
+ */
4
+ import type { RedTeamManifest, RunManifest, StressManifest } from '@artemiskit/core';
5
/**
 * Optional settings accepted by every `generateJSONReport` overload.
 *
 * Both members may be omitted. The defaults are chosen by the implementation,
 * which is not part of this declaration file.
 */
export interface JSONReportOptions {
  /**
   * NOTE(review): presumably switches between indented and compact JSON
   * output — confirm against the generator implementation.
   */
  pretty?: boolean;

  /**
   * NOTE(review): presumably controls whether raw payloads are kept in the
   * serialized report — confirm against the generator implementation.
   */
  includeRaw?: boolean;
}
9
/**
 * Serializes a standard run manifest into a JSON report string.
 *
 * @param manifest - The run manifest to serialize.
 * @param options - Optional output settings; see {@link JSONReportOptions}.
 * @returns The report as a string.
 */
export declare function generateJSONReport(
  manifest: RunManifest,
  options?: JSONReportOptions,
): string;
/**
 * Serializes a red-team manifest into a JSON report string.
 *
 * @param manifest - The red-team manifest to serialize.
 * @param options - Optional output settings; see {@link JSONReportOptions}.
 * @returns The report as a string.
 */
export declare function generateJSONReport(
  manifest: RedTeamManifest,
  options?: JSONReportOptions,
): string;
/**
 * Serializes a stress-test manifest into a JSON report string.
 *
 * @param manifest - The stress manifest to serialize.
 * @param options - Optional output settings; see {@link JSONReportOptions}.
 * @returns The report as a string.
 */
export declare function generateJSONReport(
  manifest: StressManifest,
  options?: JSONReportOptions,
): string;
12
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/json/generator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAe,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElG,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,MAAM,CAAC;AAC/F,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,eAAe,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,MAAM,CAAC;AACnG,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,MAAM,CAAC"}
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@artemiskit/reports",
3
+ "version": "0.1.2",
4
+ "description": "HTML and JSON report generation for ArtemisKit LLM evaluation toolkit",
5
+ "type": "module",
6
+ "license": "Apache-2.0",
7
+ "author": "code-sensei",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/code-sensei/artemiskit.git",
11
+ "directory": "packages/reports"
12
+ },
13
+ "bugs": {
14
+ "url": "https://github.com/code-sensei/artemiskit/issues"
15
+ },
16
+ "keywords": [
17
+ "llm",
18
+ "testing",
19
+ "reports",
20
+ "html",
21
+ "evaluation",
22
+ "artemiskit"
23
+ ],
24
+ "main": "./dist/index.js",
25
+ "types": "./dist/index.d.ts",
26
+ "exports": {
27
+ ".": {
28
+ "import": "./dist/index.js",
29
+ "types": "./dist/index.d.ts"
30
+ }
31
+ },
32
+ "scripts": {
33
+ "build": "tsc && bun build ./src/index.ts --outdir ./dist --target bun",
34
+ "typecheck": "tsc --noEmit",
35
+ "clean": "rm -rf dist",
36
+ "test": "bun test"
37
+ },
38
+ "dependencies": {
39
+ "@artemiskit/core": "workspace:*",
40
+ "handlebars": "^4.7.8"
41
+ },
42
+ "devDependencies": {
43
+ "@types/bun": "^1.1.0",
44
+ "typescript": "^5.3.0"
45
+ }
46
+ }
@@ -0,0 +1,258 @@
1
+ /**
2
+ * HTML Report Generator
3
+ */
4
+
5
+ import type { RunManifest } from '@artemiskit/core';
6
+ import Handlebars from 'handlebars';
7
+
8
/**
 * Handlebars source for the standalone HTML run report.
 *
 * The template expects a root context of `{ manifest }` and uses the custom
 * helpers `formatPercent`, `formatNumber`, `formatDate`, `successRateClass`
 * and `formatPrompt`.
 *
 * Notes on non-obvious choices:
 * - The temperature row uses `includeZero=true` because Handlebars' `#if`
 *   treats `0` as falsy, which would hide a (very common) temperature of 0.
 * - Row toggling passes the target element id through a `data-` attribute
 *   instead of interpolating the case id into an inline JavaScript string.
 *   HTML-escaping is undone by the attribute parser before the script runs, so
 *   an id containing a quote would otherwise break (or inject into) the handler.
 * - The `<pre>` elements stay on a single line so no extra whitespace is
 *   rendered around prompts and responses.
 */
const HTML_TEMPLATE = `
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Artemis Report - {{manifest.config.scenario}}</title>
  <style>
    * { margin: 0; padding: 0; box-sizing: border-box; }
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      line-height: 1.6;
      color: #333;
      background: #f5f5f5;
      padding: 2rem;
    }
    .container { max-width: 1200px; margin: 0 auto; }
    h1 { margin-bottom: 0.5rem; color: #1a1a1a; }
    h2 { margin: 2rem 0 1rem; color: #333; border-bottom: 2px solid #e0e0e0; padding-bottom: 0.5rem; }
    .meta { color: #666; margin-bottom: 2rem; }
    .summary {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
      gap: 1rem;
      margin-bottom: 2rem;
    }
    .card {
      background: white;
      padding: 1.5rem;
      border-radius: 8px;
      box-shadow: 0 1px 3px rgba(0,0,0,0.1);
    }
    .card h3 { font-size: 0.875rem; color: #666; margin-bottom: 0.5rem; }
    .card .value { font-size: 2rem; font-weight: bold; }
    .card .value.success { color: #22c55e; }
    .card .value.warning { color: #f59e0b; }
    .card .value.error { color: #ef4444; }
    table { width: 100%; border-collapse: collapse; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
    th, td { padding: 1rem; text-align: left; border-bottom: 1px solid #e0e0e0; }
    th { background: #f9fafb; font-weight: 600; }
    tr:last-child td { border-bottom: none; }
    .status { display: inline-block; padding: 0.25rem 0.75rem; border-radius: 9999px; font-size: 0.875rem; font-weight: 500; }
    .status.passed { background: #dcfce7; color: #166534; }
    .status.failed { background: #fee2e2; color: #991b1b; }
    .score { font-family: monospace; }
    .details { margin-top: 0.5rem; padding: 1rem; background: #f9fafb; border-radius: 4px; font-size: 0.875rem; }
    .details pre { white-space: pre-wrap; word-break: break-word; }
    .expandable { cursor: pointer; }
    .expandable:hover { background: #f0f0f0; }
    .hidden { display: none; }
    .source-badge {
      display: inline-block;
      padding: 0.125rem 0.5rem;
      border-radius: 4px;
      font-size: 0.75rem;
      font-weight: 500;
      background: #e0e7ff;
      color: #3730a3;
      margin-left: 0.5rem;
    }
    .redaction-banner {
      background: #fef3c7;
      border: 1px solid #f59e0b;
      border-radius: 8px;
      padding: 1rem;
      margin-bottom: 1.5rem;
      display: flex;
      align-items: center;
      gap: 0.75rem;
    }
    .redaction-banner .icon { font-size: 1.5rem; }
    .redaction-banner .content { flex: 1; }
    .redaction-banner .title { font-weight: 600; color: #92400e; }
    .redaction-banner .details { font-size: 0.875rem; color: #a16207; margin-top: 0.25rem; }
    .redacted-badge {
      display: inline-block;
      padding: 0.125rem 0.5rem;
      border-radius: 4px;
      font-size: 0.75rem;
      font-weight: 500;
      background: #fef3c7;
      color: #92400e;
      margin-left: 0.5rem;
    }
    footer { margin-top: 3rem; text-align: center; color: #666; font-size: 0.875rem; }
  </style>
</head>
<body>
  <div class="container">
    <h1>{{manifest.config.scenario}}</h1>
    <p class="meta">
      Run ID: {{manifest.run_id}} |
      Provider: {{manifest.config.provider}} |
      Model: {{manifest.config.model}} |
      {{formatDate manifest.start_time}}
    </p>

    {{#if manifest.redaction.enabled}}
    <div class="redaction-banner">
      <div class="icon">🔒</div>
      <div class="content">
        <div class="title">Data Redaction Applied</div>
        <div class="details">
          {{manifest.redaction.summary.totalRedactions}} redactions made
          ({{manifest.redaction.summary.promptsRedacted}} prompts, {{manifest.redaction.summary.responsesRedacted}} responses).
          Replacement: <code>{{manifest.redaction.replacement}}</code>
        </div>
      </div>
    </div>
    {{/if}}

    <div class="summary">
      <div class="card">
        <h3>Success Rate</h3>
        <div class="value {{successRateClass manifest.metrics.success_rate}}">
          {{formatPercent manifest.metrics.success_rate}}
        </div>
      </div>
      <div class="card">
        <h3>Passed / Total</h3>
        <div class="value">{{manifest.metrics.passed_cases}} / {{manifest.metrics.total_cases}}</div>
      </div>
      <div class="card">
        <h3>Median Latency</h3>
        <div class="value">{{manifest.metrics.median_latency_ms}}ms</div>
      </div>
      <div class="card">
        <h3>Total Tokens</h3>
        <div class="value">{{formatNumber manifest.metrics.total_tokens}}</div>
      </div>
    </div>

    <h2>Test Cases</h2>
    <table>
      <thead>
        <tr>
          <th>ID</th>
          <th>Status</th>
          <th>Score</th>
          <th>Matcher</th>
          <th>Latency</th>
          <th>Tokens</th>
        </tr>
      </thead>
      <tbody>
        {{#each manifest.cases}}
        <tr class="expandable" data-details-id="details-{{id}}" onclick="toggleDetails(this)">
          <td><strong>{{id}}</strong>{{#if name}}<br><small>{{name}}</small>{{/if}}{{#if redaction.redacted}}<span class="redacted-badge">redacted</span>{{/if}}</td>
          <td><span class="status {{#if ok}}passed{{else}}failed{{/if}}">{{#if ok}}PASSED{{else}}FAILED{{/if}}</span></td>
          <td class="score">{{formatPercent score}}</td>
          <td>{{matcherType}}</td>
          <td>{{latencyMs}}ms</td>
          <td>{{tokens.total}}</td>
        </tr>
        <tr id="details-{{id}}" class="hidden">
          <td colspan="6">
            <div class="details">
              <p><strong>Reason:</strong> {{reason}}</p>
              <p><strong>Prompt:</strong>{{#if redaction.promptRedacted}} <span class="redacted-badge">redacted</span>{{/if}}</p>
              <pre>{{formatPrompt prompt}}</pre>
              <p><strong>Response:</strong>{{#if redaction.responseRedacted}} <span class="redacted-badge">redacted</span>{{/if}}</p>
              <pre>{{response}}</pre>
            </div>
          </td>
        </tr>
        {{/each}}
      </tbody>
    </table>

    {{#if manifest.resolved_config}}
    <h2>Resolved Configuration</h2>
    <div class="card">
      <p><strong>Provider:</strong> {{manifest.resolved_config.provider}} <span class="source-badge">{{manifest.resolved_config.source.provider}}</span></p>
      {{#if manifest.resolved_config.model}}
      <p><strong>Model:</strong> {{manifest.resolved_config.model}} <span class="source-badge">{{manifest.resolved_config.source.model}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.deployment_name}}
      <p><strong>Deployment:</strong> {{manifest.resolved_config.deployment_name}} <span class="source-badge">{{manifest.resolved_config.source.deployment_name}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.resource_name}}
      <p><strong>Resource:</strong> {{manifest.resolved_config.resource_name}} <span class="source-badge">{{manifest.resolved_config.source.resource_name}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.api_version}}
      <p><strong>API Version:</strong> {{manifest.resolved_config.api_version}} <span class="source-badge">{{manifest.resolved_config.source.api_version}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.base_url}}
      <p><strong>Base URL:</strong> {{manifest.resolved_config.base_url}} <span class="source-badge">{{manifest.resolved_config.source.base_url}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.underlying_provider}}
      <p><strong>Underlying Provider:</strong> {{manifest.resolved_config.underlying_provider}} <span class="source-badge">{{manifest.resolved_config.source.underlying_provider}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.temperature includeZero=true}}
      <p><strong>Temperature:</strong> {{manifest.resolved_config.temperature}} <span class="source-badge">{{manifest.resolved_config.source.temperature}}</span></p>
      {{/if}}
      {{#if manifest.resolved_config.max_tokens}}
      <p><strong>Max Tokens:</strong> {{manifest.resolved_config.max_tokens}} <span class="source-badge">{{manifest.resolved_config.source.max_tokens}}</span></p>
      {{/if}}
    </div>
    {{/if}}

    <h2>Provenance</h2>
    <div class="card">
      <p><strong>Git Commit:</strong> {{manifest.git.commit}}</p>
      <p><strong>Git Branch:</strong> {{manifest.git.branch}}</p>
      <p><strong>Run By:</strong> {{manifest.provenance.run_by}}</p>
      <p><strong>Duration:</strong> {{manifest.duration_ms}}ms</p>
    </div>

    <footer>
      Generated by Artemis Agent Reliability Toolkit
    </footer>
  </div>

  <script>
    function toggleDetails(row) {
      const details = document.getElementById(row.dataset.detailsId);
      if (details) {
        details.classList.toggle('hidden');
      }
    }
  </script>
</body>
</html>
`;
230
+
231
/** Compiled-template function type, derived from the Handlebars API in use. */
type ReportTemplate = ReturnType<typeof Handlebars.compile>;

/** Placeholder rendered when a value needed by a formatting helper is missing. */
const MISSING_VALUE = 'N/A';

/** Lazily-built compiled template; populated on the first report generation. */
let cachedReportTemplate: ReportTemplate | undefined;

/**
 * Returns the compiled report template, building it on first use.
 *
 * Helpers are registered on an isolated environment from `Handlebars.create()`
 * rather than on the shared default instance, so generating a report never
 * overwrites helpers that a host application registered under the same names.
 */
function getReportTemplate(): ReportTemplate {
  if (cachedReportTemplate) {
    return cachedReportTemplate;
  }

  const env = Handlebars.create();

  // Fraction (0..1) -> percentage with one decimal; avoids rendering "NaN%".
  env.registerHelper('formatPercent', (value: unknown) =>
    typeof value === 'number' && Number.isFinite(value)
      ? `${(value * 100).toFixed(1)}%`
      : MISSING_VALUE,
  );

  // Locale-grouped number; guards against a missing metric, which would throw.
  env.registerHelper('formatNumber', (value: unknown) =>
    typeof value === 'number' ? value.toLocaleString() : MISSING_VALUE,
  );

  // Locale date/time; avoids rendering "Invalid Date" for absent or bad input.
  env.registerHelper('formatDate', (value: string) => {
    const parsed = new Date(value);
    return Number.isNaN(parsed.getTime()) ? MISSING_VALUE : parsed.toLocaleString();
  });

  // CSS class for the success-rate card: >= 90% success, >= 70% warning, else error.
  env.registerHelper('successRateClass', (value: number) => {
    if (value >= 0.9) return 'success';
    if (value >= 0.7) return 'warning';
    return 'error';
  });

  // String prompts are shown verbatim; structured prompts are pretty-printed JSON.
  env.registerHelper('formatPrompt', (prompt: string | object) =>
    typeof prompt === 'string' ? prompt : JSON.stringify(prompt, null, 2),
  );

  cachedReportTemplate = env.compile(HTML_TEMPLATE);
  return cachedReportTemplate;
}

/**
 * Renders a run manifest as a self-contained HTML report.
 *
 * @param manifest - The run manifest, exposed to the template as `manifest`.
 * @returns The rendered HTML document as a string.
 */
export function generateHTMLReport(manifest: RunManifest): string {
  const render = getReportTemplate();
  return render({ manifest });
}