prompt-lock 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +59 -0
- package/dist/cli.js +70 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/reporter.d.ts +4 -1
- package/dist/reporter.d.ts.map +1 -1
- package/dist/reporter.js +549 -83
- package/dist/reporter.js.map +1 -1
- package/dist/runner.d.ts +2 -1
- package/dist/runner.d.ts.map +1 -1
- package/dist/runner.js +65 -0
- package/dist/runner.js.map +1 -1
- package/dist/types.d.ts +12 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/utils.d.ts +5 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +37 -0
- package/dist/utils.js.map +1 -1
- package/package.json +2 -1
- package/schemas/promptlock.schema.json +271 -0
package/dist/reporter.js
CHANGED
|
@@ -41,6 +41,9 @@ exports.printDiffReport = printDiffReport;
|
|
|
41
41
|
exports.generateJsonReport = generateJsonReport;
|
|
42
42
|
exports.generateHtmlReport = generateHtmlReport;
|
|
43
43
|
exports.generateMarkdownReport = generateMarkdownReport;
|
|
44
|
+
exports.printABReport = printABReport;
|
|
45
|
+
exports.generateABMarkdownReport = generateABMarkdownReport;
|
|
46
|
+
exports.generateABHtmlReport = generateABHtmlReport;
|
|
44
47
|
const path = __importStar(require("path"));
|
|
45
48
|
const chalk_1 = __importDefault(require("chalk"));
|
|
46
49
|
const utils_1 = require("./utils");
|
|
@@ -148,106 +151,396 @@ async function generateJsonReport(results, outputDir = DEFAULT_REPORT_DIR) {
|
|
|
148
151
|
await (0, utils_1.writeJsonFile)(filePath, report);
|
|
149
152
|
return filePath;
|
|
150
153
|
}
|
|
154
|
+
// ── Shared dark theme CSS for all HTML reports ──────────────────────────────
|
|
155
|
+
const DARK_THEME_CSS = `
|
|
156
|
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
|
157
|
+
:root {
|
|
158
|
+
--bg: #0d1117;
|
|
159
|
+
--card: #161b22;
|
|
160
|
+
--card-2: #1c2128;
|
|
161
|
+
--border: #30363d;
|
|
162
|
+
--text: #e6edf3;
|
|
163
|
+
--text-dim: #8b949e;
|
|
164
|
+
--accent: #58a6ff;
|
|
165
|
+
--pass: #3fb950;
|
|
166
|
+
--fail: #f85149;
|
|
167
|
+
--warn: #d29922;
|
|
168
|
+
--highlight: #238636;
|
|
169
|
+
}
|
|
170
|
+
html { scroll-behavior: smooth; }
|
|
171
|
+
body {
|
|
172
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
|
173
|
+
background: var(--bg);
|
|
174
|
+
color: var(--text);
|
|
175
|
+
line-height: 1.5;
|
|
176
|
+
padding: 2rem 1.5rem;
|
|
177
|
+
max-width: 1200px;
|
|
178
|
+
margin: 0 auto;
|
|
179
|
+
}
|
|
180
|
+
.mono { font-family: "SF Mono", Monaco, Inconsolata, "Roboto Mono", "Courier New", monospace; }
|
|
181
|
+
header { margin-bottom: 2rem; }
|
|
182
|
+
header h1 {
|
|
183
|
+
font-size: 2rem;
|
|
184
|
+
font-weight: 700;
|
|
185
|
+
letter-spacing: -0.02em;
|
|
186
|
+
margin-bottom: 0.25rem;
|
|
187
|
+
}
|
|
188
|
+
header .subtitle { color: var(--text-dim); font-size: 0.9rem; }
|
|
189
|
+
.summary-grid {
|
|
190
|
+
display: grid;
|
|
191
|
+
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
|
|
192
|
+
gap: 1rem;
|
|
193
|
+
margin: 2rem 0;
|
|
194
|
+
}
|
|
195
|
+
.metric-card {
|
|
196
|
+
background: var(--card);
|
|
197
|
+
border: 1px solid var(--border);
|
|
198
|
+
border-radius: 8px;
|
|
199
|
+
padding: 1.25rem;
|
|
200
|
+
}
|
|
201
|
+
.metric-card .label {
|
|
202
|
+
color: var(--text-dim);
|
|
203
|
+
font-size: 0.8rem;
|
|
204
|
+
text-transform: uppercase;
|
|
205
|
+
letter-spacing: 0.05em;
|
|
206
|
+
margin-bottom: 0.5rem;
|
|
207
|
+
}
|
|
208
|
+
.metric-card .value {
|
|
209
|
+
font-size: 1.75rem;
|
|
210
|
+
font-weight: 700;
|
|
211
|
+
font-family: "SF Mono", Monaco, monospace;
|
|
212
|
+
}
|
|
213
|
+
.metric-card.pass .value { color: var(--pass); }
|
|
214
|
+
.metric-card.fail .value { color: var(--fail); }
|
|
215
|
+
.metric-card.accent .value { color: var(--accent); }
|
|
216
|
+
.prompt-card {
|
|
217
|
+
background: var(--card);
|
|
218
|
+
border: 1px solid var(--border);
|
|
219
|
+
border-left: 4px solid var(--pass);
|
|
220
|
+
border-radius: 8px;
|
|
221
|
+
padding: 1.5rem;
|
|
222
|
+
margin-bottom: 1rem;
|
|
223
|
+
}
|
|
224
|
+
.prompt-card.failed { border-left-color: var(--fail); }
|
|
225
|
+
.prompt-card .title-row {
|
|
226
|
+
display: flex;
|
|
227
|
+
align-items: center;
|
|
228
|
+
justify-content: space-between;
|
|
229
|
+
flex-wrap: wrap;
|
|
230
|
+
gap: 0.75rem;
|
|
231
|
+
margin-bottom: 1rem;
|
|
232
|
+
}
|
|
233
|
+
.prompt-card h2 {
|
|
234
|
+
font-size: 1.15rem;
|
|
235
|
+
font-weight: 600;
|
|
236
|
+
}
|
|
237
|
+
.prompt-card .meta {
|
|
238
|
+
color: var(--text-dim);
|
|
239
|
+
font-size: 0.85rem;
|
|
240
|
+
font-family: "SF Mono", Monaco, monospace;
|
|
241
|
+
}
|
|
242
|
+
.chip {
|
|
243
|
+
display: inline-block;
|
|
244
|
+
padding: 0.25rem 0.6rem;
|
|
245
|
+
border-radius: 999px;
|
|
246
|
+
font-size: 0.75rem;
|
|
247
|
+
font-weight: 600;
|
|
248
|
+
margin-right: 0.35rem;
|
|
249
|
+
margin-bottom: 0.25rem;
|
|
250
|
+
}
|
|
251
|
+
.chip.pass { background: rgba(63, 185, 80, 0.15); color: var(--pass); border: 1px solid rgba(63, 185, 80, 0.4); }
|
|
252
|
+
.chip.fail { background: rgba(248, 81, 73, 0.15); color: var(--fail); border: 1px solid rgba(248, 81, 73, 0.4); }
|
|
253
|
+
.assertions { margin-top: 0.75rem; }
|
|
254
|
+
details {
|
|
255
|
+
margin-top: 1rem;
|
|
256
|
+
border-top: 1px solid var(--border);
|
|
257
|
+
padding-top: 0.75rem;
|
|
258
|
+
}
|
|
259
|
+
summary {
|
|
260
|
+
cursor: pointer;
|
|
261
|
+
color: var(--text-dim);
|
|
262
|
+
font-size: 0.85rem;
|
|
263
|
+
padding: 0.25rem 0;
|
|
264
|
+
user-select: none;
|
|
265
|
+
}
|
|
266
|
+
summary:hover { color: var(--text); }
|
|
267
|
+
pre {
|
|
268
|
+
background: var(--card-2);
|
|
269
|
+
border: 1px solid var(--border);
|
|
270
|
+
border-radius: 6px;
|
|
271
|
+
padding: 0.9rem;
|
|
272
|
+
margin-top: 0.5rem;
|
|
273
|
+
overflow-x: auto;
|
|
274
|
+
font-size: 0.82rem;
|
|
275
|
+
font-family: "SF Mono", Monaco, monospace;
|
|
276
|
+
white-space: pre-wrap;
|
|
277
|
+
word-break: break-word;
|
|
278
|
+
}
|
|
279
|
+
table {
|
|
280
|
+
width: 100%;
|
|
281
|
+
border-collapse: collapse;
|
|
282
|
+
margin-top: 0.75rem;
|
|
283
|
+
font-size: 0.85rem;
|
|
284
|
+
}
|
|
285
|
+
th, td {
|
|
286
|
+
padding: 0.6rem 0.75rem;
|
|
287
|
+
text-align: left;
|
|
288
|
+
border-bottom: 1px solid var(--border);
|
|
289
|
+
}
|
|
290
|
+
th {
|
|
291
|
+
background: var(--card-2);
|
|
292
|
+
font-weight: 600;
|
|
293
|
+
color: var(--text-dim);
|
|
294
|
+
text-transform: uppercase;
|
|
295
|
+
font-size: 0.7rem;
|
|
296
|
+
letter-spacing: 0.05em;
|
|
297
|
+
}
|
|
298
|
+
tr.fail td { background: rgba(248, 81, 73, 0.06); }
|
|
299
|
+
code {
|
|
300
|
+
font-family: "SF Mono", Monaco, monospace;
|
|
301
|
+
background: var(--card-2);
|
|
302
|
+
padding: 0.1rem 0.35rem;
|
|
303
|
+
border-radius: 3px;
|
|
304
|
+
font-size: 0.85em;
|
|
305
|
+
}
|
|
306
|
+
footer {
|
|
307
|
+
margin-top: 3rem;
|
|
308
|
+
padding-top: 1.5rem;
|
|
309
|
+
border-top: 1px solid var(--border);
|
|
310
|
+
color: var(--text-dim);
|
|
311
|
+
font-size: 0.8rem;
|
|
312
|
+
text-align: center;
|
|
313
|
+
}
|
|
314
|
+
footer a { color: var(--accent); text-decoration: none; }
|
|
315
|
+
footer a:hover { text-decoration: underline; }
|
|
316
|
+
|
|
317
|
+
/* ── A/B specific styles ── */
|
|
318
|
+
.winner-banner {
|
|
319
|
+
background: linear-gradient(135deg, rgba(35, 134, 54, 0.2), rgba(88, 166, 255, 0.1));
|
|
320
|
+
border: 1px solid var(--highlight);
|
|
321
|
+
border-radius: 12px;
|
|
322
|
+
padding: 1.5rem 2rem;
|
|
323
|
+
margin: 1.5rem 0 2rem;
|
|
324
|
+
text-align: center;
|
|
325
|
+
}
|
|
326
|
+
.winner-banner .trophy { font-size: 2rem; margin-bottom: 0.5rem; }
|
|
327
|
+
.winner-banner h2 {
|
|
328
|
+
font-size: 1.5rem;
|
|
329
|
+
color: var(--pass);
|
|
330
|
+
margin-bottom: 0.25rem;
|
|
331
|
+
}
|
|
332
|
+
.winner-banner .subtext { color: var(--text-dim); font-size: 0.9rem; }
|
|
333
|
+
.ab-grid {
|
|
334
|
+
display: grid;
|
|
335
|
+
grid-template-columns: 1fr 1fr;
|
|
336
|
+
gap: 1rem;
|
|
337
|
+
margin-bottom: 2rem;
|
|
338
|
+
}
|
|
339
|
+
@media (max-width: 720px) {
|
|
340
|
+
.ab-grid { grid-template-columns: 1fr; }
|
|
341
|
+
}
|
|
342
|
+
.variant-card {
|
|
343
|
+
background: var(--card);
|
|
344
|
+
border: 1px solid var(--border);
|
|
345
|
+
border-radius: 8px;
|
|
346
|
+
padding: 1.5rem;
|
|
347
|
+
}
|
|
348
|
+
.variant-card.winner {
|
|
349
|
+
border-color: var(--highlight);
|
|
350
|
+
box-shadow: 0 0 0 2px rgba(35, 134, 54, 0.3);
|
|
351
|
+
}
|
|
352
|
+
.variant-card .variant-label {
|
|
353
|
+
display: inline-block;
|
|
354
|
+
font-size: 0.7rem;
|
|
355
|
+
font-weight: 700;
|
|
356
|
+
letter-spacing: 0.1em;
|
|
357
|
+
color: var(--text-dim);
|
|
358
|
+
text-transform: uppercase;
|
|
359
|
+
margin-bottom: 0.25rem;
|
|
360
|
+
}
|
|
361
|
+
.variant-card.winner .variant-label { color: var(--pass); }
|
|
362
|
+
.variant-card h3 {
|
|
363
|
+
font-size: 1.1rem;
|
|
364
|
+
font-weight: 600;
|
|
365
|
+
margin-bottom: 1rem;
|
|
366
|
+
word-break: break-word;
|
|
367
|
+
}
|
|
368
|
+
.variant-metrics {
|
|
369
|
+
display: grid;
|
|
370
|
+
grid-template-columns: 1fr 1fr;
|
|
371
|
+
gap: 0.75rem;
|
|
372
|
+
}
|
|
373
|
+
.variant-metrics .m {
|
|
374
|
+
background: var(--card-2);
|
|
375
|
+
border-radius: 6px;
|
|
376
|
+
padding: 0.75rem;
|
|
377
|
+
}
|
|
378
|
+
.variant-metrics .m .label {
|
|
379
|
+
color: var(--text-dim);
|
|
380
|
+
font-size: 0.7rem;
|
|
381
|
+
text-transform: uppercase;
|
|
382
|
+
letter-spacing: 0.05em;
|
|
383
|
+
margin-bottom: 0.25rem;
|
|
384
|
+
}
|
|
385
|
+
.variant-metrics .m .value {
|
|
386
|
+
font-family: "SF Mono", Monaco, monospace;
|
|
387
|
+
font-size: 1.1rem;
|
|
388
|
+
font-weight: 600;
|
|
389
|
+
}
|
|
390
|
+
.delta-bar {
|
|
391
|
+
margin: 0.5rem 0 1rem;
|
|
392
|
+
}
|
|
393
|
+
.delta-bar .label {
|
|
394
|
+
display: flex;
|
|
395
|
+
justify-content: space-between;
|
|
396
|
+
font-size: 0.8rem;
|
|
397
|
+
color: var(--text-dim);
|
|
398
|
+
margin-bottom: 0.35rem;
|
|
399
|
+
}
|
|
400
|
+
.delta-bar .track {
|
|
401
|
+
background: var(--card-2);
|
|
402
|
+
height: 10px;
|
|
403
|
+
border-radius: 5px;
|
|
404
|
+
position: relative;
|
|
405
|
+
overflow: hidden;
|
|
406
|
+
}
|
|
407
|
+
.delta-bar .fill {
|
|
408
|
+
position: absolute;
|
|
409
|
+
top: 0;
|
|
410
|
+
bottom: 0;
|
|
411
|
+
border-radius: 5px;
|
|
412
|
+
}
|
|
413
|
+
.delta-bar .fill.a { background: var(--accent); opacity: 0.6; }
|
|
414
|
+
.delta-bar .fill.b { background: var(--pass); opacity: 0.8; }
|
|
415
|
+
`;
|
|
416
|
+
/**
 * Wraps pre-rendered report markup in the HTML document shell shared by the
 * HTML reports in this module (doctype, head with the shared stylesheet, footer).
 *
 * @param {string} title - Text for the <title> element; HTML-escaped here.
 * @param {string} body - Markup inserted verbatim inside <body>. It is NOT
 *   escaped, so callers must escape any dynamic values before passing it in.
 * @returns {string} The complete HTML document.
 */
function renderHtmlShell(title, body) {
    // Captured up front so the footer timestamp is easy to spot and reason about.
    const generatedAt = new Date().toISOString();
    // rel="noopener noreferrer" keeps the page opened via target="_blank" from
    // getting a window.opener handle back to the report.
    return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${escapeHtml(title)}</title>
<style>${DARK_THEME_CSS}</style>
</head>
<body>
${body}
<footer>
Generated by <a href="https://www.npmjs.com/package/prompt-lock" target="_blank" rel="noopener noreferrer">prompt-lock</a> · ${generatedAt}
</footer>
</body>
</html>`;
}
|
|
433
|
+
/**
 * Renders one pass/fail "chip" <span> per assertion and concatenates them.
 *
 * @param {Array<{passed: boolean, name: string}>|null|undefined} assertions -
 *   Assertion results; a missing list is treated as empty instead of throwing.
 * @returns {string} Concatenated chip markup, or '' when there are no assertions.
 */
function renderAssertionChips(assertions) {
    return (assertions ?? [])
        .map((assertion) => {
            const state = assertion.passed ? 'pass' : 'fail';
            const mark = assertion.passed ? '✓' : '✗';
            return `<span class="chip ${state}">${mark} ${escapeHtml(assertion.name)}</span>`;
        })
        .join('');
}
|
|
151
436
|
/**
 * Renders a run's results as a standalone HTML report and writes it to
 * `<outputDir>/run-<timestamp>.html`.
 *
 * The page contains a summary grid (totals, duration, and — only when the
 * summed cost is positive — cost and tokens) followed by one card per result
 * with assertion chips, an assertion table, an optional dataset table, and
 * collapsible prompt/output sections.
 *
 * @param {Array<object>} results - Result objects; this function reads `id`,
 *   `passed`, `provider`, `model`, `duration`, `cost`, `tokens.totalTokens`,
 *   `assertions`, `datasetResults`, `prompt` and `output` from each.
 * @param {string} [outputDir=DEFAULT_REPORT_DIR] - Directory for the report file.
 * @returns {Promise<string>} Path of the written HTML file.
 */
async function generateHtmlReport(results, outputDir = DEFAULT_REPORT_DIR) {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filePath = path.join(outputDir, `run-${timestamp}.html`);
    const passChip = '<span class="chip pass">Pass</span>';
    const failChip = '<span class="chip fail">Fail</span>';
    // escapeHtml calls string methods on its argument, so anything that is not
    // already a string must be converted first.
    // NOTE(review): assumes expected/actual may be numbers or objects — confirm against types.d.ts.
    const toText = (value) => {
        if (value == null) {
            return '—';
        }
        if (typeof value === 'string') {
            return value;
        }
        if (typeof value === 'object') {
            return JSON.stringify(value) ?? String(value);
        }
        return String(value);
    };
    const total = results.length;
    const passed = results.filter((r) => r.passed).length;
    const failed = total - passed;
    const totalCost = results.reduce((sum, r) => sum + (r.cost ?? 0), 0);
    const totalTokens = results.reduce((sum, r) => sum + (r.tokens?.totalTokens ?? 0), 0);
    // A result without a duration must not turn the whole total into NaN.
    const totalDuration = results.reduce((sum, r) => sum + (r.duration ?? 0), 0);
    const summaryCards = `
<div class="summary-grid">
<div class="metric-card accent">
<div class="label">Total prompts</div>
<div class="value">${total}</div>
</div>
<div class="metric-card pass">
<div class="label">Passed</div>
<div class="value">${passed}</div>
</div>
<div class="metric-card ${failed > 0 ? 'fail' : ''}">
<div class="label">Failed</div>
<div class="value">${failed}</div>
</div>
<div class="metric-card">
<div class="label">Duration</div>
<div class="value mono">${totalDuration}ms</div>
</div>
${totalCost > 0 ? `
<div class="metric-card">
<div class="label">Cost</div>
<div class="value mono">$${totalCost.toFixed(6)}</div>
</div>
<div class="metric-card">
<div class="label">Tokens</div>
<div class="value mono">${totalTokens.toLocaleString()}</div>
</div>` : ''}
</div>
`;
    const promptCards = results.map((r) => {
        const statusClass = r.passed ? '' : 'failed';
        const assertions = r.assertions ?? [];
        const assertionTable = assertions.length > 0 ? `
<details open>
<summary>Assertion details (${assertions.filter((a) => a.passed).length}/${assertions.length})</summary>
<table>
<thead><tr><th>Type</th><th>Name</th><th>Result</th><th>Expected</th><th>Actual</th></tr></thead>
<tbody>
${assertions.map((a) => `
<tr class="${a.passed ? '' : 'fail'}">
<td>${escapeHtml(a.type)}</td>
<td>${escapeHtml(a.name)}</td>
<td>${a.passed ? passChip : failChip}</td>
<td>${escapeHtml(toText(a.expected))}</td>
<td>${escapeHtml(toText(a.actual))}${a.message ? ' · ' + escapeHtml(a.message) : ''}</td>
</tr>
`).join('')}
</tbody>
</table>
</details>` : '';
        const datasetSection = r.datasetResults && r.datasetResults.length > 0 ? `
<details>
<summary>Dataset (${r.datasetResults.filter((d) => d.passed).length}/${r.datasetResults.length} inputs passed)</summary>
<table>
<thead><tr><th>#</th><th>Variables</th><th>Result</th><th>Duration</th><th>Failures</th></tr></thead>
<tbody>
${r.datasetResults.map((d, i) => `
<tr class="${d.passed ? '' : 'fail'}">
<td>${i + 1}</td>
<td><code>${escapeHtml((JSON.stringify(d.vars) ?? '').slice(0, 80))}</code></td>
<td>${d.passed ? passChip : failChip}</td>
<td>${d.duration}ms</td>
<td>${d.passed ? '—' : escapeHtml(d.assertions.filter((a) => !a.passed).map((a) => a.name).join(', '))}</td>
</tr>
`).join('')}
</tbody>
</table>
</details>` : '';
        // A result can lack output (the A/B report already guards for this);
        // show an empty block instead of throwing on `.length`.
        const output = r.output ?? '';
        const shownOutput = output.length > 2000
            ? output.slice(0, 2000) + `\n... (truncated, ${output.length} chars total)`
            : output;
        return `
<div class="prompt-card ${statusClass}">
<div class="title-row">
<h2>${r.passed ? '✓' : '✗'} ${escapeHtml(r.id)}</h2>
<div class="meta">${escapeHtml(r.provider)}/${escapeHtml(r.model)} · ${r.duration}ms${r.cost ? ` · $${r.cost.toFixed(6)}` : ''}${r.tokens ? ` · ${r.tokens.totalTokens} tokens` : ''}</div>
</div>
<div class="assertions">
${renderAssertionChips(assertions)}
</div>
${assertionTable}
${datasetSection}
<details>
<summary>Prompt</summary>
<pre>${escapeHtml(r.prompt ?? '')}</pre>
</details>
<details>
<summary>Output</summary>
<pre>${escapeHtml(shownOutput)}</pre>
</details>
</div>
`;
    }).join('');
    const body = `
<header>
<h1>prompt-lock report</h1>
<div class="subtitle">${new Date().toLocaleString()}</div>
</header>
${summaryCards}
${promptCards}
`;
    await (0, utils_1.ensureDir)(outputDir);
    await fs.promises.writeFile(filePath, renderHtmlShell('prompt-lock report', body), 'utf-8');
    return filePath;
}
|
|
253
546
|
async function generateMarkdownReport(results, outputDir = DEFAULT_REPORT_DIR) {
|
|
@@ -305,6 +598,179 @@ async function generateMarkdownReport(results, outputDir = DEFAULT_REPORT_DIR) {
|
|
|
305
598
|
await fs.promises.writeFile(filePath, md, 'utf-8');
|
|
306
599
|
return filePath;
|
|
307
600
|
}
|
|
601
|
+
/**
 * Prints an A/B comparison to the console: a bold heading, an aligned
 * Metric / Variant A / Variant B / Delta table, and the verdict line.
 *
 * Cost and token rows are only included when at least one variant reports a
 * positive value for that metric.
 *
 * @param {object} result - A/B result; reads `id`, `variantA`, `variantB`,
 *   `winner` ('A', 'B' or 'tie') and `deltas` (`latencyMs`, `costDollars`, `tokens`).
 * @returns {void}
 */
function printABReport(result) {
    const { variantA, variantB, winner, deltas } = result;
    const describeStatus = (variant) => {
        const okCount = variant.assertions.filter((x) => x.passed).length;
        return `${variant.passed ? '✅' : '❌'} ${okCount}/${variant.assertions.length} passed`;
    };
    console.log('');
    console.log(chalk_1.default.bold(`A/B Comparison: ${result.id}`));
    console.log('');
    const tableRows = [];
    tableRows.push(['Status', describeStatus(variantA), describeStatus(variantB), '—']);
    tableRows.push(['Latency', `${variantA.duration}ms`, `${variantB.duration}ms`, formatDelta(deltas.latencyMs, 'ms')]);
    const costA = variantA.cost ?? 0;
    const costB = variantB.cost ?? 0;
    if (costA > 0 || costB > 0) {
        tableRows.push(['Cost', `$${costA.toFixed(6)}`, `$${costB.toFixed(6)}`, formatDelta(deltas.costDollars, '$', true)]);
    }
    const tokensA = variantA.tokens?.totalTokens ?? 0;
    const tokensB = variantB.tokens?.totalTokens ?? 0;
    if (tokensA > 0 || tokensB > 0) {
        tableRows.push(['Tokens', `${tokensA}`, `${tokensB}`, formatDelta(deltas.tokens, '')]);
    }
    // Fixed column widths shared by the header, the rule, and every data row.
    const colWidths = [12, 22, 22, 14];
    printTableRow(['Metric', 'Variant A', 'Variant B', 'Delta'], colWidths, true);
    printTableRow(colWidths.map((width) => '─'.repeat(width)), colWidths, false);
    tableRows.forEach((row) => {
        printTableRow(row, colWidths, false);
    });
    console.log('');
    const verdict = winner === 'tie'
        ? chalk_1.default.yellow('Result: Tie — variants are equivalent')
        : chalk_1.default.green.bold(`Winner: ${winner === 'A' ? 'Variant A' : 'Variant B'}`);
    console.log(verdict);
    console.log('');
}
|
|
634
|
+
/**
 * Formats a signed metric delta for display in the A/B tables.
 *
 * @param {number} value - The delta to format.
 * @param {string} unit - Suffix appended to non-dollar values (e.g. 'ms');
 *   ignored when `isDollar` is true.
 * @param {boolean} [isDollar=false] - Format as a dollar amount with six decimals.
 * @returns {string} '—' for zero or a missing/non-finite delta; otherwise a
 *   signed string such as '+12ms', '-3' or '-$0.000123'.
 */
function formatDelta(value, unit, isDollar = false) {
    // A missing or non-finite delta has nothing meaningful to show; render it
    // like "no change" rather than throwing on toFixed or printing "undefinedms".
    if (typeof value !== 'number' || !Number.isFinite(value) || value === 0) {
        return '—';
    }
    if (isDollar) {
        // The sign goes before the currency symbol: "-$0.000123", not "$-0.000123".
        const sign = value > 0 ? '+' : '-';
        return `${sign}$${Math.abs(value).toFixed(6)}`;
    }
    // Negative numbers already carry their own '-' when stringified.
    const sign = value > 0 ? '+' : '';
    return `${sign}${value}${unit}`;
}
|
|
642
|
+
/**
 * Prints one pipe-delimited table row to the console, padding each cell to
 * its column width.
 *
 * @param {Array<*>} cells - Cell values; each is converted to a string, and
 *   null/undefined render as an empty cell.
 * @param {number[]} widths - Minimum width per column; a missing width leaves
 *   the cell unpadded.
 * @param {boolean} bold - When true the whole row is printed in bold.
 * @returns {void}
 */
function printTableRow(cells, widths, bold) {
    // Coerce first: padEnd only exists on strings, so a numeric cell would throw.
    const padded = cells.map((cell, i) => String(cell ?? '').padEnd(widths[i] ?? 0));
    const line = `| ${padded.join(' | ')} |`;
    console.log(bold ? chalk_1.default.bold(line) : line);
}
|
|
647
|
+
/**
 * Writes an A/B comparison as a Markdown file at `<outputDir>/ab-<timestamp>.md`.
 *
 * The file holds a heading, a Metric / Variant A / Variant B / Delta table
 * (cost and token rows only when at least one variant reports a positive
 * value), and a final verdict line.
 *
 * @param {object} result - A/B result; reads `id`, `variantA`, `variantB`,
 *   `winner` ('A', 'B' or 'tie') and `deltas` (`latencyMs`, `costDollars`, `tokens`).
 * @param {string} [outputDir=DEFAULT_REPORT_DIR] - Directory for the report file.
 * @returns {Promise<string>} Path of the written Markdown file.
 */
async function generateABMarkdownReport(result, outputDir = DEFAULT_REPORT_DIR) {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filePath = path.join(outputDir, `ab-${timestamp}.md`);
    const { variantA: a, variantB: b, winner, deltas } = result;
    // Values placed inside a table cell must not contain a raw '|' or a line
    // break, otherwise the row is split into extra columns or cut short.
    const cell = (text) => String(text).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
    const passRatio = (variant) => `${variant.assertions.filter((x) => x.passed).length}/${variant.assertions.length}`;
    const costA = a.cost ?? 0;
    const costB = b.cost ?? 0;
    const tokensA = a.tokens?.totalTokens ?? 0;
    const tokensB = b.tokens?.totalTokens ?? 0;
    const lines = [
        `# A/B Comparison: ${result.id}`,
        '',
        `| Metric | Variant A (${cell(a.id)}) | Variant B (${cell(b.id)}) | Delta |`,
        '|--------|---------------------|---------------------|-------|',
        `| Status | ${a.passed ? 'Pass' : 'FAIL'} ${passRatio(a)} | ${b.passed ? 'Pass' : 'FAIL'} ${passRatio(b)} | — |`,
        `| Latency | ${a.duration}ms | ${b.duration}ms | ${formatDelta(deltas.latencyMs, 'ms')} |`,
    ];
    if (costA > 0 || costB > 0) {
        lines.push(`| Cost | $${costA.toFixed(6)} | $${costB.toFixed(6)} | ${formatDelta(deltas.costDollars, '$', true)} |`);
    }
    if (tokensA > 0 || tokensB > 0) {
        lines.push(`| Tokens | ${tokensA} | ${tokensB} | ${formatDelta(deltas.tokens, '')} |`);
    }
    lines.push('');
    if (winner === 'tie') {
        lines.push('**Result:** Tie — variants are equivalent');
    }
    else {
        const name = winner === 'A' ? `Variant A (${a.id})` : `Variant B (${b.id})`;
        lines.push(`**Winner:** ${name}`);
    }
    // The trailing empty entry makes the file end with a newline.
    lines.push('');
    const md = lines.join('\n');
    await (0, utils_1.ensureDir)(outputDir);
    await fs.promises.writeFile(filePath, md, 'utf-8');
    return filePath;
}
|
|
676
|
+
/**
 * Renders an A/B comparison as a standalone HTML report and writes it to
 * `<outputDir>/ab-<timestamp>.html`.
 *
 * The page shows a winner (or tie) banner, one card per variant, and a
 * "Metric comparison" panel with proportional bars for latency and — when at
 * least one variant reports a positive value — cost and tokens.
 *
 * @param {object} result - A/B result; reads `id`, `variantA`, `variantB`,
 *   `winner` ('A', 'B' or 'tie') and `deltas` (`latencyMs`, `costDollars`).
 * @param {string} [outputDir=DEFAULT_REPORT_DIR] - Directory for the report file.
 * @returns {Promise<string>} Path of the written HTML file.
 */
async function generateABHtmlReport(result, outputDir = DEFAULT_REPORT_DIR) {
    const PREVIEW_LIMIT = 1500;
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filePath = path.join(outputDir, `ab-${timestamp}.html`);
    const { variantA: a, variantB: b, winner, deltas } = result;
    // 'none' never equals a card label, so no card is highlighted on a tie.
    const winnerSide = winner === 'tie' ? 'none' : winner;
    const costA = a.cost ?? 0;
    const costB = b.cost ?? 0;
    const tokensA = a.tokens?.totalTokens ?? 0;
    const tokensB = b.tokens?.totalTokens ?? 0;
    const showCost = costA > 0 || costB > 0;
    const showTokens = tokensA > 0 || tokensB > 0;
    // Banner subtext: list only the metrics on which the declared winner is
    // ahead by a noticeable margin (>= 10% latency, >= 5% cost).
    // NOTE(review): a negative delta is read as "B is lower", i.e. deltas look like B − A — confirm in runner.js.
    const reasons = [];
    if (deltas.latencyMs !== 0) {
        const pct = Math.abs(deltas.latencyMs) / Math.max(a.duration, b.duration) * 100;
        if (pct >= 10) {
            const faster = deltas.latencyMs < 0 ? 'B' : 'A';
            if (faster === winner) {
                reasons.push(`${pct.toFixed(0)}% faster`);
            }
        }
    }
    if (deltas.costDollars !== 0 && Math.max(costA, costB) > 0) {
        const pct = Math.abs(deltas.costDollars) / Math.max(costA, costB) * 100;
        if (pct >= 5) {
            const cheaper = deltas.costDollars < 0 ? 'B' : 'A';
            if (cheaper === winner) {
                reasons.push(`${pct.toFixed(0)}% cheaper`);
            }
        }
    }
    // Renders two stacked bars whose widths are proportional to the larger value.
    // `format` turns a raw value into its display text so cost can be shown as
    // a fixed-precision dollar amount instead of a raw float.
    const deltaBar = (label, aVal, bVal, format) => {
        const max = Math.max(aVal, bVal);
        if (max === 0) {
            return '';
        }
        const aPct = (aVal / max) * 100;
        const bPct = (bVal / max) * 100;
        return `
<div class="delta-bar">
<div class="label"><span>${escapeHtml(label)}</span><span class="mono">A: ${format(aVal)} · B: ${format(bVal)}</span></div>
<div class="track"><div class="fill a" style="width:${aPct}%"></div></div>
<div class="track" style="margin-top:4px"><div class="fill b" style="width:${bPct}%"></div></div>
</div>
`;
    };
    // Renders the card for one variant; `label` is 'A' or 'B'.
    const variantCard = (label, r) => {
        const isWinner = winnerSide === label;
        const passCount = r.assertions.filter((x) => x.passed).length;
        const rawOutput = r.output || '';
        // Mark the cut explicitly, matching the wording of the run report.
        const preview = rawOutput.length > PREVIEW_LIMIT
            ? rawOutput.slice(0, PREVIEW_LIMIT) + `\n... (truncated, ${rawOutput.length} chars total)`
            : rawOutput;
        return `
<div class="variant-card ${isWinner ? 'winner' : ''}">
<div class="variant-label">${isWinner ? '★ WINNER · ' : ''}VARIANT ${label}</div>
<h3>${escapeHtml(r.id)}</h3>
<div class="meta" style="color: var(--text-dim); font-size: 0.85rem; margin-bottom: 1rem;">${escapeHtml(r.provider)}/${escapeHtml(r.model)}</div>
<div class="variant-metrics">
<div class="m"><div class="label">Status</div><div class="value" style="color: ${r.passed ? 'var(--pass)' : 'var(--fail)'}">${r.passed ? '✓' : '✗'} ${passCount}/${r.assertions.length}</div></div>
<div class="m"><div class="label">Latency</div><div class="value">${r.duration}ms</div></div>
${showCost ? `<div class="m"><div class="label">Cost</div><div class="value">$${(r.cost ?? 0).toFixed(6)}</div></div>` : ''}
${showTokens ? `<div class="m"><div class="label">Tokens</div><div class="value">${r.tokens?.totalTokens ?? 0}</div></div>` : ''}
</div>
<div class="assertions" style="margin-top: 1rem;">
${renderAssertionChips(r.assertions)}
</div>
<details>
<summary>Output preview</summary>
<pre>${escapeHtml(preview)}</pre>
</details>
</div>
`;
    };
    const winnerBanner = winner === 'tie' ? `
<div class="winner-banner" style="border-color: var(--warn);">
<div class="trophy">⚖️</div>
<h2 style="color: var(--warn);">Tie</h2>
<div class="subtext">Variants are equivalent on all measured metrics</div>
</div>
` : `
<div class="winner-banner">
<div class="trophy">🏆</div>
<h2>Variant ${winner} wins</h2>
<div class="subtext">${reasons.length > 0 ? reasons.join(' · ') : 'Higher pass rate'}</div>
</div>
`;
    const body = `
<header>
<h1>A/B comparison</h1>
<div class="subtitle">${escapeHtml(result.id)} · ${new Date().toLocaleString()}</div>
</header>
${winnerBanner}
<div class="ab-grid">
${variantCard('A', a)}
${variantCard('B', b)}
</div>
<div style="background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 1.5rem; margin-bottom: 2rem;">
<h3 style="margin-bottom: 1rem;">Metric comparison</h3>
${deltaBar('Latency', a.duration, b.duration, (v) => `${v}ms`)}
${showCost ? deltaBar('Cost', costA, costB, (v) => `$${v.toFixed(6)}`) : ''}
${showTokens ? deltaBar('Tokens', tokensA, tokensB, (v) => `${v}`) : ''}
</div>
`;
    await (0, utils_1.ensureDir)(outputDir);
    await fs.promises.writeFile(filePath, renderHtmlShell(`A/B: ${result.id}`, body), 'utf-8');
    return filePath;
}
|
|
308
774
|
function escapeHtml(s) {
|
|
309
775
|
return s
|
|
310
776
|
.replace(/&/g, '&')
|