stepproof 0.2.22 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/adapters/anthropic.d.ts +2 -2
  2. package/dist/adapters/anthropic.d.ts.map +1 -1
  3. package/dist/adapters/anthropic.js +11 -4
  4. package/dist/adapters/anthropic.js.map +1 -1
  5. package/dist/adapters/base.d.ts +9 -1
  6. package/dist/adapters/base.d.ts.map +1 -1
  7. package/dist/adapters/gemini.d.ts +8 -0
  8. package/dist/adapters/gemini.d.ts.map +1 -0
  9. package/dist/adapters/gemini.js +49 -0
  10. package/dist/adapters/gemini.js.map +1 -0
  11. package/dist/adapters/index.d.ts.map +1 -1
  12. package/dist/adapters/index.js +7 -1
  13. package/dist/adapters/index.js.map +1 -1
  14. package/dist/adapters/ollama.d.ts +8 -0
  15. package/dist/adapters/ollama.d.ts.map +1 -0
  16. package/dist/adapters/ollama.js +51 -0
  17. package/dist/adapters/ollama.js.map +1 -0
  18. package/dist/adapters/openai.d.ts +2 -2
  19. package/dist/adapters/openai.d.ts.map +1 -1
  20. package/dist/adapters/openai.js +7 -1
  21. package/dist/adapters/openai.js.map +1 -1
  22. package/dist/assertions/engine.d.ts +6 -1
  23. package/dist/assertions/engine.d.ts.map +1 -1
  24. package/dist/assertions/engine.js +176 -11
  25. package/dist/assertions/engine.js.map +1 -1
  26. package/dist/baseline.d.ts +22 -0
  27. package/dist/baseline.d.ts.map +1 -0
  28. package/dist/baseline.js +81 -0
  29. package/dist/baseline.js.map +1 -0
  30. package/dist/cache.d.ts +5 -0
  31. package/dist/cache.d.ts.map +1 -0
  32. package/dist/cache.js +71 -0
  33. package/dist/cache.js.map +1 -0
  34. package/dist/cli.js +214 -15
  35. package/dist/cli.js.map +1 -1
  36. package/dist/commands/compare.d.ts +43 -0
  37. package/dist/commands/compare.d.ts.map +1 -0
  38. package/dist/commands/compare.js +75 -0
  39. package/dist/commands/compare.js.map +1 -0
  40. package/dist/commands/history.d.ts +2 -0
  41. package/dist/commands/history.d.ts.map +1 -0
  42. package/dist/commands/history.js +46 -0
  43. package/dist/commands/history.js.map +1 -0
  44. package/dist/commands/results-store.d.ts +15 -0
  45. package/dist/commands/results-store.d.ts.map +1 -0
  46. package/dist/commands/results-store.js +77 -0
  47. package/dist/commands/results-store.js.map +1 -0
  48. package/dist/commands/view.d.ts +2 -0
  49. package/dist/commands/view.d.ts.map +1 -0
  50. package/dist/commands/view.js +51 -0
  51. package/dist/commands/view.js.map +1 -0
  52. package/dist/core/scenario-runner.d.ts +8 -0
  53. package/dist/core/scenario-runner.d.ts.map +1 -1
  54. package/dist/core/scenario-runner.js +56 -5
  55. package/dist/core/scenario-runner.js.map +1 -1
  56. package/dist/core/types.d.ts +21 -7
  57. package/dist/core/types.d.ts.map +1 -1
  58. package/dist/reporters/html-reporter.d.ts +3 -0
  59. package/dist/reporters/html-reporter.d.ts.map +1 -0
  60. package/dist/reporters/html-reporter.js +152 -0
  61. package/dist/reporters/html-reporter.js.map +1 -0
  62. package/dist/reporters/terminal-reporter.d.ts +10 -1
  63. package/dist/reporters/terminal-reporter.d.ts.map +1 -1
  64. package/dist/reporters/terminal-reporter.js +111 -7
  65. package/dist/reporters/terminal-reporter.js.map +1 -1
  66. package/package.json +2 -1
@@ -0,0 +1,75 @@
1
+ import { runScenario } from '../core/scenario-runner.js';
2
/**
 * Run the same scenario against multiple providers and produce a comparison.
 *
 * For each provider spec, the scenario is shallow-cloned with `provider` and
 * `model` overridden on every step (the input scenario is not mutated) and
 * handed to `runScenario`. Providers are run one after another, in the order
 * given.
 *
 * @param scenario - Scenario to run; `name` and `steps` (each with an `id`) are read here.
 * @param scenarioFilePath - Passed through unchanged to `runScenario`.
 * @param providers - `{ provider, model }` specs; must contain at least one entry.
 * @param iterations - Iteration count applied to every provider run.
 * @param options - Optional progress callbacks:
 *   `onProviderStart(provider, model)` and
 *   `onIterationComplete(provider, iteration, total)`.
 * @returns `{ scenarioName, iterations, providers, winner, winnerModel, stepBreakdown, durationMs }`.
 * @throws Error when no provider spec was supplied (there is nothing to compare).
 */
export async function runComparison(scenario, scenarioFilePath, providers, iterations, options = {}) {
    const startMs = Date.now();
    const results = [];
    for (const spec of providers) {
        options.onProviderStart?.(spec.provider, spec.model);
        // Clone scenario with overridden provider/model on all steps
        const overriddenScenario = {
            ...scenario,
            iterations,
            steps: scenario.steps.map(step => ({
                ...step,
                provider: spec.provider,
                model: spec.model,
            })),
        };
        const report = await runScenario(overriddenScenario, scenarioFilePath, {
            iterations,
            onIterationComplete: (iter, total) => {
                options.onIterationComplete?.(spec.provider, iter, total);
            },
        });
        results.push({ provider: spec.provider, model: spec.model, report });
    }
    // Without this guard the reduce() calls below fail with an opaque
    // "Reduce of empty array with no initial value" TypeError.
    if (results.length === 0) {
        throw new Error('runComparison requires at least one provider spec');
    }
    // Build step breakdown — compare pass rates per step across providers
    const stepIds = scenario.steps.map(s => s.id);
    const stepBreakdown = stepIds.map(stepId => {
        const rates = results.map(r => {
            const stepSummary = r.report.steps.find(s => s.stepId === stepId);
            // A step missing from a provider's report counts as zero runs / zero passes.
            return {
                provider: r.provider,
                model: r.model,
                passRate: stepSummary?.passRate ?? 0,
                passes: stepSummary?.passes ?? 0,
                totalRuns: stepSummary?.totalRuns ?? 0,
            };
        });
        // `>=` keeps the earlier-listed provider on ties.
        const best = rates.reduce((a, b) => a.passRate >= b.passRate ? a : b);
        return { stepId, rates, bestProvider: best.provider, bestModel: best.model };
    });
    // Overall winner: highest average pass rate across all steps
    const avgRates = results.map(r => {
        const stepSummaries = r.report.steps;
        // Guard the division: 0 / 0 would yield NaN, and NaN makes every
        // `>=` comparison below false, silently corrupting winner selection.
        const avg = stepSummaries.length > 0
            ? stepSummaries.reduce((sum, s) => sum + s.passRate, 0) / stepSummaries.length
            : 0;
        return { provider: r.provider, model: r.model, avg };
    });
    const winner = avgRates.reduce((a, b) => a.avg >= b.avg ? a : b);
    return {
        scenarioName: scenario.name,
        iterations,
        providers: results,
        winner: winner.provider,
        winnerModel: winner.model,
        stepBreakdown,
        durationMs: Date.now() - startMs,
    };
}
61
/**
 * Parse "provider:model" strings like "anthropic:claude-sonnet-4-6,openai:gpt-4o".
 *
 * Entries are comma-separated. Each entry is split at its FIRST colon, so a
 * model id may itself contain colons (e.g. "ollama:llama3:8b"). Whitespace
 * around the entry, the provider and the model is ignored.
 *
 * @param input - Comma-separated list of `provider:model` entries.
 * @returns One `{ provider, model }` object per entry, in input order.
 * @throws Error when an entry has no colon, or an empty provider or model.
 */
export function parseProviderSpecs(input) {
    return input.split(',').map(s => {
        const trimmed = s.trim();
        const colonIdx = trimmed.indexOf(':');
        const provider = colonIdx === -1 ? '' : trimmed.slice(0, colonIdx).trim();
        const model = colonIdx === -1 ? '' : trimmed.slice(colonIdx + 1).trim();
        // Reject "openai", "openai:" and ":gpt-4o" alike — a half-empty spec
        // would otherwise only fail later, deep inside adapter lookup.
        if (!provider || !model) {
            throw new Error(`Invalid provider spec "${trimmed}" — expected format: provider:model (e.g. anthropic:claude-sonnet-4-6)`);
        }
        return { provider, model };
    });
}
75
+ //# sourceMappingURL=compare.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"compare.js","sourceRoot":"","sources":["../../src/commands/compare.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AA8BzD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAkB,EAClB,gBAAwB,EACxB,SAAyB,EACzB,UAAkB,EAClB,UAII,EAAE;IAEN,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAqB,EAAE,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,CAAC,eAAe,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAErD,6DAA6D;QAC7D,MAAM,kBAAkB,GAAa;YACnC,GAAG,QAAQ;YACX,UAAU;YACV,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACjC,GAAG,IAAI;gBACP,QAAQ,EAAE,IAAI,CAAC,QAA4C;gBAC3D,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;SACJ,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,kBAAkB,EAAE,gBAAgB,EAAE;YACrE,UAAU;YACV,mBAAmB,EAAE,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;gBACnC,OAAO,CAAC,mBAAmB,EAAE,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;YAC5D,CAAC;SACF,CAAC,CAAC;QAEH,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,sEAAsE;IACtE,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAoB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE;QAC1D,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YAC5B,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC;YAClE,OAAO;gBACL,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,QAAQ,EAAE,WAAW,EAAE,QAAQ,IAAI,CAAC;gBACpC,MAAM,EAAE,WAAW,EAAE,MAAM,IAAI,CAAC;gBAChC,SAAS,EAAE,WAAW,EAAE,SAAS,IAAI,CAAC;aACvC,CAAC;QACJ,CAAC,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACtE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;IAC/E,CAAC,CAAC,CAAC;IAEH,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;QAC/B,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CA
AC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;QAC3F,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC;IACvD,CAAC,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,OAAO;QACL,YAAY,EAAE,QAAQ,CAAC,IAAI;QAC3B,UAAU;QACV,SAAS,EAAE,OAAO;QAClB,MAAM,EAAE,MAAM,CAAC,QAAQ;QACvB,WAAW,EAAE,MAAM,CAAC,KAAK;QACzB,aAAa;QACb,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO;KACjC,CAAC;AACJ,CAAC;AAED,sFAAsF;AACtF,MAAM,UAAU,kBAAkB,CAAC,KAAa;IAC9C,OAAO,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;QAC9B,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACtC,IAAI,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,0BAA0B,OAAO,wEAAwE,CAAC,CAAC;QAC7H,CAAC;QACD,OAAO;YACL,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;YACpC,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC;SACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function runHistory(scenario?: string): void;
2
+ //# sourceMappingURL=history.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../src/commands/history.ts"],"names":[],"mappings":"AAKA,wBAAgB,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAqClD"}
@@ -0,0 +1,46 @@
1
+ import * as fs from 'node:fs';
2
+ import { getResultsDir, listReports } from './results-store.js';
3
/**
 * Print a one-line summary for each stored report, optionally restricted to
 * one scenario.
 *
 * Calls `process.exit(2)` after printing a message to stderr when the results
 * directory does not exist or no report files are found. Report files that
 * cannot be read or parsed are skipped silently.
 *
 * @param scenario - Optional scenario name used to filter the listing.
 */
export function runHistory(scenario) {
    const dir = getResultsDir();
    const dirMissing = !fs.existsSync(dir);
    if (dirMissing) {
        console.error('\nNo results found. Run a scenario first: stepproof run ./scenarios/first-test.yaml\n');
        process.exit(2);
    }
    const reportFiles = listReports(scenario);
    if (reportFiles.length === 0) {
        const message = scenario
            ? `\nNo results found for scenario "${scenario}"\n`
            : '\nNo results found. Run a scenario first.\n';
        console.error(message);
        process.exit(2);
    }
    const heading = scenario ? `History for: ${scenario}` : 'Recent runs';
    console.log('');
    console.log(heading);
    console.log('─'.repeat(60));
    reportFiles.forEach((reportFile) => {
        try {
            const parsed = JSON.parse(fs.readFileSync(reportFile, 'utf8'));
            const when = new Date(parsed.startedAt).toLocaleString();
            // ANSI colours: green PASS / red FAIL
            let badge = '\x1b[31mFAIL\x1b[0m';
            if (parsed.allPassed) {
                badge = '\x1b[32mPASS\x1b[0m';
            }
            const elapsed = formatDuration(parsed.durationMs);
            // Steps that met their minimum pass rate, out of all steps
            const okSteps = parsed.steps.filter(step => !step.belowThreshold).length;
            const stepsInfo = `${okSteps}/${parsed.steps.length} steps`;
            console.log(` ${badge} ${parsed.scenarioName} ${when} ${elapsed} ${stepsInfo}`);
        }
        catch {
            // Skip corrupted files
        }
    });
    console.log('');
}
37
/**
 * Format a millisecond duration for display.
 *
 * - below 1 s: `"<ms>ms"`
 * - below 1 min (after rounding to tenths): `"<s.s>s"`
 * - otherwise: `"<m>m <s>s"` with whole seconds
 *
 * Rounding is done on the total before splitting into units, so the seconds
 * part never shows 60 (e.g. 119600 ms is "2m 0s", not "1m 60s") and the
 * seconds-only form never shows "60.0s".
 *
 * @param ms - Duration in milliseconds.
 * @returns Human-readable duration string.
 */
function formatDuration(ms) {
    if (ms < 1000)
        return `${ms}ms`;
    // 59_950 ms and above would round up to "60.0s"; hand those to the minutes form.
    if (ms < 59_950)
        return `${(ms / 1000).toFixed(1)}s`;
    const totalSecs = Math.round(ms / 1000);
    const mins = Math.floor(totalSecs / 60);
    const secs = totalSecs % 60;
    return `${mins}m ${secs}s`;
}
46
+ //# sourceMappingURL=history.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"history.js","sourceRoot":"","sources":["../../src/commands/history.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGhE,MAAM,UAAU,UAAU,CAAC,QAAiB;IAC1C,MAAM,UAAU,GAAG,aAAa,EAAE,CAAC;IAEnC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,uFAAuF,CAAC,CAAC;QACvG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,KAAK,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;IAEpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,oCAAoC,QAAQ,KAAK,CAAC,CAAC;QACnE,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAC/D,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChB,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,gBAAgB,QAAQ,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,MAAM,GAAmB,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;YACzE,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,cAAc,EAAE,CAAC;YACzD,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,qBAAqB,CAAC;YACjF,MAAM,QAAQ,GAAG,cAAc,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YACnD,MAAM,SAAS,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC;YACvG,OAAO,CAAC,GAAG,CAAC,KAAK,OAAO,KAAK,MAAM,CAAC,YAAY,KAAK,IAAI,KAAK,QAAQ,KAAK,SAAS,EAAE,CAAC,CAAC;QAC1F,CAAC;QAAC,MAAM,CAAC;YACP,uBAAuB;QACzB,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAClB,CAAC;AAED,SAAS,cAAc,CAAC,EAAU;IAChC,IAAI,EAAE,GAAG,IAAI;QAAE,OAAO,GAAG,EAAE,IAAI,CAAC;IAChC,IAAI,EAAE,GAAG,MAAM;QAAE,OAAO,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACrD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,MAAM,CAAC,CAAC;IACrC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,GAAG,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC/C,OAAO,
GAAG,IAAI,KAAK,IAAI,GAAG,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { ScenarioReport } from '../core/types.js';
2
+ /** Get results directory path (relative to cwd) */
3
+ export declare function getResultsDir(): string;
4
+ /**
5
+ * Save a report to .stepproof/results/{slug}-{timestamp}.json
6
+ * Prunes old reports to keep only the last MAX_REPORTS_PER_SCENARIO per scenario.
7
+ */
8
+ export declare function saveReport(report: ScenarioReport): string;
9
+ /** Find the most recent report file across all scenarios */
10
+ export declare function findLatestReport(): string | undefined;
11
+ /** Find the most recent report for a specific scenario (by name or slug prefix match) */
12
+ export declare function findLatestReportForScenario(scenario: string): string | undefined;
13
+ /** List recent report files, optionally filtered by scenario. Returns newest first. */
14
+ export declare function listReports(scenario?: string): string[];
15
+ //# sourceMappingURL=results-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"results-store.d.ts","sourceRoot":"","sources":["../../src/commands/results-store.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAKvD,mDAAmD;AACnD,wBAAgB,aAAa,IAAI,MAAM,CAEtC;AAOD;;;GAGG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAwBzD;AAED,4DAA4D;AAC5D,wBAAgB,gBAAgB,IAAI,MAAM,GAAG,SAAS,CAUrD;AAED,yFAAyF;AACzF,wBAAgB,2BAA2B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAahF;AAED,uFAAuF;AACvF,wBAAgB,WAAW,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAavD"}
@@ -0,0 +1,77 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
// Location of stored reports, relative to the current working directory
const RESULTS_DIR_NAME = '.stepproof/results';
// Retention limit used when pruning old reports for a scenario
const MAX_REPORTS_PER_SCENARIO = 10;
/**
 * Absolute path of the results directory, resolved against the current
 * working directory at call time.
 *
 * @returns Absolute directory path (the directory is not created here).
 */
export function getResultsDir() {
    const workingDir = process.cwd();
    return path.resolve(workingDir, RESULTS_DIR_NAME);
}
9
/**
 * Turn a scenario name into a filename-safe slug: lower-cased, with every run
 * of characters outside `a-z0-9` collapsed to a single `-`, and no leading or
 * trailing `-`.
 *
 * @param name - Scenario name.
 * @returns The slug; empty when the name has no ASCII letters or digits.
 */
function slugify(name) {
    const lowered = name.toLowerCase();
    // Splitting on the separator runs and dropping empty pieces is the same as
    // replacing runs with '-' and then trimming the edge dashes.
    const words = lowered.split(/[^a-z0-9]+/).filter(word => word.length > 0);
    return words.join('-');
}
13
/**
 * Save a report to .stepproof/results/{slug}-{timestamp}.json
 * Prunes old reports to keep only the last MAX_REPORTS_PER_SCENARIO per scenario.
 *
 * The timestamp is the current time in ISO-8601 form with `:` and `.`
 * replaced by `-`, so filenames for one scenario sort chronologically.
 *
 * @param report - Report to persist; `scenarioName` determines the slug.
 * @returns Path of the file that was written.
 */
export function saveReport(report) {
    const dir = getResultsDir();
    fs.mkdirSync(dir, { recursive: true });
    const slug = slugify(report.scenarioName);
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filename = `${slug}-${timestamp}.json`;
    const filepath = path.join(dir, filename);
    fs.writeFileSync(filepath, JSON.stringify(report, null, 2), 'utf8');
    // Prune: keep only last MAX_REPORTS_PER_SCENARIO for this scenario slug.
    // A bare prefix test is not enough: slug "foo" + "-" is also a prefix of
    // "foo-bar-<timestamp>.json", which belongs to a different scenario and
    // must not be counted or deleted. Require that everything after the
    // prefix is exactly a timestamp plus ".json".
    const prefix = slug + '-';
    const timestampedTail = /^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z\.json$/;
    const allForScenario = fs.readdirSync(dir)
        .filter(f => f.startsWith(prefix) && timestampedTail.test(f.slice(prefix.length)))
        .sort()
        .reverse(); // newest first (ISO timestamps sort lexicographically)
    for (let i = MAX_REPORTS_PER_SCENARIO; i < allForScenario.length; i++) {
        try {
            fs.unlinkSync(path.join(dir, allForScenario[i]));
        }
        catch { /* best-effort cleanup: a failed delete must not fail the save */ }
    }
    return filepath;
}
40
/**
 * Find the most recent report file across all scenarios.
 *
 * Report filenames are `{slug}-{timestamp}.json`, so sorting whole filenames
 * orders by slug first. Recency is therefore decided by the trailing
 * timestamp; the full name is only a tie-breaker. `.json` files without a
 * timestamp suffix rank below every timestamped file.
 *
 * @returns Path of the newest report, or `undefined` when the results
 *   directory is missing or holds no `.json` files.
 */
export function findLatestReport() {
    const dir = getResultsDir();
    if (!fs.existsSync(dir))
        return undefined;
    const stampSuffix = /\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z\.json$/;
    let latestName;
    let latestStamp = '';
    for (const name of fs.readdirSync(dir)) {
        if (!name.endsWith('.json'))
            continue;
        const match = stampSuffix.exec(name);
        const stamp = match ? match[0] : '';
        const isNewer = latestName === undefined
            || stamp > latestStamp
            || (stamp === latestStamp && name > latestName);
        if (isNewer) {
            latestName = name;
            latestStamp = stamp;
        }
    }
    return latestName === undefined ? undefined : path.join(dir, latestName);
}
51
/**
 * Find the most recent report for a specific scenario (by name or slug prefix match).
 *
 * Because matching is by prefix, several scenario slugs can match (e.g.
 * "foo" matches both "foo-<ts>" and "foo-bar-<ts>"). Recency is therefore
 * decided by the trailing timestamp, not by the whole filename; the full name
 * is only a tie-breaker.
 *
 * @param scenario - Scenario name; it is slugified before matching.
 * @returns Path of the newest matching report, or `undefined` when the
 *   results directory is missing or nothing matches.
 */
export function findLatestReportForScenario(scenario) {
    const dir = getResultsDir();
    if (!fs.existsSync(dir))
        return undefined;
    const slug = slugify(scenario);
    const prefix = slug + '-';
    const stampSuffix = /\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z\.json$/;
    let latestName;
    let latestStamp = '';
    for (const name of fs.readdirSync(dir)) {
        if (!name.startsWith(prefix) || !name.endsWith('.json'))
            continue;
        const match = stampSuffix.exec(name);
        const stamp = match ? match[0] : '';
        const isNewer = latestName === undefined
            || stamp > latestStamp
            || (stamp === latestStamp && name > latestName);
        if (isNewer) {
            latestName = name;
            latestStamp = stamp;
        }
    }
    return latestName === undefined ? undefined : path.join(dir, latestName);
}
64
/**
 * List recent report files, optionally filtered by scenario. Returns newest first.
 *
 * Filenames are `{slug}-{timestamp}.json`; ordering uses the trailing
 * timestamp so that the list is chronological even when it mixes several
 * scenarios. The full filename is the tie-breaker, and `.json` files without
 * a timestamp suffix sort last.
 *
 * @param scenario - Optional scenario name; slugified and matched as a filename prefix.
 * @returns Absolute paths, newest first; empty when the results directory is missing.
 */
export function listReports(scenario) {
    const dir = getResultsDir();
    if (!fs.existsSync(dir))
        return [];
    let files = fs.readdirSync(dir).filter(f => f.endsWith('.json'));
    if (scenario) {
        const slug = slugify(scenario);
        const prefix = slug + '-';
        files = files.filter(f => f.startsWith(prefix));
    }
    const stampSuffix = /\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z\.json$/;
    const keyed = files.map(name => {
        const match = stampSuffix.exec(name);
        return { name, stamp: match ? match[0] : '' };
    });
    // Descending by timestamp, then descending by name
    keyed.sort((a, b) => {
        if (a.stamp !== b.stamp)
            return a.stamp < b.stamp ? 1 : -1;
        if (a.name !== b.name)
            return a.name < b.name ? 1 : -1;
        return 0;
    });
    return keyed.map(entry => path.join(dir, entry.name));
}
77
+ //# sourceMappingURL=results-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"results-store.js","sourceRoot":"","sources":["../../src/commands/results-store.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAGlC,MAAM,gBAAgB,GAAG,oBAAoB,CAAC;AAC9C,MAAM,wBAAwB,GAAG,EAAE,CAAC;AAEpC,mDAAmD;AACnD,MAAM,UAAU,aAAa;IAC3B,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,gBAAgB,CAAC,CAAC;AACvD,CAAC;AAED,iDAAiD;AACjD,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;AAC9E,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,MAAsB;IAC/C,MAAM,GAAG,GAAG,aAAa,EAAE,CAAC;IAC5B,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAEvC,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;IAC1C,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACjE,MAAM,QAAQ,GAAG,GAAG,IAAI,IAAI,SAAS,OAAO,CAAC;IAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAE1C,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IAEpE,wEAAwE;IACxE,MAAM,MAAM,GAAG,IAAI,GAAG,GAAG,CAAC;IAC1B,MAAM,cAAc,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC;SACvC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;SACxD,IAAI,EAAE;SACN,OAAO,EAAE,CAAC,CAAC,uDAAuD;IAErE,+DAA+D;IAC/D,KAAK,IAAI,CAAC,GAAG,wBAAwB,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtE,IAAI,CAAC;YAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;IAClF,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,4DAA4D;AAC5D,MAAM,UAAU,gBAAgB;IAC9B,MAAM,GAAG,GAAG,aAAa,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IAE1C,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC;SAC9B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;SAChC,IAAI,EAAE;SACN,OAAO,EAAE,CAAC;IAEb,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,
CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACjE,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,2BAA2B,CAAC,QAAgB;IAC1D,MAAM,GAAG,GAAG,aAAa,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,OAAO,SAAS,CAAC;IAE1C,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAG,IAAI,GAAG,GAAG,CAAC;IAE1B,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC;SAC9B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;SACxD,IAAI,EAAE;SACN,OAAO,EAAE,CAAC;IAEb,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACjE,CAAC;AAED,uFAAuF;AACvF,MAAM,UAAU,WAAW,CAAC,QAAiB;IAC3C,MAAM,GAAG,GAAG,aAAa,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,IAAI,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IAEjE,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC/B,MAAM,MAAM,GAAG,IAAI,GAAG,GAAG,CAAC;QAC1B,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;AAC5D,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function runView(scenario?: string): void;
2
+ //# sourceMappingURL=view.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"view.d.ts","sourceRoot":"","sources":["../../src/commands/view.ts"],"names":[],"mappings":"AAQA,wBAAgB,OAAO,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAgD/C"}
@@ -0,0 +1,51 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import * as os from 'node:os';
4
+ import { exec } from 'node:child_process';
5
+ import { generateHtmlReport } from '../reporters/html-reporter.js';
6
+ import { getResultsDir, findLatestReport, findLatestReportForScenario } from './results-store.js';
7
/**
 * Render the latest stored report as HTML and open it with the platform's
 * default handler.
 *
 * Calls `process.exit(2)` after printing a message to stderr when the results
 * directory is missing, no report is found, or the report cannot be
 * read/parsed. The HTML is written to `<os tmpdir>/stepproof/report-<ms>.html`.
 * If the opener command fails, the path is printed so it can be opened by hand.
 *
 * @param scenario - Optional scenario name; when given, the latest report for
 *   that scenario is shown, otherwise the latest report overall.
 */
export function runView(scenario) {
    const resultsDir = getResultsDir();
    if (!fs.existsSync(resultsDir)) {
        console.error('\nNo results found. Run a scenario first: stepproof run ./scenarios/first-test.yaml\n');
        process.exit(2);
    }
    let reportPath;
    if (scenario) {
        reportPath = findLatestReportForScenario(scenario);
        if (!reportPath) {
            console.error(`\nNo results found for scenario "${scenario}"\n`);
            process.exit(2);
        }
    }
    else {
        reportPath = findLatestReport();
        if (!reportPath) {
            console.error('\nNo results found. Run a scenario first.\n');
            process.exit(2);
        }
    }
    let report;
    try {
        report = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
    }
    catch {
        console.error(`\nFailed to read report: ${reportPath}\n`);
        process.exit(2);
    }
    const html = generateHtmlReport(report);
    const tmpDir = path.join(os.tmpdir(), 'stepproof');
    fs.mkdirSync(tmpDir, { recursive: true });
    const htmlPath = path.join(tmpDir, `report-${Date.now()}.html`);
    fs.writeFileSync(htmlPath, html, 'utf8');
    // Windows `start` treats its first quoted argument as the window TITLE, so
    // `start "C:\...\report.html"` opens an empty console instead of the file.
    // An explicit empty title ("") makes the quoted path the target.
    const openCommand = process.platform === 'darwin' ? `open "${htmlPath}"`
        : process.platform === 'win32' ? `start "" "${htmlPath}"`
            : `xdg-open "${htmlPath}"`;
    console.log(`\nOpening report: ${htmlPath}\n`);
    exec(openCommand, (err) => {
        if (err) {
            console.log(`Could not open browser automatically. Open this file manually:\n ${htmlPath}\n`);
        }
    });
}
51
+ //# sourceMappingURL=view.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"view.js","sourceRoot":"","sources":["../../src/commands/view.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,2BAA2B,EAAE,MAAM,oBAAoB,CAAC;AAGlG,MAAM,UAAU,OAAO,CAAC,QAAiB;IACvC,MAAM,UAAU,GAAG,aAAa,EAAE,CAAC;IAEnC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,uFAAuF,CAAC,CAAC;QACvG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,UAA8B,CAAC;IAEnC,IAAI,QAAQ,EAAE,CAAC;QACb,UAAU,GAAG,2BAA2B,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,oCAAoC,QAAQ,KAAK,CAAC,CAAC;YACjE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,UAAU,GAAG,gBAAgB,EAAE,CAAC;QAChC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;YAC7D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,MAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,4BAA4B,UAAU,IAAI,CAAC,CAAC;QAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,IAAI,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAC;IACxC,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,WAAW,CAAC,CAAC;IACnD,EAAE,CAAC,SAAS,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAChE,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;IAEzC,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM;QACpD,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,OAAO;YACxC,CAAC,CAAC,UAAU,CAAC;IAEf,OAAO,CAAC,GAAG,CAAC,qBAAqB,QAAQ,IAAI,CAAC,CAAC;IAC/C,IAAI,CAAC,GAAG,OAAO,KAAK,QAAQ,GAAG,EAAE,CAAC,GAAG,EAAE,EAAE;QACvC,IAAI,GAAG,EAAE,CAAC;YACR,OAAO,CAAC,GAAG,CAAC,qEAAqE,QAAQ,IAAI,CAAC,CAAC;QACjG,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -1,4 +1,8 @@
1
1
  import type { Scenario, ScenarioReport } from './types.js';
2
+ export interface CacheStats {
3
+ hits: number;
4
+ misses: number;
5
+ }
2
6
  export interface RunOptions {
3
7
  /** Override iterations from scenario file */
4
8
  iterations?: number;
@@ -6,6 +10,10 @@ export interface RunOptions {
6
10
  onIterationComplete?: (iteration: number, total: number) => void;
7
11
  /** Called after each step within an iteration */
8
12
  onStepComplete?: (stepId: string, passed: boolean) => void;
13
+ /** Disable LLM response caching */
14
+ noCache?: boolean;
15
+ /** Populated after run — cache hit/miss stats */
16
+ cacheStats?: CacheStats;
9
17
  }
10
18
  export declare function runScenario(scenario: Scenario, scenarioFilePath: string, options?: RunOptions): Promise<ScenarioReport>;
11
19
  //# sourceMappingURL=scenario-runner.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"scenario-runner.d.ts","sourceRoot":"","sources":["../../src/core/scenario-runner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAA2B,MAAM,YAAY,CAAC;AAEpF,MAAM,WAAW,UAAU;IACzB,6CAA6C;IAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4CAA4C;IAC5C,mBAAmB,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACjE,iDAAiD;IACjD,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,KAAK,IAAI,CAAC;CAC5D;AAED,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,QAAQ,EAClB,gBAAgB,EAAE,MAAM,EACxB,OAAO,GAAE,UAAe,GACvB,OAAO,CAAC,cAAc,CAAC,CA8FzB"}
1
+ {"version":3,"file":"scenario-runner.d.ts","sourceRoot":"","sources":["../../src/core/scenario-runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAA2B,MAAM,YAAY,CAAC;AAoBpF,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,6CAA6C;IAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4CAA4C;IAC5C,mBAAmB,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACjE,iDAAiD;IACjD,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,KAAK,IAAI,CAAC;IAC3D,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,iDAAiD;IACjD,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB;AAED,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,QAAQ,EAClB,gBAAgB,EAAE,MAAM,EACxB,OAAO,GAAE,UAAe,GACvB,OAAO,CAAC,cAAc,CAAC,CA0IzB"}
@@ -2,6 +2,20 @@ import * as path from 'node:path';
2
2
  import { getAdapter } from '../adapters/index.js';
3
3
  import { runAssertions } from '../assertions/engine.js';
4
4
  import { substituteVariables } from './scenario-parser.js';
5
+ import { getCached, setCache } from '../cache.js';
6
// Approximate cost per 1K tokens by model, keyed by exact model id
const COST_PER_1K = {
    'claude-haiku-4-5-20251001': { input: 0.001, output: 0.005 },
    'claude-sonnet-4-6-20260401': { input: 0.003, output: 0.015 },
    'gpt-4o': { input: 0.0025, output: 0.01 },
    'gpt-4o-mini': { input: 0.00015, output: 0.0006 },
};
/**
 * Estimate the cost of one call from its token counts.
 *
 * @param model - Model id; must match a COST_PER_1K key exactly.
 * @param inputTokens - Number of prompt tokens.
 * @param outputTokens - Number of completion tokens.
 * @returns Estimated cost, or 0 when the model has no pricing entry.
 */
function calculateCost(model, inputTokens, outputTokens) {
    // Own-property check: a plain `COST_PER_1K[model]` lookup would resolve
    // names like "toString" or "constructor" to Object.prototype members and
    // produce NaN instead of 0.
    if (!Object.prototype.hasOwnProperty.call(COST_PER_1K, model))
        return 0;
    const pricing = COST_PER_1K[model];
    return (inputTokens / 1000) * pricing.input + (outputTokens / 1000) * pricing.output;
}
5
19
  export async function runScenario(scenario, scenarioFilePath, options = {}) {
6
20
  const iterations = options.iterations ?? scenario.iterations ?? 10;
7
21
  const scenarioDir = path.dirname(path.resolve(scenarioFilePath));
@@ -9,6 +23,8 @@ export async function runScenario(scenario, scenarioFilePath, options = {}) {
9
23
  const startedAt = new Date().toISOString();
10
24
  const startMs = Date.now();
11
25
  const allResults = [];
26
+ const useCache = !options.noCache;
27
+ const cacheStats = { hits: 0, misses: 0 };
12
28
  for (let i = 1; i <= iterations; i++) {
13
29
  const stepOutputs = {};
14
30
  for (const step of scenario.steps) {
@@ -16,23 +32,46 @@ export async function runScenario(scenario, scenarioFilePath, options = {}) {
16
32
  const resolvedSystem = step.system
17
33
  ? substituteVariables(step.system, variables, stepOutputs)
18
34
  : undefined;
19
- const stepStartMs = Date.now();
20
35
  let output = '';
21
36
  let error;
37
+ let durationMs = 0;
38
+ let inputTokens;
39
+ let outputTokens;
40
+ let costUsd;
22
41
  try {
23
- const adapter = getAdapter(step.provider, step.model);
24
- output = await adapter.call(resolvedPrompt, resolvedSystem);
42
+ // Check cache before calling adapter
43
+ let response = useCache
44
+ ? getCached(step.provider, step.model, resolvedPrompt, resolvedSystem)
45
+ : null;
46
+ if (response !== null) {
47
+ cacheStats.hits++;
48
+ }
49
+ else {
50
+ const adapter = getAdapter(step.provider, step.model);
51
+ response = await adapter.call(resolvedPrompt, resolvedSystem);
52
+ cacheStats.misses++;
53
+ if (useCache) {
54
+ setCache(step.provider, step.model, resolvedPrompt, resolvedSystem, response);
55
+ }
56
+ }
57
+ output = response.text;
58
+ durationMs = response.durationMs;
59
+ if (response.usage) {
60
+ inputTokens = response.usage.inputTokens;
61
+ outputTokens = response.usage.outputTokens;
62
+ costUsd = calculateCost(step.model, inputTokens, outputTokens);
63
+ }
25
64
  stepOutputs[step.id] = output;
26
65
  }
27
66
  catch (e) {
28
67
  error = e.message;
68
+ cacheStats.misses++;
29
69
  stepOutputs[step.id] = '';
30
70
  }
31
- const durationMs = Date.now() - stepStartMs;
32
71
  let assertionResults = [];
33
72
  let assertionsPassed = false;
34
73
  if (!error) {
35
- const { results, allPassed } = await runAssertions(output, step.assertions, scenarioDir);
74
+ const { results, allPassed } = await runAssertions(output, step.assertions, scenarioDir, { durationMs, costUsd });
36
75
  assertionResults = results;
37
76
  assertionsPassed = allPassed;
38
77
  }
@@ -45,6 +84,9 @@ export async function runScenario(scenario, scenarioFilePath, options = {}) {
45
84
  assertionResults,
46
85
  error,
47
86
  durationMs,
87
+ inputTokens,
88
+ outputTokens,
89
+ costUsd,
48
90
  };
49
91
  allResults.push(result);
50
92
  options.onStepComplete?.(step.id, stepPassed);
@@ -58,6 +100,10 @@ export async function runScenario(scenario, scenarioFilePath, options = {}) {
58
100
  const failures = stepResults.length - passes;
59
101
  const passRate = stepResults.length > 0 ? passes / stepResults.length : 0;
60
102
  const minPassRate = step.min_pass_rate ?? 0.8;
103
+ const totalDurationMs = stepResults.reduce((s, r) => s + r.durationMs, 0);
104
+ const avgDurationMs = stepResults.length > 0 ? totalDurationMs / stepResults.length : 0;
105
+ const totalCostUsd = stepResults.reduce((s, r) => s + (r.costUsd ?? 0), 0);
106
+ const avgCostUsd = stepResults.length > 0 ? totalCostUsd / stepResults.length : 0;
61
107
  return {
62
108
  stepId: step.id,
63
109
  totalRuns: stepResults.length,
@@ -66,11 +112,16 @@ export async function runScenario(scenario, scenarioFilePath, options = {}) {
66
112
  passRate,
67
113
  minPassRate,
68
114
  belowThreshold: passRate < minPassRate,
115
+ avgDurationMs,
116
+ totalCostUsd,
117
+ avgCostUsd,
69
118
  };
70
119
  });
71
120
  const allPassed = steps.every((s) => !s.belowThreshold);
72
121
  const completedAt = new Date().toISOString();
73
122
  const durationMs = Date.now() - startMs;
123
+ // Expose cache stats to caller
124
+ options.cacheStats = cacheStats;
74
125
  return {
75
126
  scenarioName: scenario.name,
76
127
  iterations,
@@ -1 +1 @@
1
- {"version":3,"file":"scenario-runner.js","sourceRoot":"","sources":["../../src/core/scenario-runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAY3D,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAkB,EAClB,gBAAwB,EACxB,UAAsB,EAAE;IAExB,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;IACnE,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACjE,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,IAAI,EAAE,CAAC;IAE3C,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC3B,MAAM,UAAU,GAAiB,EAAE,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,WAAW,GAA2B,EAAE,CAAC;QAE/C,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YAClC,MAAM,cAAc,GAAG,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;YAChF,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM;gBAChC,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC;gBAC1D,CAAC,CAAC,SAAS,CAAC;YAEd,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC/B,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,KAAyB,CAAC;YAE9B,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtD,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,cAAc,CAAC,CAAC;gBAC5D,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC;YAChC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,KAAK,GAAI,CAAW,CAAC,OAAO,CAAC;gBAC7B,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC;YAC5B,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC;YAE5C,IAAI,gBAAgB,GAA0D,EAAE,CAAC;YACjF,IAAI,gBAAgB,GAAG,KAAK,CAAC;YAE7B,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;gBACzF,gBAAgB,GAAG,OAAO,CAAC;gBAC3B,gBAAgB,GAAG,SAAS,CAAC;YAC/B,CAAC;YAED,MAAM,UAAU,GAAG,CAAC,KAAK,IAAI,gBAAgB,CAAC;YAE9C,MAAM,MAAM,GAAe;gBACzB,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,SAAS,EAAE,CAAC;gBACZ,MAAM;gBACN,MAAM,EAAE,UAAU;gBA
ClB,gBAAgB;gBAChB,KAAK;gBACL,UAAU;aACX,CAAC;YAEF,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACxB,OAAO,CAAC,cAAc,EAAE,CAAC,IAAI,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAED,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAC/C,CAAC;IAED,+BAA+B;IAC/B,MAAM,KAAK,GAAkB,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACvD,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC1D,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC7C,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,IAAI,GAAG,CAAC;QAE9C,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,SAAS,EAAE,WAAW,CAAC,MAAM;YAC7B,MAAM;YACN,QAAQ;YACR,QAAQ;YACR,WAAW;YACX,cAAc,EAAE,QAAQ,GAAG,WAAW;SACvC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACxD,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;IAExC,OAAO;QACL,YAAY,EAAE,QAAQ,CAAC,IAAI;QAC3B,UAAU;QACV,SAAS;QACT,WAAW;QACX,UAAU;QACV,KAAK;QACL,SAAS;QACT,OAAO,EAAE,UAAU;KACpB,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"scenario-runner.js","sourceRoot":"","sources":["../../src/core/scenario-runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAElD,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGlD,0CAA0C;AAC1C,MAAM,WAAW,GAAsD;IACrE,2BAA2B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE;IAC5D,4BAA4B,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE;IAC7D,QAAQ,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE;IACzC,aAAa,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE;CAClD,CAAC;AAEF,SAAS,aAAa,CACpB,KAAa,EACb,WAAmB,EACnB,YAAoB;IAEpB,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,CAAC,OAAO;QAAE,OAAO,CAAC,CAAC;IACvB,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;AACvF,CAAC;AAoBD,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAkB,EAClB,gBAAwB,EACxB,UAAsB,EAAE;IAExB,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;IACnE,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACjE,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,IAAI,EAAE,CAAC;IAE3C,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC3B,MAAM,UAAU,GAAiB,EAAE,CAAC;IACpC,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC;IAClC,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAEtD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,WAAW,GAA2B,EAAE,CAAC;QAE/C,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YAClC,MAAM,cAAc,GAAG,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;YAChF,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM;gBAChC,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC;gBAC1D,CAAC,CAAC,SAAS,CAAC;YAEd,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,KAAyB,CAAC;YAC9B,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,IAAI,WAA+B,CAAC;YACpC,IAAI,YAAgC,CAAC;YACrC,IAAI,OAA2B,CAAC;YAEhC,IAAI,CAAC;gBACH,qCAAqC;gBACrC,IAAI,QAAQ,GAA2B,QA
AQ;oBAC7C,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,EAAE,cAAc,EAAE,cAAc,CAAC;oBACtE,CAAC,CAAC,IAAI,CAAC;gBAET,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;oBACtB,UAAU,CAAC,IAAI,EAAE,CAAC;gBACpB,CAAC;qBAAM,CAAC;oBACN,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;oBACtD,QAAQ,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,cAAc,CAAC,CAAC;oBAC9D,UAAU,CAAC,MAAM,EAAE,CAAC;oBACpB,IAAI,QAAQ,EAAE,CAAC;wBACb,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,QAAQ,CAAC,CAAC;oBAChF,CAAC;gBACH,CAAC;gBAED,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC;gBACvB,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;gBACjC,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;oBACnB,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC;oBACzC,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,YAAY,CAAC;oBAC3C,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC;gBACjE,CAAC;gBACD,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC;YAChC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,KAAK,GAAI,CAAW,CAAC,OAAO,CAAC;gBAC7B,UAAU,CAAC,MAAM,EAAE,CAAC;gBACpB,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC;YAC5B,CAAC;YAED,IAAI,gBAAgB,GAA0D,EAAE,CAAC;YACjF,IAAI,gBAAgB,GAAG,KAAK,CAAC;YAE7B,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CAChD,MAAM,EACN,IAAI,CAAC,UAAU,EACf,WAAW,EACX,EAAE,UAAU,EAAE,OAAO,EAAE,CACxB,CAAC;gBACF,gBAAgB,GAAG,OAAO,CAAC;gBAC3B,gBAAgB,GAAG,SAAS,CAAC;YAC/B,CAAC;YAED,MAAM,UAAU,GAAG,CAAC,KAAK,IAAI,gBAAgB,CAAC;YAE9C,MAAM,MAAM,GAAe;gBACzB,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,SAAS,EAAE,CAAC;gBACZ,MAAM;gBACN,MAAM,EAAE,UAAU;gBAClB,gBAAgB;gBAChB,KAAK;gBACL,UAAU;gBACV,WAAW;gBACX,YAAY;gBACZ,OAAO;aACR,CAAC;YAEF,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACxB,OAAO,CAAC,cAAc,EAAE,CAAC,IAAI,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAED,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAC/C,CAAC;IAED,+BAA+B;IAC/B,MAAM,KAAK,GAAkB,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACvD,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAA
E,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC1D,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC7C,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,IAAI,GAAG,CAAC;QAE9C,MAAM,eAAe,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QAC1E,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACxF,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAElF,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,SAAS,EAAE,WAAW,CAAC,MAAM;YAC7B,MAAM;YACN,QAAQ;YACR,QAAQ;YACR,WAAW;YACX,cAAc,EAAE,QAAQ,GAAG,WAAW;YACtC,aAAa;YACb,YAAY;YACZ,UAAU;SACX,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACxD,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;IAExC,+BAA+B;IAC/B,OAAO,CAAC,UAAU,GAAG,UAAU,CAAC;IAEhC,OAAO;QACL,YAAY,EAAE,QAAQ,CAAC,IAAI;QAC3B,UAAU;QACV,SAAS;QACT,WAAW;QACX,UAAU;QACV,KAAK;QACL,SAAS;QACT,OAAO,EAAE,UAAU;KACpB,CAAC;AACJ,CAAC"}
@@ -1,19 +1,27 @@
1
- export type Provider = 'openai' | 'anthropic';
2
- export type AssertionType = 'contains' | 'not_contains' | 'regex' | 'json_schema' | 'llm_judge';
1
+ export type Provider = 'openai' | 'anthropic' | 'gemini' | 'ollama';
2
+ export type AssertionType = 'contains' | 'not_contains' | 'regex' | 'json_schema' | 'llm_judge' | 'similarity' | 'sentiment' | 'toxicity' | 'starts_with' | 'ends_with' | 'length' | 'word_count' | 'cost_under' | 'latency_under';
3
3
  export interface Assertion {
4
4
  type: AssertionType;
5
- /** For contains, not_contains, regex */
6
- value?: string;
5
+ /** For contains, not_contains, regex, starts_with, ends_with, cost_under, latency_under */
6
+ value?: string | number;
7
7
  /** For json_schema: path to JSON schema file (relative to scenario file) */
8
8
  schema?: string;
9
9
  /** For llm_judge: the evaluation prompt */
10
10
  prompt?: string;
11
11
  /** For llm_judge: the expected response prefix (default: "yes") */
12
12
  pass_on?: string;
13
- /** For llm_judge: override provider (default: anthropic) */
13
+ /** For llm_judge, similarity, sentiment, toxicity: override provider (default: anthropic) */
14
14
  provider?: Provider;
15
- /** For llm_judge: override model (default: claude-haiku or gpt-4o-mini) */
15
+ /** For llm_judge, similarity, sentiment, toxicity: override model */
16
16
  model?: string;
17
+ /** For similarity: minimum similarity score 0.0-1.0 (default: 0.7) */
18
+ threshold?: number;
19
+ /** For toxicity: maximum toxicity score 0.0-1.0 (default: 0.5) */
20
+ max_score?: number;
21
+ /** For length, word_count: minimum value */
22
+ min?: number;
23
+ /** For length, word_count: maximum value */
24
+ max?: number;
17
25
  }
18
26
  export interface Step {
19
27
  id: string;
@@ -23,7 +31,7 @@ export interface Step {
23
31
  prompt: string;
24
32
  /** Optional system prompt */
25
33
  system?: string;
26
- /** Minimum pass rate threshold (0.01.0). Default: 0.8 */
34
+ /** Minimum pass rate threshold (0.0-1.0). Default: 0.8 */
27
35
  min_pass_rate?: number;
28
36
  assertions: Assertion[];
29
37
  }
@@ -48,6 +56,9 @@ export interface StepResult {
48
56
  assertionResults: AssertionResult[];
49
57
  error?: string;
50
58
  durationMs: number;
59
+ inputTokens?: number;
60
+ outputTokens?: number;
61
+ costUsd?: number;
51
62
  }
52
63
  export interface StepSummary {
53
64
  stepId: string;
@@ -57,6 +68,9 @@ export interface StepSummary {
57
68
  passRate: number;
58
69
  minPassRate: number;
59
70
  belowThreshold: boolean;
71
+ avgDurationMs: number;
72
+ totalCostUsd: number;
73
+ avgCostUsd: number;
60
74
  }
61
75
  export interface ScenarioReport {
62
76
  scenarioName: string;
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,QAAQ,GAAG,WAAW,CAAC;AAE9C,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,cAAc,GAAG,OAAO,GAAG,aAAa,GAAG,WAAW,CAAC;AAEhG,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,aAAa,CAAC;IACpB,wCAAwC;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mEAAmE;IACnE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,2EAA2E;IAC3E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,kGAAkG;IAClG,MAAM,EAAE,MAAM,CAAC;IACf,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,0DAA0D;IAC1D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,+CAA+C;IAC/C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,QAAQ,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAEpE,MAAM,MAAM,aAAa,GACrB,UAAU,GACV,cAAc,GACd,OAAO,GACP,aAAa,GACb,WAAW,GACX,YAAY,GACZ,WAAW,GACX,UAAU,GACV,aAAa,GACb,WAAW,GACX,QAAQ,GACR,YAAY,GACZ,YAAY,GACZ,eAAe,CAAC;AAEpB,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,aAAa,CAAC;IACpB,2FAA2F;IAC3F,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,4EAA4E;IAC5E,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mEAAmE;IACnE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,6FAA6F;IAC7F,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,qEAAqE;IACrE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,sEAAsE;IACtE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kEAAkE;IAClE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,4CAA4C;IAC5C,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,kGAAkG;IAClG,MAAM,EAAE,MAAM,CAAC;IACf,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,0DAA0D;IAC1D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,+CAA+C;IAC/C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;A
AED,MAAM,WAAW,cAAc;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB"}
@@ -0,0 +1,3 @@
1
+ import type { ScenarioReport } from '../core/types.js';
2
+ export declare function generateHtmlReport(report: ScenarioReport): string;
3
+ //# sourceMappingURL=html-reporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-reporter.d.ts","sourceRoot":"","sources":["../../src/reporters/html-reporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAA2B,MAAM,kBAAkB,CAAC;AAEhF,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAuFjE"}