stepproof 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +276 -0
- package/dist/adapters/anthropic.d.ts +8 -0
- package/dist/adapters/anthropic.d.ts.map +1 -0
- package/dist/adapters/anthropic.js +26 -0
- package/dist/adapters/anthropic.js.map +1 -0
- package/dist/adapters/base.d.ts +4 -0
- package/dist/adapters/base.d.ts.map +1 -0
- package/dist/adapters/base.js +2 -0
- package/dist/adapters/base.js.map +1 -0
- package/dist/adapters/index.d.ts +4 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +13 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/openai.d.ts +8 -0
- package/dist/adapters/openai.d.ts.map +1 -0
- package/dist/adapters/openai.js +25 -0
- package/dist/adapters/openai.js.map +1 -0
- package/dist/assertions/engine.d.ts +6 -0
- package/dist/assertions/engine.d.ts.map +1 -0
- package/dist/assertions/engine.js +124 -0
- package/dist/assertions/engine.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +126 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +39 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/core/scenario-parser.d.ts +4 -0
- package/dist/core/scenario-parser.d.ts.map +1 -0
- package/dist/core/scenario-parser.js +92 -0
- package/dist/core/scenario-parser.js.map +1 -0
- package/dist/core/scenario-runner.d.ts +11 -0
- package/dist/core/scenario-runner.d.ts.map +1 -0
- package/dist/core/scenario-runner.js +85 -0
- package/dist/core/scenario-runner.js.map +1 -0
- package/dist/core/types.d.ts +71 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +2 -0
- package/dist/core/types.js.map +1 -0
- package/dist/reporters/json-reporter.d.ts +4 -0
- package/dist/reporters/json-reporter.d.ts.map +1 -0
- package/dist/reporters/json-reporter.js +9 -0
- package/dist/reporters/json-reporter.js.map +1 -0
- package/dist/reporters/junit-reporter.d.ts +3 -0
- package/dist/reporters/junit-reporter.d.ts.map +1 -0
- package/dist/reporters/junit-reporter.js +34 -0
- package/dist/reporters/junit-reporter.js.map +1 -0
- package/dist/reporters/sarif-reporter.d.ts +3 -0
- package/dist/reporters/sarif-reporter.d.ts.map +1 -0
- package/dist/reporters/sarif-reporter.js +47 -0
- package/dist/reporters/sarif-reporter.js.map +1 -0
- package/dist/reporters/terminal-reporter.d.ts +4 -0
- package/dist/reporters/terminal-reporter.d.ts.map +1 -0
- package/dist/reporters/terminal-reporter.js +73 -0
- package/dist/reporters/terminal-reporter.js.map +1 -0
- package/package.json +62 -0
- package/schemas/scenario.schema.json +119 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from 'commander';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
import { parseScenario } from './core/scenario-parser.js';
|
|
5
|
+
import { runScenario } from './core/scenario-runner.js';
|
|
6
|
+
import { writeJsonReport } from './reporters/json-reporter.js';
|
|
7
|
+
import { printReport } from './reporters/terminal-reporter.js';
|
|
8
|
+
import { formatSarif } from './reporters/sarif-reporter.js';
|
|
9
|
+
import { formatJunit } from './reporters/junit-reporter.js';
|
|
10
|
+
import * as fs from 'node:fs';
|
|
11
|
+
import { guard } from '@preflight/license';
|
|
12
|
+
import { runInit } from './commands/init.js';
|
|
13
|
+
const program = new Command();

/** Turns anything that was thrown into a printable message. */
function describeError(e) {
    return e instanceof Error ? e.message : String(e);
}

/**
 * Option parser for --iterations.
 *
 * Commander invokes option parsers as (value, previousValue). Passing the
 * global parseInt directly would therefore use the previous value as the
 * radix, so the conversion is wrapped here with an explicit radix of 10.
 *
 * @param value raw command-line text
 * @returns the parsed count, or NaN when the text is not a plain decimal
 *          integer (the run action turns NaN into a usage error)
 */
function parseIterationCount(value) {
    return /^\d+$/.test(value.trim()) ? Number.parseInt(value, 10) : Number.NaN;
}

/**
 * Reports whether the user spelled out -o/--output on the command line, in
 * any of the accepted forms: `-o file`, `-ofile`, `--output file`,
 * `--output=file`. Arguments after a literal `--` are operands, not flags.
 *
 * @param argv the user arguments (process.argv without node and script path)
 */
function hasExplicitOutputFlag(argv) {
    const terminator = argv.indexOf('--');
    const flags = terminator === -1 ? argv : argv.slice(0, terminator);
    return flags.some((arg) => arg === '--output' || arg.startsWith('--output=') || arg.startsWith('-o'));
}

// NOTE(review): the help text below advertises running a whole directory, but
// the run action hands the path straight to parseScenario — confirm that
// directory input is actually supported before relying on that example.
program
    .name('stepproof')
    .description('Regression testing for multi-step AI workflows. Not observability — a CI gate.')
    .version('0.2.0')
    .addHelpText('after', `
Examples:
  stepproof init                                                  scaffold a starter scenario
  stepproof run ./scenarios/first-test.yaml                       run one scenario
  stepproof run ./scenarios/                                      run all scenarios in a directory
  stepproof run test.yaml --format sarif --output results.sarif   SARIF output for CI`);

program
    .command('init [dir]')
    .description('Scaffold a starter scenario in ./scenarios/first-test.yaml')
    .action((dir) => {
        runInit(dir);
    });

program
    .command('run <scenario>')
    .description('Run a scenario YAML file and report pass rates per step')
    .option('-n, --iterations <number>', 'Number of iterations to run (overrides scenario file)', parseIterationCount)
    .option('-o, --output <file>', 'Path for output file (JSON by default; SARIF or JUnit when --format is set)', 'stepproof-report.json')
    .option('--no-json', 'Skip JSON report output')
    .option('--quiet', 'Suppress terminal output (use with --output for CI)')
    .option('--format <format>', 'Output format: sarif, junit')
    .option('--report <format>', '(deprecated: use --format)')
    .action(async (scenarioPath, opts) => {
        // --report is deprecated; normalize to --format
        if (opts.report && !opts.format) {
            process.stderr.write('Warning: --report is deprecated, use --format instead\n');
            opts.format = opts.report;
        }
        if (opts.format && opts.format !== 'sarif' && opts.format !== 'junit') {
            console.error(`\nError: --format must be "sarif" or "junit", got "${opts.format}"`);
            process.exit(2);
        }
        // Reject unusable iteration counts up front instead of producing an
        // empty run that fails the gate without explanation.
        if (opts.iterations !== undefined && !(Number.isInteger(opts.iterations) && opts.iterations >= 1)) {
            console.error('\nError: --iterations must be a positive integer');
            process.exit(2);
        }
        const wantsFormat = opts.format === 'sarif' || opts.format === 'junit';
        // License gate — check before running the scenario (avoid wasted API calls)
        if (wantsFormat) {
            guard('team', { feature: `--format ${opts.format}` });
        }
        // --format implies quiet (suppress terminal output) unless --quiet already set
        const isQuiet = opts.quiet || wantsFormat;
        const resolvedPath = path.resolve(process.cwd(), scenarioPath);
        let scenario;
        try {
            scenario = parseScenario(resolvedPath);
        }
        catch (e) {
            console.error(`\nError parsing scenario: ${describeError(e)}`);
            process.exit(2);
        }
        if (!isQuiet) {
            console.log(`\nLoading: ${scenario.name}`);
            if (opts.iterations !== undefined) {
                console.log(`Overriding iterations: ${scenario.iterations ?? 10} → ${opts.iterations}`);
            }
        }
        let report;
        try {
            report = await runScenario(scenario, resolvedPath, {
                iterations: opts.iterations,
                onIterationComplete: (iteration, total) => {
                    if (!isQuiet) {
                        process.stdout.write(`\r  Completed iteration ${iteration}/${total}...`);
                        if (iteration === total) {
                            // Wipe the progress line once the last iteration is done.
                            process.stdout.write('\r' + ' '.repeat(50) + '\r');
                        }
                    }
                },
            });
        }
        catch (e) {
            console.error(`\nError running scenario: ${describeError(e)}`);
            process.exit(2);
        }
        // Handle --format sarif / --format junit.
        // When -o/--output was given explicitly, that file belongs to the
        // formatted report; otherwise the formatted report goes to stdout.
        const formattedOwnsOutput = wantsFormat && hasExplicitOutputFlag(process.argv.slice(2));
        if (wantsFormat) {
            const formatted = opts.format === 'sarif' ? formatSarif(report) : formatJunit(report);
            if (formattedOwnsOutput) {
                try {
                    fs.writeFileSync(opts.output, formatted, 'utf-8');
                }
                catch (e) {
                    console.error(`Warning: Could not write ${opts.format} report: ${describeError(e)}`);
                }
            }
            else {
                process.stdout.write(formatted + '\n');
            }
        }
        // The JSON report must not be written over the SARIF/JUnit file that
        // was just produced at the same path.
        const writeJson = opts.json && !formattedOwnsOutput;
        const reportPath = writeJson ? opts.output : undefined;
        if (!isQuiet) {
            printReport(report, reportPath);
        }
        if (writeJson) {
            try {
                writeJsonReport(report, opts.output);
            }
            catch (e) {
                console.error(`Warning: Could not write JSON report: ${describeError(e)}`);
            }
        }
        // Exit 1 if any step below threshold — this is the CI gate
        process.exit(report.allPassed ? 0 : 1);
    });

// The run action is async, so parse with parseAsync and surface any rejection
// as a regular error exit instead of an unhandled promise rejection.
program.parseAsync(process.argv).catch((e) => {
    console.error(`\nError: ${describeError(e)}`);
    process.exit(2);
});
|
|
126
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,WAAW,EAAiB,MAAM,kCAAkC,CAAC;AAC9E,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAE7C,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,WAAW,CAAC;KACjB,WAAW,CAAC,gFAAgF,CAAC;KAC7F,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,OAAO,EAAE;;;;;qFAK6D,CAAC,CAAC;AAEvF,OAAO;KACJ,OAAO,CAAC,YAAY,CAAC;KACrB,WAAW,CAAC,4DAA4D,CAAC;KACzE,MAAM,CAAC,CAAC,GAAY,EAAE,EAAE;IACvB,OAAO,CAAC,GAAG,CAAC,CAAC;AACf,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,gBAAgB,CAAC;KACzB,WAAW,CAAC,yDAAyD,CAAC;KACtE,MAAM,CAAC,2BAA2B,EAAE,uDAAuD,EAAE,QAAQ,CAAC;KACtG,MAAM,CAAC,qBAAqB,EAAE,6EAA6E,EAAE,uBAAuB,CAAC;KACrI,MAAM,CAAC,WAAW,EAAE,yBAAyB,CAAC;KAC9C,MAAM,CAAC,SAAS,EAAE,qDAAqD,CAAC;KACxE,MAAM,CAAC,mBAAmB,EAAE,6BAA6B,CAAC;KAC1D,MAAM,CAAC,mBAAmB,EAAE,4BAA4B,CAAC;KACzD,MAAM,CAAC,KAAK,EAAE,YAAoB,EAAE,IAOpC,EAAE,EAAE;IACH,gDAAgD;IAChD,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QAChC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAChF,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;QACtE,OAAO,CAAC,KAAK,CAAC,sDAAsD,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,4EAA4E;IAC5E,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;QACvD,KAAK,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,YAAY,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,+EAA+E;IAC/E,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;IAC5C,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,YAAY,CAAC,CAAC;IAE/
D,IAAI,QAAQ,CAAC;IACb,IAAI,CAAC;QACH,QAAQ,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,6BAA8B,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACnE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,CAAC,UAAU,IAAI,EAAE,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC1F,CAAC;IACH,CAAC;IAED,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,MAAM,eAAe,GAAG,IAAI,CAAC,UAAU,IAAI,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;IAErE,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,YAAY,EAAE;YACjD,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,mBAAmB,EAAE,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;gBACxC,gBAAgB,GAAG,SAAS,CAAC;gBAC7B,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,SAAS,IAAI,KAAK,KAAK,CAAC,CAAC;oBACzE,IAAI,SAAS,KAAK,KAAK,EAAE,CAAC;wBACxB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;oBACrD,CAAC;gBACH,CAAC;YACH,CAAC;SACF,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,6BAA8B,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACnE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,yCAAyC;IACzC,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;QACvD,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QACtF,MAAM,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC3F,IAAI,iBAAiB,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACpD,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,YAAa,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YAC3F,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;QACzC,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;IAEvD,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,WAAW,CAAC,M
AAM,EAAE,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,IAAI,CAAC;YACH,eAAe,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,yCAA0C,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IAED,2DAA2D;IAC3D,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;QACtB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../src/commands/init.ts"],"names":[],"mappings":"AAoBA,wBAAgB,OAAO,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAsBhD"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
2
|
+
import { resolve } from 'path';
|
|
3
|
+
// Starter scenario written by `stepproof init`.
// The keys match what the scenario parser validates: every step needs
// `id`, `provider`, `model` and `prompt`; the pass threshold is
// `min_pass_rate` and the checks live under `assertions`.
const SCENARIO_SCAFFOLD = `# stepproof scenario — edit this, then run: stepproof run ./scenarios/first-test.yaml
name: First Test
description: "Test your AI agent's response quality"
iterations: 5

steps:
  - id: step-1
    provider: openai      # "openai" or "anthropic"
    model: gpt-4o-mini    # any model id your provider account can call
    prompt: "Explain what you do in one sentence."
    min_pass_rate: 0.8    # 80% of runs must pass
    assertions:
      - type: contains
        value: "AI"       # replace with text you expect in the response

# More assertion types: contains, not_contains, regex, json_schema, llm_judge
# Full docs: https://github.com/StanislavBG/stepproof
`;

/**
 * Writes the starter scenario `first-test.yaml` into `outputDir`
 * (default: `./scenarios`), creating the directory when needed, and prints
 * next-step hints.
 *
 * If the file already exists it is left untouched and the process exits
 * with status 0.
 *
 * @param outputDir optional target directory, resolved against the current
 *                  working directory
 */
export function runInit(outputDir) {
    const dir = resolve(outputDir ?? './scenarios');
    const dest = resolve(dir, 'first-test.yaml');
    // Show the familiar relative path for the default location and the real
    // location when the caller chose another directory.
    const shownPath = outputDir === undefined || outputDir === null ? './scenarios/first-test.yaml' : dest;
    if (existsSync(dest)) {
        console.log(`Scenario already exists: ${dest}`);
        console.log(`Edit it, then run: stepproof run ${shownPath}`);
        process.exit(0);
    }
    mkdirSync(dir, { recursive: true });
    writeFileSync(dest, SCENARIO_SCAFFOLD, 'utf-8');
    console.log(`\n✔ Created ${dest}`);
    console.log('');
    console.log('Next:');
    console.log('  1. Edit the scenario — replace the prompt and checks with your actual test');
    console.log(`  2. stepproof run ${shownPath}`);
    console.log('');
    // NOTE(review): this hint passes a directory to `run`; confirm directory
    // input is supported by the run command.
    console.log('Add to CI: stepproof run ./scenarios/ --format sarif --output results.sarif');
    console.log('');
    console.log('Ready for a deploy gate? Try: npx agent-gate init');
}
|
|
39
|
+
//# sourceMappingURL=init.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/commands/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAE/B,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;CAezB,CAAC;AAEF,MAAM,UAAU,OAAO,CAAC,SAAkB;IACxC,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,IAAI,aAAa,CAAC,CAAC;IAChD,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;IAE7C,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,GAAG,CAAC,4BAA4B,IAAI,EAAE,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC;QAC5E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACpC,aAAa,CAAC,IAAI,EAAE,iBAAiB,EAAE,OAAO,CAAC,CAAC;IAEhD,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC;IACnC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChB,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACrB,OAAO,CAAC,GAAG,CAAC,8EAA8E,CAAC,CAAC;IAC5F,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;IAC9D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChB,OAAO,CAAC,GAAG,CAAC,6EAA6E,CAAC,CAAC;IAC3F,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChB,OAAO,CAAC,GAAG,CAAC,mDAAmD,CAAC,CAAC;AACnE,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { Scenario } from './types.js';
|
|
2
|
+
/**
 * Reads the YAML scenario file at `filePath`, validates it and returns it
 * as a {@link Scenario}.
 *
 * Throws an `Error` when the file cannot be read, the YAML is invalid, or a
 * required field (scenario `name`, non-empty `steps`, per-step `id`,
 * `provider`, `model`, `prompt`) is missing or malformed.
 */
export declare function parseScenario(filePath: string): Scenario;
|
|
3
|
+
/**
 * Replaces `{{key}}` placeholders in `template`.
 *
 * Each placeholder is resolved, in order, against: a prior step output
 * (`{{step_id.output}}`, looked up in `stepOutputs`), the scenario
 * `variables`, then the process environment. Placeholders that resolve
 * nowhere are left in the text unchanged.
 */
export declare function substituteVariables(template: string, variables: Record<string, string>, stepOutputs: Record<string, string>): string;
|
|
4
|
+
//# sourceMappingURL=scenario-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario-parser.d.ts","sourceRoot":"","sources":["../../src/core/scenario-parser.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAQ,MAAM,YAAY,CAAC;AAEjD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAuCxD;AA0CD,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAClC,MAAM,CAyBR"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as yaml from 'js-yaml';
|
|
3
|
+
/**
 * Loads and validates a scenario file.
 *
 * @param filePath path of the YAML scenario file
 * @returns the scenario with defaults applied: `iterations` falls back to
 *          10 and `variables` to an empty object
 * @throws Error when the file cannot be read, the YAML is invalid, the
 *         top level is not a mapping, `name`/`steps` are missing,
 *         `iterations` is not a positive integer, `variables` is not a
 *         mapping, a step is invalid, or two steps share an id
 */
export function parseScenario(filePath) {
    let content;
    try {
        content = fs.readFileSync(filePath, 'utf-8');
    }
    catch (e) {
        // Keep the underlying reason (missing file, directory, permissions…)
        // so the user can tell why the read failed.
        const reason = e instanceof Error ? e.message : String(e);
        throw new Error(`Cannot read scenario file: ${filePath} (${reason})`);
    }
    let raw;
    try {
        // NOTE(review): with js-yaml 3.x `load` can construct arbitrary JS
        // types; confirm the installed version is 4.x (safe by default) or
        // switch to `safeLoad` if scenario files may be untrusted.
        raw = yaml.load(content);
    }
    catch (e) {
        throw new Error(`Invalid YAML in scenario file: ${e instanceof Error ? e.message : String(e)}`);
    }
    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
        throw new Error('Scenario file must be a YAML object');
    }
    const scenario = raw;
    if (!scenario.name || typeof scenario.name !== 'string') {
        throw new Error('Scenario must have a "name" field (string)');
    }
    if (!Array.isArray(scenario.steps) || scenario.steps.length === 0) {
        throw new Error('Scenario must have a "steps" array with at least one step');
    }
    // A run with zero, negative, fractional or NaN iterations cannot produce
    // a meaningful pass rate, so reject it here rather than at report time.
    let iterations = 10;
    if (scenario.iterations !== undefined && scenario.iterations !== null) {
        if (!Number.isInteger(scenario.iterations) || scenario.iterations < 1) {
            throw new Error('Scenario "iterations" must be a positive integer');
        }
        iterations = scenario.iterations;
    }
    let variables = {};
    if (scenario.variables !== undefined && scenario.variables !== null) {
        if (typeof scenario.variables !== 'object' || Array.isArray(scenario.variables)) {
            throw new Error('Scenario "variables" must be a mapping of names to values');
        }
        variables = scenario.variables;
    }
    const steps = scenario.steps.map((rawStep, i) => validateStep(rawStep, i));
    // Step results and {{step_id.output}} references are keyed by id, so ids
    // have to be unique within a scenario.
    const seenIds = new Set();
    for (const step of steps) {
        if (seenIds.has(step.id)) {
            throw new Error(`Duplicate step id "${step.id}": step ids must be unique`);
        }
        seenIds.add(step.id);
    }
    return {
        name: scenario.name,
        iterations,
        variables,
        steps,
    };
}
|
|
38
|
+
/**
 * Validates one raw step from the scenario file and returns it in
 * normalized form.
 *
 * @param raw   the step as parsed from YAML
 * @param index zero-based position of the step, used in error messages
 * @returns the step with `min_pass_rate` defaulted to 0.8, `assertions`
 *          defaulted to an empty array and `system` set only when it is a
 *          string
 * @throws Error when the step is not a mapping, when `id`, `provider`,
 *         `model` or `prompt` is missing or malformed, when
 *         `min_pass_rate` is not a number within [0, 1], or when
 *         `assertions` is present but not an array
 */
function validateStep(raw, index) {
    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
        throw new Error(`Step ${index + 1} must be an object`);
    }
    const step = raw;
    if (!step.id || typeof step.id !== 'string') {
        throw new Error(`Step ${index + 1} must have an "id" field (string)`);
    }
    const pos = `step "${step.id}"`;
    if (!step.provider || !['openai', 'anthropic'].includes(step.provider)) {
        throw new Error(`${pos}: "provider" must be "openai" or "anthropic"`);
    }
    if (!step.model || typeof step.model !== 'string') {
        throw new Error(`${pos}: "model" field is required (string)`);
    }
    if (!step.prompt || typeof step.prompt !== 'string') {
        throw new Error(`${pos}: "prompt" field is required (string)`);
    }
    let minPassRate = 0.8;
    if (step.min_pass_rate !== undefined && step.min_pass_rate !== null) {
        if (typeof step.min_pass_rate !== 'number') {
            // A threshold that is silently replaced by the default would
            // weaken or tighten the gate without the author noticing.
            throw new Error(`${pos}: "min_pass_rate" must be a number between 0.0 and 1.0`);
        }
        // NaN fails every comparison, so it must be rejected explicitly:
        // otherwise "passRate < NaN" is always false and the step can never
        // fall below its threshold.
        if (!Number.isFinite(step.min_pass_rate) || step.min_pass_rate < 0 || step.min_pass_rate > 1) {
            throw new Error(`${pos}: "min_pass_rate" must be between 0.0 and 1.0`);
        }
        minPassRate = step.min_pass_rate;
    }
    if (step.assertions !== undefined && step.assertions !== null && !Array.isArray(step.assertions)) {
        throw new Error(`${pos}: "assertions" must be an array`);
    }
    return {
        id: step.id,
        provider: step.provider,
        model: step.model,
        prompt: step.prompt,
        system: typeof step.system === 'string' ? step.system : undefined,
        min_pass_rate: minPassRate,
        assertions: Array.isArray(step.assertions) ? step.assertions : [],
    };
}
|
|
70
|
+
/**
 * Expands `{{key}}` placeholders in a prompt template.
 *
 * Resolution order for each placeholder (surrounding whitespace inside the
 * braces is ignored):
 *   1. `{{step_id.output}}` — output of an earlier step, from `stepOutputs`
 *   2. a scenario variable from `variables`
 *   3. an environment variable of the same name
 * A placeholder that matches none of these is left in place so the caller
 * can decide whether that is an error.
 *
 * Only the objects' own entries are consulted, so names such as
 * `constructor` or `toString` are never resolved to inherited members.
 *
 * @param template    text containing zero or more placeholders
 * @param variables   scenario-level variables
 * @param stepOutputs outputs of steps already run in this iteration
 * @returns the template with every resolvable placeholder replaced
 */
export function substituteVariables(template, variables, stepOutputs) {
    const ownValue = (source, name) => source !== null && source !== undefined && Object.prototype.hasOwnProperty.call(source, name)
        ? source[name]
        : undefined;
    return template.replace(/\{\{([^}]+)\}\}/g, (match, key) => {
        const trimmed = key.trim();
        // Check for step output reference: {{step_id.output}}
        if (trimmed.includes('.')) {
            const [stepId, field] = trimmed.split('.', 2);
            const stepOutput = ownValue(stepOutputs, stepId);
            if (field === 'output' && stepOutput !== undefined) {
                return String(stepOutput);
            }
        }
        // Check global variables
        const variable = ownValue(variables, trimmed);
        if (variable !== undefined) {
            return String(variable);
        }
        // Check environment variables; real environment values are always
        // strings, which excludes anything inherited from Object.prototype.
        const fromEnv = process.env[trimmed];
        if (typeof fromEnv === 'string') {
            return fromEnv;
        }
        // Leave unresolved — caller decides if this is an error
        return match;
    });
}
|
|
92
|
+
//# sourceMappingURL=scenario-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario-parser.js","sourceRoot":"","sources":["../../src/core/scenario-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,SAAS,CAAC;AAGhC,MAAM,UAAU,aAAa,CAAC,QAAgB;IAC5C,IAAI,OAAe,CAAC;IACpB,IAAI,CAAC;QACH,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,GAAY,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC3B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,kCAAmC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,QAAQ,GAAG,GAA8B,CAAC;IAEhD,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,OAAO,QAAQ,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QACxD,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;IAC/E,CAAC;IAED,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,OAAgB,EAAE,CAAS,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAE5F,OAAO;QACL,IAAI,EAAE,QAAQ,CAAC,IAAI;QACnB,UAAU,EAAE,OAAO,QAAQ,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;QAC9E,SAAS,EAAE,OAAO,QAAQ,CAAC,SAAS,KAAK,QAAQ,IAAI,QAAQ,CAAC,SAAS,KAAK,IAAI;YAC9E,CAAC,CAAC,QAAQ,CAAC,SAAmC;YAC9C,CAAC,CAAC,EAAE;QACN,KAAK;KACN,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CAAC,GAAY,EAAE,KAAa;IAC/C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QACpC,MAAM,IAAI,KAAK,CAAC,QAAQ,KAAK,GAAG,CAAC,oBAAoB,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,IAAI,GAAG,GAA8B,CAAC;IAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,KAAK,GAAG,CAAC,EAAE,CAAC;IAEhE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,OAAO,IAAI,CAAC,EAAE,KAAK,QAAQ,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,QAAQ,KAAK,GAAG,CAAC,mCAAmC,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,QAAQ,EAAE,WAAW,
CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAkB,CAAC,EAAE,CAAC;QACjF,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,8CAA8C,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;QAClD,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,sCAAsC,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACpD,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,uCAAuC,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,IAAI,CAAC,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC;IACtF,IAAI,WAAW,GAAG,CAAC,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,+CAA+C,CAAC,CAAC;IACzE,CAAC;IAED,OAAO;QACL,EAAE,EAAE,IAAI,CAAC,EAAE;QACX,QAAQ,EAAE,IAAI,CAAC,QAAkC;QACjD,KAAK,EAAE,IAAI,CAAC,KAAe;QAC3B,MAAM,EAAE,IAAI,CAAC,MAAgB;QAC7B,MAAM,EAAE,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;QACjE,aAAa,EAAE,WAAW;QAC1B,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;KAClE,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,QAAgB,EAChB,SAAiC,EACjC,WAAmC;IAEnC,OAAO,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QACzD,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QAE3B,sDAAsD;QACtD,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YAC9C,IAAI,KAAK,KAAK,QAAQ,IAAI,WAAW,CAAC,MAAM,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC5D,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,yBAAyB;QACzB,IAAI,SAAS,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;YACrC,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QAED,8BAA8B;QAC9B,IAAI,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;YACvC,OAAO,OAAO,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC;QAC/B,CAAC;QAED,wDAAwD;QACxD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Scenario, ScenarioReport } from './types.js';
|
|
2
|
+
/** Optional settings and progress callbacks accepted by `runScenario`. */
export interface RunOptions {
|
|
3
|
+
/** Iteration count to use instead of the one in the scenario file */
|
|
4
|
+
iterations?: number;
|
|
5
|
+
/** Called after all steps of an iteration have run; `iteration` counts from 1 and `total` is the number of iterations of the run */
|
|
6
|
+
onIterationComplete?: (iteration: number, total: number) => void;
|
|
7
|
+
/** Called after each step within an iteration, with the step's id and whether that run of the step passed */
|
|
8
|
+
onStepComplete?: (stepId: string, passed: boolean) => void;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Runs every step of `scenario` once per iteration and resolves with a
 * report holding the individual step results, a per-step pass-rate summary
 * and an overall `allPassed` flag (true when no step is below its
 * `min_pass_rate`).
 *
 * `scenarioFilePath` is used to derive the scenario's directory, which is
 * handed to the assertion engine. A failing provider call is recorded as a
 * failed step result rather than rejecting the promise.
 */
export declare function runScenario(scenario: Scenario, scenarioFilePath: string, options?: RunOptions): Promise<ScenarioReport>;
|
|
11
|
+
//# sourceMappingURL=scenario-runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario-runner.d.ts","sourceRoot":"","sources":["../../src/core/scenario-runner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAA2B,MAAM,YAAY,CAAC;AAEpF,MAAM,WAAW,UAAU;IACzB,6CAA6C;IAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4CAA4C;IAC5C,mBAAmB,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACjE,iDAAiD;IACjD,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,KAAK,IAAI,CAAC;CAC5D;AAED,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,QAAQ,EAClB,gBAAgB,EAAE,MAAM,EACxB,OAAO,GAAE,UAAe,GACvB,OAAO,CAAC,cAAc,CAAC,CA8FzB"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import { getAdapter } from '../adapters/index.js';
|
|
3
|
+
import { runAssertions } from '../assertions/engine.js';
|
|
4
|
+
import { substituteVariables } from './scenario-parser.js';
|
|
5
|
+
/**
 * Executes a scenario and aggregates the results into a report.
 *
 * For every iteration the steps run in order; each step's prompt (and
 * optional system prompt) has its placeholders substituted, is sent through
 * the provider adapter, and the output is checked by the assertion engine.
 * A step run passes when the provider call succeeded and all assertions
 * passed. A failed provider call is recorded on the step result (`error`)
 * and leaves an empty output for later `{{step_id.output}}` references.
 *
 * @param scenario         parsed scenario
 * @param scenarioFilePath path of the scenario file; its directory is passed
 *                         to the assertion engine
 * @param options          iteration override and progress callbacks
 * @returns the report: per-run results, per-step summaries and `allPassed`
 * @throws Error when the effective iteration count is not a positive integer
 */
export async function runScenario(scenario, scenarioFilePath, options = {}) {
    const iterations = options.iterations ?? scenario.iterations ?? 10;
    // Without at least one whole iteration every pass rate would be 0 and
    // the run would fail with no hint as to why.
    if (!Number.isInteger(iterations) || iterations < 1) {
        throw new Error(`"iterations" must be a positive integer, got ${iterations}`);
    }
    const scenarioDir = path.dirname(path.resolve(scenarioFilePath));
    const variables = scenario.variables ?? {};
    const startedAt = new Date().toISOString();
    const startMs = Date.now();
    const allResults = [];
    // stepId -> running totals, filled while the steps execute
    const tallies = new Map();
    for (let i = 1; i <= iterations; i++) {
        // Outputs are scoped to one iteration.
        const stepOutputs = {};
        for (const step of scenario.steps) {
            const resolvedPrompt = substituteVariables(step.prompt, variables, stepOutputs);
            const resolvedSystem = step.system
                ? substituteVariables(step.system, variables, stepOutputs)
                : undefined;
            const stepStartMs = Date.now();
            let output = '';
            let error;
            let callFailed = false;
            try {
                const adapter = getAdapter(step.provider, step.model);
                output = await adapter.call(resolvedPrompt, resolvedSystem);
                stepOutputs[step.id] = output;
            }
            catch (e) {
                // Track the failure separately from the message: a thrown
                // non-Error value or an Error with an empty message must
                // still count as a failed call.
                callFailed = true;
                error = e instanceof Error && e.message ? e.message : String(e);
                stepOutputs[step.id] = '';
            }
            const durationMs = Date.now() - stepStartMs;
            let assertionResults = [];
            let assertionsPassed = false;
            if (!callFailed) {
                const { results, allPassed } = await runAssertions(output, step.assertions, scenarioDir);
                assertionResults = results;
                assertionsPassed = allPassed;
            }
            const stepPassed = !callFailed && assertionsPassed;
            allResults.push({
                stepId: step.id,
                iteration: i,
                output,
                passed: stepPassed,
                assertionResults,
                error,
                durationMs,
            });
            const tally = tallies.get(step.id) ?? { runs: 0, passes: 0 };
            tally.runs += 1;
            if (stepPassed) {
                tally.passes += 1;
            }
            tallies.set(step.id, tally);
            options.onStepComplete?.(step.id, stepPassed);
        }
        options.onIterationComplete?.(i, iterations);
    }
    // Aggregate per-step summaries
    const steps = scenario.steps.map((step) => {
        const { runs, passes } = tallies.get(step.id) ?? { runs: 0, passes: 0 };
        const passRate = runs > 0 ? passes / runs : 0;
        const minPassRate = step.min_pass_rate ?? 0.8;
        return {
            stepId: step.id,
            totalRuns: runs,
            passes,
            failures: runs - passes,
            passRate,
            minPassRate,
            belowThreshold: passRate < minPassRate,
        };
    });
    const allPassed = steps.every((s) => !s.belowThreshold);
    const completedAt = new Date().toISOString();
    const durationMs = Date.now() - startMs;
    return {
        scenarioName: scenario.name,
        iterations,
        startedAt,
        completedAt,
        durationMs,
        steps,
        allPassed,
        results: allResults,
    };
}
|
|
85
|
+
//# sourceMappingURL=scenario-runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario-runner.js","sourceRoot":"","sources":["../../src/core/scenario-runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAY3D,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAkB,EAClB,gBAAwB,EACxB,UAAsB,EAAE;IAExB,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC;IACnE,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACjE,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,IAAI,EAAE,CAAC;IAE3C,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC3B,MAAM,UAAU,GAAiB,EAAE,CAAC;IAEpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,WAAW,GAA2B,EAAE,CAAC;QAE/C,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YAClC,MAAM,cAAc,GAAG,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;YAChF,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM;gBAChC,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC;gBAC1D,CAAC,CAAC,SAAS,CAAC;YAEd,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC/B,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,KAAyB,CAAC;YAE9B,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtD,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,cAAc,CAAC,CAAC;gBAC5D,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC;YAChC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,KAAK,GAAI,CAAW,CAAC,OAAO,CAAC;gBAC7B,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC;YAC5B,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC;YAE5C,IAAI,gBAAgB,GAA0D,EAAE,CAAC;YACjF,IAAI,gBAAgB,GAAG,KAAK,CAAC;YAE7B,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;gBACzF,gBAAgB,GAAG,OAAO,CAAC;gBAC3B,gBAAgB,GAAG,SAAS,CAAC;YAC/B,CAAC;YAED,MAAM,UAAU,GAAG,CAAC,KAAK,IAAI,gBAAgB,CAAC;YAE9C,MAAM,MAAM,GAAe;gBACzB,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,SAAS,EAAE,CAAC;gBACZ,MAAM;gBACN,MAAM,EAAE,UAAU;gBACl
B,gBAAgB;gBAChB,KAAK;gBACL,UAAU;aACX,CAAC;YAEF,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACxB,OAAO,CAAC,cAAc,EAAE,CAAC,IAAI,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAED,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAC/C,CAAC;IAED,+BAA+B;IAC/B,MAAM,KAAK,GAAkB,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACvD,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAC1D,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,MAAM,CAAC;QAC7C,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,IAAI,GAAG,CAAC;QAE9C,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,SAAS,EAAE,WAAW,CAAC,MAAM;YAC7B,MAAM;YACN,QAAQ;YACR,QAAQ;YACR,WAAW;YACX,cAAc,EAAE,QAAQ,GAAG,WAAW;SACvC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACxD,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;IAExC,OAAO;QACL,YAAY,EAAE,QAAQ,CAAC,IAAI;QAC3B,UAAU;QACV,SAAS;QACT,WAAW;QACX,UAAU;QACV,KAAK;QACL,SAAS;QACT,OAAO,EAAE,UAAU;KACpB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/** Identifier of a model provider that a step (or an `llm_judge` assertion) can name. */
export type Provider =
    | 'openai'
    | 'anthropic';
|
|
2
|
+
/** Discriminator for the kinds of assertion that can be declared on a step. */
export type AssertionType =
    | 'contains'
    | 'not_contains'
    | 'regex'
    | 'json_schema'
    | 'llm_judge';
|
|
3
|
+
/**
 * One entry of a step's `assertions` list.
 *
 * `type` selects the kind of check; each optional field below is only
 * meaningful for the assertion types named in its comment.
 */
export interface Assertion {
    type: AssertionType;
    /** Used by `contains`, `not_contains` and `regex`. */
    value?: string;
    /** `json_schema` only: path to the JSON schema file (relative to the scenario file). */
    schema?: string;
    /** `llm_judge` only: the evaluation prompt. */
    prompt?: string;
    /** `llm_judge` only: the expected response prefix. Defaults to "yes". */
    pass_on?: string;
    /** `llm_judge` only: provider override. Defaults to anthropic. */
    provider?: Provider;
    /** `llm_judge` only: model override. Defaults to claude-haiku or gpt-4o-mini. */
    model?: string;
}
|
|
18
|
+
/** One step of a scenario: which provider/model to use, its prompt, and its assertions. */
export interface Step {
    id: string;
    provider: Provider;
    model: string;
    /**
     * Prompt template. `{{variable}}` substitutes a variable and
     * `{{step_id.output}}` substitutes the output of a prior step.
     */
    prompt: string;
    /** System prompt (optional). */
    system?: string;
    /** Minimum pass rate threshold, in the range 0.0–1.0. Defaults to 0.8. */
    min_pass_rate?: number;
    assertions: Assertion[];
}
|
|
30
|
+
/** A named list of steps together with the optional settings shared by them. */
export interface Scenario {
    name: string;
    /** How many iterations to run. Defaults to 10. */
    iterations?: number;
    /** Global variables made available for template substitution. */
    variables?: Record<string, string>;
    steps: Step[];
}
|
|
38
|
+
/** Outcome of a single assertion: its type, whether it passed, and an optional message. */
export interface AssertionResult {
    type: string;
    passed: boolean;
    message?: string;
}
|
|
43
|
+
/**
 * Record of one step in one iteration: the output produced, the individual
 * assertion results, an optional error string, and the time taken.
 */
export interface StepResult {
    stepId: string;
    iteration: number;
    output: string;
    passed: boolean;
    assertionResults: AssertionResult[];
    error?: string;
    /** Duration in milliseconds. */
    durationMs: number;
}
|
|
52
|
+
/**
 * Per-step aggregate read by the reporters.
 *
 * The JUnit and SARIF reporters multiply `passRate` and `minPassRate` by 100
 * to print percentages, and treat `belowThreshold` as the failure flag.
 */
export interface StepSummary {
    stepId: string;
    totalRuns: number;
    passes: number;
    failures: number;
    passRate: number;
    minPassRate: number;
    belowThreshold: boolean;
}
|
|
61
|
+
/**
 * Top-level report for one scenario run; this is the object the JSON, JUnit
 * and SARIF reporters receive.
 */
export interface ScenarioReport {
    scenarioName: string;
    iterations: number;
    startedAt: string;
    completedAt: string;
    /** Duration in milliseconds (the JUnit reporter divides it by 1000 to print seconds). */
    durationMs: number;
    /** One summary per step. */
    steps: StepSummary[];
    allPassed: boolean;
    /** The individual step results. */
    results: StepResult[];
}
|
|
71
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,QAAQ,GAAG,WAAW,CAAC;AAE9C,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,cAAc,GAAG,OAAO,GAAG,aAAa,GAAG,WAAW,CAAC;AAEhG,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,aAAa,CAAC;IACpB,wCAAwC;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mEAAmE;IACnE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4DAA4D;IAC5D,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,2EAA2E;IAC3E,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,kGAAkG;IAClG,MAAM,EAAE,MAAM,CAAC;IACf,6BAA6B;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,0DAA0D;IAC1D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,+CAA+C;IAC/C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,WAAW,EAAE,CAAC;IACrB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,UAAU,EAAE,CAAC;CACvB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-reporter.d.ts","sourceRoot":"","sources":["../../src/reporters/json-reporter.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAEvD,wBAAgB,eAAe,CAAC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAGhF;AAED,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAE/D"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
/**
 * Serialise `report` as 2-space-indented JSON and write it synchronously to
 * `outputPath` as UTF-8.
 *
 * @param report - Value to serialise.
 * @param outputPath - Destination file path.
 */
export function writeJsonReport(report, outputPath) {
    fs.writeFileSync(outputPath, JSON.stringify(report, null, 2), 'utf-8');
}
|
|
6
|
+
/**
 * Render `report` as a JSON string indented with two spaces.
 *
 * @param report - Value to serialise.
 * @returns The pretty-printed JSON text.
 */
export function formatJsonReport(report) {
    const indentWidth = 2;
    const serialized = JSON.stringify(report, null, indentWidth);
    return serialized;
}
|
|
9
|
+
//# sourceMappingURL=json-reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-reporter.js","sourceRoot":"","sources":["../../src/reporters/json-reporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAG9B,MAAM,UAAU,eAAe,CAAC,MAAsB,EAAE,UAAkB;IACxE,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAC7C,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,MAAsB;IACrD,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AACzC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"junit-reporter.d.ts","sourceRoot":"","sources":["../../src/reporters/junit-reporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAWvD,wBAAgB,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CA8B1D"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Escape the five XML special characters so `str` can be placed safely inside
 * element text or a double- or single-quoted attribute value.
 *
 * @param str - Raw text to escape.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by their predefined
 *   XML entities.
 */
function escapeXml(str) {
    return str
        // `&` must be handled first; otherwise the ampersands introduced by
        // the later replacements would themselves be escaped again.
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
|
|
9
|
+
/**
 * Render a scenario report as a JUnit XML document.
 *
 * Each entry of `report.steps` becomes one `<testcase>`; an entry whose
 * `belowThreshold` flag is set additionally gets a `<failure>` child stating
 * the observed pass rate, the required threshold and the passes/total count.
 *
 * @param report - Report object providing `scenarioName`, `durationMs` and `steps`.
 * @returns The XML document as a single newline-joined string.
 */
export function formatJunit(report) {
    const caseCount = report.steps.length;
    const failedCount = report.steps.reduce((count, entry) => (entry.belowThreshold ? count + 1 : count), 0);
    // durationMs is in milliseconds; JUnit's `time` attribute is in seconds.
    const elapsedSeconds = (report.durationMs / 1000).toFixed(3);

    const renderCase = (entry) => {
        const actualPct = (entry.passRate * 100).toFixed(1);
        const requiredPct = (entry.minPassRate * 100).toFixed(0);
        const opening = ` <testcase name="${escapeXml(entry.stepId)}" classname="stepproof.steps" time="0">`;
        if (!entry.belowThreshold) {
            return `${opening}\n </testcase>`;
        }
        const summary = `Pass rate ${actualPct}% below threshold ${requiredPct}%`;
        const detail = `${escapeXml(entry.stepId)}: ${entry.passes}/${entry.totalRuns} iterations passed`;
        return `${opening}\n <failure message="${escapeXml(summary)}">${escapeXml(detail)}</failure>\n </testcase>`;
    };
    const caseXml = report.steps.map((entry) => renderCase(entry));

    const documentLines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        `<testsuites name="stepproof" tests="${caseCount}" failures="${failedCount}" time="${elapsedSeconds}">`,
        ` <testsuite name="${escapeXml(report.scenarioName)}" tests="${caseCount}" failures="${failedCount}" time="${elapsedSeconds}">`,
        ...caseXml,
        ' </testsuite>',
        '</testsuites>',
    ];
    return documentLines.join('\n');
}
|
|
34
|
+
//# sourceMappingURL=junit-reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"junit-reporter.js","sourceRoot":"","sources":["../../src/reporters/junit-reporter.ts"],"names":[],"mappings":"AAEA,SAAS,SAAS,CAAC,GAAW;IAC5B,OAAO,GAAG;SACP,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,MAAsB;IAChD,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;IACvC,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,MAAM,CAAC;IAC1E,MAAM,WAAW,GAAG,CAAC,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE1D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QAC1C,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAEzD,MAAM,OAAO,GAAG,uBAAuB,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,yCAAyC,CAAC;QAEvG,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,MAAM,cAAc,GAAG,aAAa,WAAW,qBAAqB,YAAY,GAAG,CAAC;YACpF,MAAM,WAAW,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,oBAAoB,CAAC;YACpG,OAAO,GAAG,OAAO,6BAA6B,SAAS,CAAC,cAAc,CAAC,KAAK,SAAS,CAAC,WAAW,CAAC,6BAA6B,CAAC;QAClI,CAAC;QAED,OAAO,GAAG,OAAO,mBAAmB,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG;QACZ,wCAAwC;QACxC,uCAAuC,UAAU,eAAe,aAAa,WAAW,WAAW,IAAI;QACvG,sBAAsB,SAAS,CAAC,MAAM,CAAC,YAAY,CAAC,YAAY,UAAU,eAAe,aAAa,WAAW,WAAW,IAAI;QAChI,GAAG,SAAS;QACZ,gBAAgB;QAChB,eAAe;KAChB,CAAC;IAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sarif-reporter.d.ts","sourceRoot":"","sources":["../../src/reporters/sarif-reporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAEvD,wBAAgB,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,CAmD1D"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Render a scenario report as a SARIF 2.1.0 log, serialised as 2-space JSON.
 *
 * Every entry of `report.steps` produces one rule (keyed by `stepId`) and one
 * result: level `error` when the step's `belowThreshold` flag is set, level
 * `none` otherwise.
 *
 * @param report - Report object providing `steps`.
 * @param toolVersion - Version reported as `tool.driver.version`. Previously
 *   hard-coded to the stale value '0.1.0'; it now defaults to the published
 *   package version and can be overridden by the caller.
 * @returns The SARIF document as a JSON string.
 */
export function formatSarif(report, toolVersion = '0.2.0') {
    const rules = report.steps.map((step) => ({
        id: step.stepId,
        name: step.stepId,
        shortDescription: {
            text: `Step: ${step.stepId} — min pass rate ${(step.minPassRate * 100).toFixed(0)}%`,
        },
    }));
    const results = report.steps.map((step) => {
        // passRate / minPassRate are fractions; scale to percentages for display.
        const passRatePct = (step.passRate * 100).toFixed(1);
        const thresholdPct = (step.minPassRate * 100).toFixed(0);
        if (step.belowThreshold) {
            return {
                ruleId: step.stepId,
                level: 'error',
                message: {
                    text: `Step "${step.stepId}" pass rate ${passRatePct}% is below threshold ${thresholdPct}% (${step.passes}/${step.totalRuns} iterations passed)`,
                },
            };
        }
        return {
            ruleId: step.stepId,
            level: 'none',
            message: {
                text: `Step "${step.stepId}" passed — ${passRatePct}% pass rate (${step.passes}/${step.totalRuns} iterations passed)`,
            },
        };
    });
    const sarif = {
        $schema: 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [
            {
                tool: {
                    driver: {
                        name: 'stepproof',
                        version: toolVersion,
                        rules,
                    },
                },
                results,
            },
        ],
    };
    return JSON.stringify(sarif, null, 2);
}
|
|
47
|
+
//# sourceMappingURL=sarif-reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sarif-reporter.js","sourceRoot":"","sources":["../../src/reporters/sarif-reporter.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,WAAW,CAAC,MAAsB;IAChD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACxC,EAAE,EAAE,IAAI,CAAC,MAAM;QACf,IAAI,EAAE,IAAI,CAAC,MAAM;QACjB,gBAAgB,EAAE;YAChB,IAAI,EAAE,SAAS,IAAI,CAAC,MAAM,oBAAoB,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SACrF;KACF,CAAC,CAAC,CAAC;IAEJ,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxC,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAEzD,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,OAAO;gBACd,OAAO,EAAE;oBACP,IAAI,EAAE,SAAS,IAAI,CAAC,MAAM,eAAe,WAAW,wBAAwB,YAAY,MAAM,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,qBAAqB;iBACjJ;aACF,CAAC;QACJ,CAAC;QAED,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,KAAK,EAAE,MAAM;YACb,OAAO,EAAE;gBACP,IAAI,EAAE,SAAS,IAAI,CAAC,MAAM,cAAc,WAAW,gBAAgB,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,qBAAqB;aACtH;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG;QACZ,OAAO,EACL,gGAAgG;QAClG,OAAO,EAAE,OAAO;QAChB,IAAI,EAAE;YACJ;gBACE,IAAI,EAAE;oBACJ,MAAM,EAAE;wBACN,IAAI,EAAE,WAAW;wBACjB,OAAO,EAAE,OAAO;wBAChB,KAAK;qBACN;iBACF;gBACD,OAAO;aACR;SACF;KACF,CAAC;IAEF,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ScenarioReport } from '../core/types.js';
|
|
2
|
+
/**
 * Type declaration for the terminal reporter's report printer.
 *
 * Accepts a scenario report and an optional `reportPath`, and returns nothing.
 * NOTE(review): the implementation is not visible from this declaration file;
 * presumably it writes a human-readable summary to the terminal and mentions
 * `reportPath` when one is given. Confirm against terminal-reporter.js.
 */
export declare function printReport(report: ScenarioReport, reportPath?: string): void;
|
|
3
|
+
/**
 * Type declaration for the terminal reporter's progress printer.
 *
 * Accepts a step id, an iteration number and a total, and returns nothing.
 * NOTE(review): the implementation is not visible from this declaration file;
 * whether `iteration` is 0- or 1-based and what `total` counts should be
 * confirmed against terminal-reporter.js.
 */
export declare function printProgress(stepId: string, iteration: number, total: number): void;
|
|
4
|
+
//# sourceMappingURL=terminal-reporter.d.ts.map
|