@itsthelore/proofkeeper 2026.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/NOTICE +10 -0
- package/README.md +207 -0
- package/dist/agent/adapters/claude.d.ts +93 -0
- package/dist/agent/adapters/claude.d.ts.map +1 -0
- package/dist/agent/adapters/claude.js +96 -0
- package/dist/agent/adapters/claude.js.map +1 -0
- package/dist/agent/drive.d.ts +53 -0
- package/dist/agent/drive.d.ts.map +1 -0
- package/dist/agent/drive.js +194 -0
- package/dist/agent/drive.js.map +1 -0
- package/dist/agent/loop.d.ts +40 -0
- package/dist/agent/loop.d.ts.map +1 -0
- package/dist/agent/loop.js +29 -0
- package/dist/agent/loop.js.map +1 -0
- package/dist/agent/model.d.ts +43 -0
- package/dist/agent/model.d.ts.map +1 -0
- package/dist/agent/model.js +10 -0
- package/dist/agent/model.js.map +1 -0
- package/dist/agent/observe.d.ts +48 -0
- package/dist/agent/observe.d.ts.map +1 -0
- package/dist/agent/observe.js +65 -0
- package/dist/agent/observe.js.map +1 -0
- package/dist/agent/tools.d.ts +74 -0
- package/dist/agent/tools.d.ts.map +1 -0
- package/dist/agent/tools.js +257 -0
- package/dist/agent/tools.js.map +1 -0
- package/dist/cli.d.ts +61 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +648 -0
- package/dist/cli.js.map +1 -0
- package/dist/compiler/actions.d.ts +101 -0
- package/dist/compiler/actions.d.ts.map +1 -0
- package/dist/compiler/actions.js +13 -0
- package/dist/compiler/actions.js.map +1 -0
- package/dist/compiler/compiler.d.ts +25 -0
- package/dist/compiler/compiler.d.ts.map +1 -0
- package/dist/compiler/compiler.js +42 -0
- package/dist/compiler/compiler.js.map +1 -0
- package/dist/compiler/emit.d.ts +21 -0
- package/dist/compiler/emit.d.ts.map +1 -0
- package/dist/compiler/emit.js +164 -0
- package/dist/compiler/emit.js.map +1 -0
- package/dist/compiler/http.d.ts +30 -0
- package/dist/compiler/http.d.ts.map +1 -0
- package/dist/compiler/http.js +30 -0
- package/dist/compiler/http.js.map +1 -0
- package/dist/compiler/recorder.d.ts +62 -0
- package/dist/compiler/recorder.d.ts.map +1 -0
- package/dist/compiler/recorder.js +148 -0
- package/dist/compiler/recorder.js.map +1 -0
- package/dist/compiler/summary.d.ts +11 -0
- package/dist/compiler/summary.d.ts.map +1 -0
- package/dist/compiler/summary.js +56 -0
- package/dist/compiler/summary.js.map +1 -0
- package/dist/compiler/terminal.d.ts +42 -0
- package/dist/compiler/terminal.d.ts.map +1 -0
- package/dist/compiler/terminal.js +47 -0
- package/dist/compiler/terminal.js.map +1 -0
- package/dist/compiler/types.d.ts +25 -0
- package/dist/compiler/types.d.ts.map +1 -0
- package/dist/compiler/types.js +10 -0
- package/dist/compiler/types.js.map +1 -0
- package/dist/coverage/graph.d.ts +55 -0
- package/dist/coverage/graph.d.ts.map +1 -0
- package/dist/coverage/graph.js +87 -0
- package/dist/coverage/graph.js.map +1 -0
- package/dist/coverage/model.d.ts +36 -0
- package/dist/coverage/model.d.ts.map +1 -0
- package/dist/coverage/model.js +57 -0
- package/dist/coverage/model.js.map +1 -0
- package/dist/coverage/report.d.ts +27 -0
- package/dist/coverage/report.d.ts.map +1 -0
- package/dist/coverage/report.js +45 -0
- package/dist/coverage/report.js.map +1 -0
- package/dist/coverage/source.d.ts +23 -0
- package/dist/coverage/source.d.ts.map +1 -0
- package/dist/coverage/source.js +48 -0
- package/dist/coverage/source.js.map +1 -0
- package/dist/fidelity/gate.d.ts +34 -0
- package/dist/fidelity/gate.d.ts.map +1 -0
- package/dist/fidelity/gate.js +38 -0
- package/dist/fidelity/gate.js.map +1 -0
- package/dist/index.d.ts +69 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +49 -0
- package/dist/index.js.map +1 -0
- package/dist/learning/store.d.ts +44 -0
- package/dist/learning/store.d.ts.map +1 -0
- package/dist/learning/store.js +64 -0
- package/dist/learning/store.js.map +1 -0
- package/dist/qa/concurrency.d.ts +7 -0
- package/dist/qa/concurrency.d.ts.map +1 -0
- package/dist/qa/concurrency.js +21 -0
- package/dist/qa/concurrency.js.map +1 -0
- package/dist/qa/run-qa.d.ts +87 -0
- package/dist/qa/run-qa.d.ts.map +1 -0
- package/dist/qa/run-qa.js +106 -0
- package/dist/qa/run-qa.js.map +1 -0
- package/dist/qa/run-scoped.d.ts +82 -0
- package/dist/qa/run-scoped.d.ts.map +1 -0
- package/dist/qa/run-scoped.js +96 -0
- package/dist/qa/run-scoped.js.map +1 -0
- package/dist/runner/playwright-report.d.ts +52 -0
- package/dist/runner/playwright-report.d.ts.map +1 -0
- package/dist/runner/playwright-report.js +90 -0
- package/dist/runner/playwright-report.js.map +1 -0
- package/dist/runner/playwright-runner.d.ts +38 -0
- package/dist/runner/playwright-runner.d.ts.map +1 -0
- package/dist/runner/playwright-runner.js +73 -0
- package/dist/runner/playwright-runner.js.map +1 -0
- package/dist/runner/types.d.ts +45 -0
- package/dist/runner/types.d.ts.map +1 -0
- package/dist/runner/types.js +10 -0
- package/dist/runner/types.js.map +1 -0
- package/dist/scaffold/scaffold.d.ts +22 -0
- package/dist/scaffold/scaffold.d.ts.map +1 -0
- package/dist/scaffold/scaffold.js +34 -0
- package/dist/scaffold/scaffold.js.map +1 -0
- package/dist/scope/config.d.ts +89 -0
- package/dist/scope/config.d.ts.map +1 -0
- package/dist/scope/config.js +172 -0
- package/dist/scope/config.js.map +1 -0
- package/dist/scope/diff-scope.d.ts +31 -0
- package/dist/scope/diff-scope.d.ts.map +1 -0
- package/dist/scope/diff-scope.js +42 -0
- package/dist/scope/diff-scope.js.map +1 -0
- package/dist/scope/glob.d.ts +17 -0
- package/dist/scope/glob.d.ts.map +1 -0
- package/dist/scope/glob.js +50 -0
- package/dist/scope/glob.js.map +1 -0
- package/dist/writeback/comment.d.ts +103 -0
- package/dist/writeback/comment.d.ts.map +1 -0
- package/dist/writeback/comment.js +150 -0
- package/dist/writeback/comment.js.map +1 -0
- package/dist/writeback/gateways/github-rest.d.ts +66 -0
- package/dist/writeback/gateways/github-rest.d.ts.map +1 -0
- package/dist/writeback/gateways/github-rest.js +107 -0
- package/dist/writeback/gateways/github-rest.js.map +1 -0
- package/dist/writeback/merge.d.ts +27 -0
- package/dist/writeback/merge.d.ts.map +1 -0
- package/dist/writeback/merge.js +89 -0
- package/dist/writeback/merge.js.map +1 -0
- package/dist/writeback/proposal.d.ts +52 -0
- package/dist/writeback/proposal.d.ts.map +1 -0
- package/dist/writeback/proposal.js +79 -0
- package/dist/writeback/proposal.js.map +1 -0
- package/dist/writeback/proposer.d.ts +94 -0
- package/dist/writeback/proposer.d.ts.map +1 -0
- package/dist/writeback/proposer.js +79 -0
- package/dist/writeback/proposer.js.map +1 -0
- package/dist/writeback/verified-by.d.ts +56 -0
- package/dist/writeback/verified-by.d.ts.map +1 -0
- package/dist/writeback/verified-by.js +60 -0
- package/dist/writeback/verified-by.js.map +1 -0
- package/package.json +62 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,648 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* The `proofkeeper` CLI.
|
|
4
|
+
*
|
|
5
|
+
* - `coverage` exposes the coverage read-model (Initiative 1).
|
|
6
|
+
* - `qa` (alias `verify`) runs the full QA loop for one capability: select →
|
|
7
|
+
* drive → compile → fidelity → run → (optional) propose the write-back.
|
|
8
|
+
*
|
|
9
|
+
* Exit codes are a stable contract: 0 = success (every capability verified, or
|
|
10
|
+
* the driven capability passed the fidelity gate), 1 = not verified (unverified
|
|
11
|
+
* capabilities, or an unstable test), 2 = usage/parse error.
|
|
12
|
+
*/
|
|
13
|
+
import { computeCoverage } from "./coverage/model.js";
|
|
14
|
+
import { renderHuman, renderJson } from "./coverage/report.js";
|
|
15
|
+
import { GraphParseError } from "./coverage/graph.js";
|
|
16
|
+
import { loadGraphFromCorpus, loadGraphFromFile } from "./coverage/source.js";
|
|
17
|
+
import { runQa } from "./qa/run-qa.js";
|
|
18
|
+
import { runScopedQa, collectFailureSuggestions } from "./qa/run-scoped.js";
|
|
19
|
+
import { parseConfig, ConfigParseError } from "./scope/config.js";
|
|
20
|
+
import { AutonomousDriver } from "./agent/drive.js";
|
|
21
|
+
import { ClaudeModelClient } from "./agent/adapters/claude.js";
|
|
22
|
+
import { CodegenCompiler } from "./compiler/compiler.js";
|
|
23
|
+
import { FileLearningStore } from "./learning/store.js";
|
|
24
|
+
import { PlaywrightRunner } from "./runner/playwright-runner.js";
|
|
25
|
+
import { GitHubRestGateway } from "./writeback/gateways/github-rest.js";
|
|
26
|
+
import { GitHubWriteBackProposer } from "./writeback/proposer.js";
|
|
27
|
+
import { renderScopedQaComment, upsertComment, SCOPED_QA_MARKER } from "./writeback/comment.js";
|
|
28
|
+
import { scaffoldConfig, renderScaffoldedConfig } from "./scaffold/scaffold.js";
|
|
29
|
+
import { execFile } from "node:child_process";
|
|
30
|
+
import { readFile, writeFile, stat } from "node:fs/promises";
|
|
31
|
+
import { readFileSync } from "node:fs";
|
|
32
|
+
import { promisify } from "node:util";
|
|
33
|
+
const execFileAsync = promisify(execFile);
|
|
34
|
+
const EXIT_OK = 0;
|
|
35
|
+
const EXIT_UNVERIFIED = 1;
|
|
36
|
+
const EXIT_USAGE = 2;
|
|
37
|
+
const USAGE = `proofkeeper — autonomous verification for the Lore family
|
|
38
|
+
|
|
39
|
+
Usage:
|
|
40
|
+
proofkeeper coverage (--graph-file <path> | --corpus <dir>) [--json]
|
|
41
|
+
proofkeeper init (--graph-file <path> | --corpus <dir>) [--url <url>] [--out <path>]
|
|
42
|
+
proofkeeper qa (--graph-file <path> | --corpus <dir>) --url <url> [options]
|
|
43
|
+
proofkeeper qa (--graph-file <path> | --corpus <dir>) --config <path>
|
|
44
|
+
(--changed <files> | --base-ref <ref>) [options]
|
|
45
|
+
proofkeeper --help
|
|
46
|
+
|
|
47
|
+
Commands:
|
|
48
|
+
coverage Report which Lore capabilities have no verifying (verified_by) test.
|
|
49
|
+
init Scaffold a proofkeeper.config.json from the coverage graph: one
|
|
50
|
+
capability per requirement node, plus a starter environment block.
|
|
51
|
+
Reads only the published Lore contract; never overwrites a file.
|
|
52
|
+
qa Drive one capability, compile a test, gate it on fidelity, and
|
|
53
|
+
(optionally) propose the Verified By write-back. Alias: verify.
|
|
54
|
+
With --config, scope to a change: drive every unverified capability
|
|
55
|
+
the changed files touch and post the evidence to a pull request.
|
|
56
|
+
|
|
57
|
+
Coverage options:
|
|
58
|
+
--graph-file <path> Read a 'rac export --graph' JSON file (primary).
|
|
59
|
+
--corpus <dir> Shell out to 'rac export --graph <dir>' (requires rac on PATH).
|
|
60
|
+
--json Emit the stable machine-readable contract.
|
|
61
|
+
|
|
62
|
+
init options:
|
|
63
|
+
--graph-file <path> | --corpus <dir> Coverage source (one required).
|
|
64
|
+
--url <url> Development environment URL (default: http://localhost:3000).
|
|
65
|
+
--out <path> Where to write the config (default: proofkeeper.config.json).
|
|
66
|
+
Refuses to overwrite an existing file.
|
|
67
|
+
|
|
68
|
+
qa options:
|
|
69
|
+
--graph-file <path> | --corpus <dir> Coverage source (one required).
|
|
70
|
+
--url <url> Product entry point the drive starts from (required).
|
|
71
|
+
--capability <id> Verify this capability (default: the first unverified).
|
|
72
|
+
--goal <text> Goal for the model (default: derived from the capability).
|
|
73
|
+
--target-name <name> Run target name (default: local).
|
|
74
|
+
--base-url <url> Base URL the compiled test runs against (default: --url).
|
|
75
|
+
--n <count> Fidelity re-runs the test must pass (default: 3).
|
|
76
|
+
--max-steps <count> Cap on model turns during the drive.
|
|
77
|
+
--out-dir <dir> Where the compiled .spec.ts is written (default: tests/generated).
|
|
78
|
+
--plan Emit a Markdown test plan before driving; show it in the PR.
|
|
79
|
+
--propose Propose a Verified By write-back PR when the test is stable.
|
|
80
|
+
--target-path <path> Artifact to write back to (required with --propose).
|
|
81
|
+
--repo <owner/name> Target repository for the write-back (required with --propose).
|
|
82
|
+
--base <branch> Base branch the write-back PR targets (default: main).
|
|
83
|
+
|
|
84
|
+
scoped qa options (with --config):
|
|
85
|
+
--config <path> Path map: which capabilities each changed file touches.
|
|
86
|
+
--changed <a,b,c> Comma-separated changed files (else --base-ref).
|
|
87
|
+
--base-ref <ref> Diff against this git ref to find changed files.
|
|
88
|
+
--concurrency <n> Capabilities driven at once (default: 3).
|
|
89
|
+
--url <url> Default start URL when a config capability declares none.
|
|
90
|
+
--propose Propose a write-back for capabilities that declare an artifact.
|
|
91
|
+
--repo <owner/name> Repository for write-backs and the PR comment.
|
|
92
|
+
--pr <number> Post the scoped-QA evidence comment on this pull request.
|
|
93
|
+
|
|
94
|
+
Model: qa uses the bundled Claude adapter when ANTHROPIC_API_KEY is set. Bring a
|
|
95
|
+
different provider by calling runQa() from the library with your own ModelClient.
|
|
96
|
+
Write-back: --propose needs a GitHub token in GITHUB_TOKEN.
|
|
97
|
+
|
|
98
|
+
Options:
|
|
99
|
+
--help, -h Show this help.
|
|
100
|
+
--version, -v Print the version.
|
|
101
|
+
|
|
102
|
+
Exit codes:
|
|
103
|
+
0 success (everything verified, or the driven test is stable)
|
|
104
|
+
1 not verified (unverified capabilities, or an unstable test)
|
|
105
|
+
2 usage or parse error
|
|
106
|
+
`;
|
|
107
|
+
/**
 * Raised when the command line cannot be acted on: an unknown option, a
 * missing value, conflicting flags, or a missing credential.
 */
class UsageError extends Error {
    /**
     * @param {string} message - Human-readable description of the problem.
     * @param {ErrorOptions} [options] - Standard Error options, e.g. `{ cause }`.
     */
    constructor(message, options) {
        super(message, options);
        // Without this the inherited name is "Error", so logs and stack traces
        // cannot tell a usage problem apart from an unexpected failure.
        this.name = "UsageError";
    }
}
|
|
109
|
+
/**
 * Parse `coverage` arguments.
 *
 * Recognises `--graph-file <path>`, `--corpus <dir>` and `--json`. Exactly one
 * of `--graph-file` / `--corpus` must be supplied.
 *
 * @param {string[]} argv - Arguments for the `coverage` command.
 * @returns {{ json: boolean, graphFile?: string, corpus?: string }} Parsed arguments.
 * @throws {UsageError} On an unknown option, a missing or empty option value,
 *   no coverage source, or both coverage sources.
 */
function parseCoverageArgs(argv) {
    const args = { json: false };
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        switch (arg) {
            case "--graph-file":
                // Validate the value as it is consumed (same helper the other
                // parsers use) so a trailing flag reports "missing value"
                // instead of the misleading "coverage requires ..." message.
                args.graphFile = requireValue(argv[++i], arg);
                break;
            case "--corpus":
                args.corpus = requireValue(argv[++i], arg);
                break;
            case "--json":
                args.json = true;
                break;
            default:
                throw new UsageError(`unknown option '${arg}'`);
        }
    }
    if (!args.graphFile && !args.corpus) {
        throw new UsageError("coverage requires --graph-file <path> or --corpus <dir>");
    }
    if (args.graphFile && args.corpus) {
        throw new UsageError("pass only one of --graph-file or --corpus");
    }
    return args;
}
|
|
138
|
+
/**
 * Run the `coverage` command.
 *
 * Loads the graph from the file or corpus named in the arguments, computes the
 * coverage report, and writes it to stdout (JSON when `--json` was given,
 * otherwise the human-readable rendering) followed by a newline.
 *
 * @param {string[]} argv - Arguments for the `coverage` command.
 * @returns {Promise<number>} `EXIT_UNVERIFIED` when the report lists any
 *   unverified entries, otherwise `EXIT_OK`.
 */
async function runCoverage(argv) {
    const { graphFile, corpus, json } = parseCoverageArgs(argv);
    let graph;
    if (graphFile) {
        graph = await loadGraphFromFile(graphFile);
    }
    else {
        graph = await loadGraphFromCorpus(corpus);
    }
    const report = computeCoverage(graph);
    const rendered = json ? renderJson(report) : renderHuman(report);
    process.stdout.write(rendered + "\n");
    if (report.unverified.length > 0) {
        return EXIT_UNVERIFIED;
    }
    return EXIT_OK;
}
|
|
147
|
+
const DEFAULT_CONFIG_PATH = "proofkeeper.config.json";
|
|
148
|
+
/**
 * Parse `init` arguments. Pure and exported so it is unit-testable.
 *
 * Every `init` option takes exactly one value. Exactly one of `--graph-file` /
 * `--corpus` must be given; `--out` falls back to `DEFAULT_CONFIG_PATH`.
 *
 * @param {string[]} argv - Arguments for the `init` command.
 * @returns {{ graphFile?: string, corpus?: string, url?: string, out: string }}
 * @throws {UsageError} On an unknown option, a missing value, no coverage
 *   source, or both coverage sources.
 */
export function parseInitArgs(argv) {
    // Option name -> field it fills; all of them consume the next argument.
    const fieldByFlag = new Map([
        ["--graph-file", "graphFile"],
        ["--corpus", "corpus"],
        ["--url", "url"],
        ["--out", "out"],
    ]);
    const raw = {};
    let index = 0;
    while (index < argv.length) {
        const flag = argv[index];
        const field = fieldByFlag.get(flag);
        if (field === undefined) {
            throw new UsageError(`unknown option '${flag}'`);
        }
        raw[field] = requireValue(argv[index + 1], flag);
        index += 2;
    }
    const hasFile = Boolean(raw.graphFile);
    const hasCorpus = Boolean(raw.corpus);
    if (!hasFile && !hasCorpus) {
        throw new UsageError("init requires --graph-file <path> or --corpus <dir>");
    }
    if (hasFile && hasCorpus) {
        throw new UsageError("pass only one of --graph-file or --corpus");
    }
    // Optional fields are omitted entirely rather than set to undefined.
    const parsed = {};
    if (raw.graphFile !== undefined) {
        parsed.graphFile = raw.graphFile;
    }
    if (raw.corpus !== undefined) {
        parsed.corpus = raw.corpus;
    }
    if (raw.url !== undefined) {
        parsed.url = raw.url;
    }
    parsed.out = raw.out ?? DEFAULT_CONFIG_PATH;
    return parsed;
}
|
|
183
|
+
/**
 * Report whether `stat` succeeds for a path.
 *
 * Any `stat` failure — not only a missing entry — is reported as `false`.
 *
 * @param {string} path - Path to probe.
 * @returns {Promise<boolean>} `true` when `stat` resolves, otherwise `false`.
 */
async function pathExists(path) {
    let found = true;
    try {
        await stat(path);
    }
    catch {
        found = false;
    }
    return found;
}
|
|
193
|
+
/**
 * Run the `init` command: scaffold a config file from the coverage graph.
 *
 * Loads the graph, refuses to proceed when the output path already exists,
 * writes the rendered config, and prints follow-up guidance to stdout.
 *
 * @param {string[]} argv - Arguments for the `init` command.
 * @returns {Promise<number>} `EXIT_OK` once the file has been written.
 * @throws {UsageError} When the arguments are invalid or the output path
 *   already exists.
 */
async function runInit(argv) {
    const args = parseInitArgs(argv);
    const graph = args.graphFile
        ? await loadGraphFromFile(args.graphFile)
        : await loadGraphFromCorpus(args.corpus);
    const alreadyExists = `'${args.out}' already exists — remove it or pass --out <path> to write elsewhere`;
    // Never overwrite: refuse before generating so the user's file is untouched.
    if (await pathExists(args.out)) {
        throw new UsageError(alreadyExists);
    }
    const config = scaffoldConfig(graph, { ...(args.url !== undefined ? { url: args.url } : {}) });
    try {
        // "wx" makes the create exclusive, so the no-overwrite guarantee also
        // holds if the file appears after the check above or if that check
        // could not stat the path.
        await writeFile(args.out, renderScaffoldedConfig(config), { encoding: "utf8", flag: "wx" });
    }
    catch (err) {
        if (err?.code === "EEXIST") {
            throw new UsageError(alreadyExists, { cause: err });
        }
        throw err;
    }
    const count = config.capabilities.length;
    process.stdout.write(`Wrote ${args.out} with ${count} capabilit${count === 1 ? "y" : "ies"} from the coverage graph.\n` +
        "Next steps:\n" +
        " - Narrow each capability's path globs from 'src/**' to the files it owns.\n" +
        " - Set your environment URLs and, if the product needs sign-in, an auth block.\n" +
        " - Add personas for role-specific flows.\n");
    return EXIT_OK;
}
|
|
212
|
+
/**
 * Ensure an option was followed by a value.
 *
 * @param {string | undefined} value - The argument that followed the flag.
 * @param {string} flag - The flag name, used in the error message.
 * @returns {string} `value`, unchanged.
 * @throws {UsageError} When `value` is `undefined` or the empty string.
 */
function requireValue(value, flag) {
    const isMissing = value === undefined || value === "";
    if (isMissing) {
        throw new UsageError(`missing value for ${flag}`);
    }
    return value;
}
|
|
217
|
+
/**
 * Parse an option value that must be a positive integer.
 *
 * Only plain decimal digits are accepted. `Number()` alone would also accept
 * hex (`0x10`), exponent (`1e3`) and whitespace-padded input, and would
 * silently round values beyond the safe-integer range.
 *
 * @param {string | undefined} value - The argument that followed the flag.
 * @param {string} flag - The flag name, used in error messages.
 * @returns {number} The parsed integer, always >= 1.
 * @throws {UsageError} When the value is missing or is not a positive integer.
 */
function parsePositiveInt(value, flag) {
    const text = requireValue(value, flag);
    const n = /^[0-9]+$/.test(text) ? Number(text) : Number.NaN;
    if (!Number.isSafeInteger(n) || n < 1) {
        throw new UsageError(`${flag} must be a positive integer`);
    }
    return n;
}
|
|
223
|
+
/**
 * Parse `qa`/`verify` arguments. Pure and exported so it is unit-testable.
 *
 * Exactly one of `--graph-file` / `--corpus` is required, as is `--url`.
 * With `--propose`, `--target-path` and a well-formed `--repo owner/name` are
 * required too. Defaults: `targetName` "local", `baseUrl` = `url`, `n` 3,
 * `outDir` "tests/generated", `plan` false, `propose` false.
 *
 * @param {string[]} argv - Arguments for the `qa` command.
 * @returns {object} Parsed arguments; optional fields are omitted when not given.
 * @throws {UsageError} On an unknown option, a missing or invalid value, or a
 *   missing/conflicting required option.
 */
export function parseQaArgs(argv) {
    // Options that take a free-form string value.
    const textFlags = new Map([
        ["--graph-file", "graphFile"],
        ["--corpus", "corpus"],
        ["--capability", "capability"],
        ["--url", "url"],
        ["--goal", "goal"],
        ["--target-name", "targetName"],
        ["--base-url", "baseUrl"],
        ["--out-dir", "outDir"],
        ["--target-path", "targetPath"],
        ["--repo", "repo"],
        ["--base", "base"],
    ]);
    // Options that take a positive integer.
    const countFlags = new Map([
        ["--n", "n"],
        ["--max-steps", "maxSteps"],
    ]);
    // Boolean switches that take no value.
    const switchFlags = new Map([
        ["--plan", "plan"],
        ["--propose", "propose"],
    ]);
    const raw = { propose: false };
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (textFlags.has(arg)) {
            raw[textFlags.get(arg)] = requireValue(argv[++i], arg);
        }
        else if (countFlags.has(arg)) {
            raw[countFlags.get(arg)] = parsePositiveInt(argv[++i], arg);
        }
        else if (switchFlags.has(arg)) {
            raw[switchFlags.get(arg)] = true;
        }
        else {
            throw new UsageError(`unknown option '${arg}'`);
        }
    }
    if (!raw.graphFile && !raw.corpus) {
        throw new UsageError("qa requires --graph-file <path> or --corpus <dir>");
    }
    if (raw.graphFile && raw.corpus) {
        throw new UsageError("pass only one of --graph-file or --corpus");
    }
    if (!raw.url) {
        throw new UsageError("qa requires --url <url>");
    }
    if (raw.propose) {
        if (!raw.targetPath) {
            throw new UsageError("--propose requires --target-path <path>");
        }
        if (!raw.repo) {
            throw new UsageError("--propose requires --repo <owner/name>");
        }
        // Require exactly two non-empty segments. A bare includes("/") lets
        // "acme/", "/app" and "a/b/c" through, which are later split into an
        // empty or wrong owner/repo pair.
        if (!/^[^\/\s]+\/[^\/\s]+$/.test(raw.repo)) {
            throw new UsageError("--repo must be 'owner/name'");
        }
    }
    return {
        ...(raw.graphFile !== undefined ? { graphFile: raw.graphFile } : {}),
        ...(raw.corpus !== undefined ? { corpus: raw.corpus } : {}),
        ...(raw.capability !== undefined ? { capability: raw.capability } : {}),
        url: raw.url,
        ...(raw.goal !== undefined ? { goal: raw.goal } : {}),
        targetName: raw.targetName ?? "local",
        baseUrl: raw.baseUrl ?? raw.url,
        n: raw.n ?? 3,
        ...(raw.maxSteps !== undefined ? { maxSteps: raw.maxSteps } : {}),
        outDir: raw.outDir ?? "tests/generated",
        plan: raw.plan ?? false,
        propose: raw.propose ?? false,
        ...(raw.targetPath !== undefined ? { targetPath: raw.targetPath } : {}),
        ...(raw.base !== undefined ? { base: raw.base } : {}),
        ...(raw.repo !== undefined ? { repo: raw.repo } : {}),
    };
}
|
|
313
|
+
/**
 * Resolve a model from the environment (bundled Claude adapter).
 *
 * @returns {ClaudeModelClient} A new client when `ANTHROPIC_API_KEY` is set to
 *   a non-empty value.
 * @throws {UsageError} When `ANTHROPIC_API_KEY` is unset or empty.
 */
function resolveModel() {
    const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY);
    if (!hasApiKey) {
        throw new UsageError("qa needs a model: set ANTHROPIC_API_KEY to use the bundled Claude adapter, " +
            "or call runQa() from the library with your own ModelClient.");
    }
    return new ClaudeModelClient();
}
|
|
320
|
+
/**
 * A browser-backed drive seam: launch Chromium, drive, always close.
 *
 * @param {object} model - Model client handed to each `AutonomousDriver`.
 * @returns {(options: object) => Promise<unknown>} A function that launches a
 *   Chromium browser, drives a fresh page with the given options, and resolves
 *   with whatever `drive()` returns. The browser is closed whether the drive
 *   succeeds or throws.
 */
function browserDrive(model) {
    const driveInBrowser = async (options) => {
        // Loaded lazily, on the first drive, rather than at module load.
        const playwright = await import("@playwright/test");
        const browser = await playwright.chromium.launch();
        try {
            const page = await browser.newPage();
            const driver = new AutonomousDriver(page, model, options);
            return await driver.drive();
        }
        finally {
            await browser.close();
        }
    };
    return driveInBrowser;
}
|
|
334
|
+
/**
 * Build the write-back proposer for a `qa --propose` run.
 *
 * @param {{ propose: boolean, repo?: string }} args - Parsed qa arguments;
 *   `repo` is read as `owner/name` when `propose` is set.
 * @returns {GitHubWriteBackProposer | undefined} A proposer backed by a GitHub
 *   REST gateway, or `undefined` when `--propose` was not requested.
 * @throws {UsageError} When `--propose` is set but no token is available.
 */
function resolveProposer(args) {
    if (!args.propose) {
        return undefined;
    }
    // `||`, not `??`: an environment variable that is set but empty (e.g. an
    // unset CI secret expanded to "") must still fall through to GH_TOKEN.
    const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
    if (!token) {
        throw new UsageError("--propose needs a GitHub token in GITHUB_TOKEN (contents:write + pull_requests:write).");
    }
    const [owner, repo] = args.repo.split("/", 2);
    const gateway = new GitHubRestGateway({ owner, repo, token });
    return new GitHubWriteBackProposer(gateway);
}
|
|
345
|
+
/**
 * Run the `qa` command for a single capability.
 *
 * When `--config` appears anywhere in `argv` the run is handed to
 * `runScopedCommand` instead. Otherwise the arguments are parsed, the model
 * and optional proposer are resolved, the graph is loaded, `runQa` is invoked,
 * and the rendered result is written to stdout.
 *
 * @param {string[]} argv - Arguments for the `qa` command.
 * @returns {Promise<number>} `EXIT_OK` when the result is verified, otherwise
 *   `EXIT_UNVERIFIED` (or whatever the scoped command returns).
 */
async function runQaCommand(argv) {
    // PR-triggered scoped mode is selected by --config.
    if (argv.includes("--config")) {
        return runScopedCommand(argv);
    }
    const args = parseQaArgs(argv);
    // Resolve credentials before loading the graph so configuration problems
    // surface first.
    const model = resolveModel();
    const proposer = resolveProposer(args);
    let graph;
    if (args.graphFile) {
        graph = await loadGraphFromFile(args.graphFile);
    }
    else {
        graph = await loadGraphFromCorpus(args.corpus);
    }
    // Optional settings are only attached when they were actually supplied.
    const options = { graph };
    if (args.capability !== undefined) {
        options.capabilityId = args.capability;
    }
    options.startUrl = args.url;
    if (args.goal !== undefined) {
        options.goal = args.goal;
    }
    options.target = { name: args.targetName, baseURL: args.baseUrl };
    options.n = args.n;
    if (args.maxSteps !== undefined) {
        options.maxSteps = args.maxSteps;
    }
    if (args.plan) {
        options.plan = true;
    }
    if (args.propose) {
        const propose = { targetPath: args.targetPath };
        if (args.base !== undefined) {
            propose.baseBranch = args.base;
        }
        options.propose = propose;
    }
    const deps = {
        drive: browserDrive(model),
        compiler: new CodegenCompiler({ outDir: args.outDir }),
        runner: new PlaywrightRunner(),
        learning: new FileLearningStore(),
    };
    if (proposer) {
        deps.proposer = proposer;
    }
    const result = await runQa(deps, options);
    process.stdout.write(renderQaResult(result) + "\n");
    if (result.verified) {
        return EXIT_OK;
    }
    return EXIT_UNVERIFIED;
}
|
|
380
|
+
/**
 * Render a single-capability QA result as a multi-line summary.
 *
 * Always emits the capability, drive, compiled-spec and fidelity lines. A
 * write-back line is appended when a write-back result is present, or when the
 * capability verified but no write-back was requested.
 *
 * @param {object} result - The value `runQa` resolved with.
 * @returns {string} Newline-joined summary without a trailing newline.
 */
function renderQaResult(result) {
    const verdict = result.loop.verdict;
    const driveOutcome = result.drive.finished ? "finished" : "stopped at step budget";
    const stability = verdict.stable ? "stable" : "unstable, quarantined";
    const summary = [];
    summary.push(`Capability: ${result.capability.id} — ${result.capability.title}`);
    summary.push(`Drive: ${result.drive.steps} step(s), ${driveOutcome}`);
    summary.push(`Compiled: ${result.loop.candidate.specPath}`);
    summary.push(`Fidelity: ${verdict.passed}/${verdict.attempts} re-runs green — ${stability}`);
    const writeBack = result.writeBack;
    if (writeBack) {
        if (writeBack.status === "proposed") {
            summary.push(`Write-back: proposed ${writeBack.url}`);
        }
        else {
            summary.push(`Write-back: no change (${writeBack.reason})`);
        }
    }
    else if (result.verified) {
        summary.push("Write-back: not requested (pass --propose to open a PR)");
    }
    return summary.join("\n");
}
|
|
398
|
+
/**
 * Parse `qa --config …` (scoped) arguments. Pure and exported for testing.
 *
 * Requires exactly one coverage source (`--graph-file` / `--corpus`),
 * `--config`, and exactly one of `--changed` / `--base-ref`. `--propose` and
 * `--pr` both require `--repo`, and any `--repo` must be `owner/name`.
 * Defaults: `targetName` "local", `n` 3, `outDir` "tests/generated",
 * `plan` false, `propose` false.
 *
 * @param {string[]} argv - Arguments for the scoped `qa` command.
 * @returns {object} Parsed arguments; optional fields are omitted when not
 *   given. `changed` is the comma-separated list, trimmed, with empty entries
 *   dropped.
 * @throws {UsageError} On an unknown option, a missing or invalid value, or a
 *   missing/conflicting required option.
 */
export function parseScopedArgs(argv) {
    // Options that take a free-form string value.
    const textFlags = new Map([
        ["--graph-file", "graphFile"],
        ["--corpus", "corpus"],
        ["--config", "config"],
        ["--base-ref", "baseRef"],
        ["--url", "url"],
        ["--target-name", "targetName"],
        ["--out-dir", "outDir"],
        ["--base", "base"],
        ["--repo", "repo"],
    ]);
    // Options that take a positive integer.
    const countFlags = new Map([
        ["--n", "n"],
        ["--max-steps", "maxSteps"],
        ["--concurrency", "concurrency"],
        ["--pr", "pr"],
    ]);
    // Boolean switches that take no value.
    const switchFlags = new Map([
        ["--plan", "plan"],
        ["--propose", "propose"],
    ]);
    const raw = { propose: false };
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (arg === "--changed") {
            raw.changed = requireValue(argv[++i], arg)
                .split(",")
                .map((s) => s.trim())
                .filter(Boolean);
        }
        else if (textFlags.has(arg)) {
            raw[textFlags.get(arg)] = requireValue(argv[++i], arg);
        }
        else if (countFlags.has(arg)) {
            raw[countFlags.get(arg)] = parsePositiveInt(argv[++i], arg);
        }
        else if (switchFlags.has(arg)) {
            raw[switchFlags.get(arg)] = true;
        }
        else {
            throw new UsageError(`unknown option '${arg}'`);
        }
    }
    if (!raw.graphFile && !raw.corpus) {
        throw new UsageError("qa requires --graph-file <path> or --corpus <dir>");
    }
    if (raw.graphFile && raw.corpus) {
        throw new UsageError("pass only one of --graph-file or --corpus");
    }
    if (!raw.config) {
        throw new UsageError("scoped qa requires --config <path>");
    }
    if (!raw.changed && !raw.baseRef) {
        throw new UsageError("scoped qa requires --changed <files> or --base-ref <ref>");
    }
    if (raw.changed && raw.baseRef) {
        throw new UsageError("pass only one of --changed or --base-ref");
    }
    if ((raw.propose || raw.pr !== undefined) && !raw.repo) {
        throw new UsageError("--propose / --pr require --repo <owner/name>");
    }
    // Require exactly two non-empty segments. A bare includes("/") lets
    // "acme/", "/app" and "a/b/c" through, which are later split into an
    // empty or wrong owner/repo pair.
    if (raw.repo && !/^[^\/\s]+\/[^\/\s]+$/.test(raw.repo)) {
        throw new UsageError("--repo must be 'owner/name'");
    }
    return {
        ...(raw.graphFile !== undefined ? { graphFile: raw.graphFile } : {}),
        ...(raw.corpus !== undefined ? { corpus: raw.corpus } : {}),
        config: raw.config,
        ...(raw.changed !== undefined ? { changed: raw.changed } : {}),
        ...(raw.baseRef !== undefined ? { baseRef: raw.baseRef } : {}),
        ...(raw.url !== undefined ? { url: raw.url } : {}),
        targetName: raw.targetName ?? "local",
        n: raw.n ?? 3,
        ...(raw.maxSteps !== undefined ? { maxSteps: raw.maxSteps } : {}),
        outDir: raw.outDir ?? "tests/generated",
        plan: raw.plan ?? false,
        ...(raw.concurrency !== undefined ? { concurrency: raw.concurrency } : {}),
        propose: raw.propose,
        ...(raw.base !== undefined ? { base: raw.base } : {}),
        ...(raw.repo !== undefined ? { repo: raw.repo } : {}),
        ...(raw.pr !== undefined ? { pr: raw.pr } : {}),
    };
}
|
|
493
|
+
/**
 * Files changed against a git ref, as `git diff --name-only <ref>`.
 *
 * @param {string} baseRef - Git revision to diff the working tree against.
 * @returns {Promise<string[]>} Changed paths exactly as git reports them.
 * @throws {UsageError} When `baseRef` looks like an option, or when the git
 *   command fails.
 */
async function gitChangedFiles(baseRef) {
    // A ref beginning with "-" would be parsed by git as an option (for
    // example --output=<file>), so refuse it instead of passing it through.
    if (baseRef.startsWith("-")) {
        throw new UsageError(`--base-ref must be a git revision, not an option: '${baseRef}'`);
    }
    try {
        // -z: NUL-terminated, unquoted names. Without it git quotes and
        // octal-escapes paths containing non-ASCII or special characters, and
        // those mangled names never match the path globs in the config.
        // Trailing "--": the argument is always read as a revision, never as a path.
        const { stdout } = await execFileAsync("git", ["diff", "--name-only", "-z", baseRef, "--"], { maxBuffer: 16 * 1024 * 1024 });
        return stdout.split("\0").filter(Boolean);
    }
    catch (err) {
        throw new UsageError(`git diff --name-only ${baseRef} failed: ${err.message}`, { cause: err });
    }
}
|
|
503
|
+
/**
 * Map a scoped run into the PR comment input.
 *
 * @param {object} result - Scoped run result; reads `result.driven`,
 *   `result.scope.scoped` and `result.scope.unknown`.
 * @param {number} changedCount - Number of changed files, passed through.
 * @param {Array} [failureSuggestions=[]] - Included only when non-empty.
 * @returns {object} `{ changedCount, driven, alreadyVerified, unknown }` plus
 *   `failureSuggestions` when any were supplied. Each driven row carries
 *   either `error`, or `stable` and (for a proposed write-back) `writeBackUrl`.
 */
function toScopedComment(result, changedCount, failureSuggestions = []) {
    const driven = [];
    for (const entry of result.driven) {
        const { id, title } = entry.capability;
        if (entry.error !== undefined) {
            // A drive that errored has no result to summarise.
            driven.push({ id, title, error: entry.error });
            continue;
        }
        const outcome = entry.result;
        const row = { id, title, stable: outcome.verified };
        if (outcome.writeBack?.status === "proposed") {
            row.writeBackUrl = outcome.writeBack.url;
        }
        driven.push(row);
    }
    const alreadyVerified = [];
    for (const scoped of result.scope.scoped) {
        if (scoped.verified) {
            alreadyVerified.push({ id: scoped.id, title: scoped.title });
        }
    }
    const comment = {
        changedCount,
        driven,
        alreadyVerified,
        unknown: result.scope.unknown,
    };
    if (failureSuggestions.length > 0) {
        comment.failureSuggestions = failureSuggestions;
    }
    return comment;
}
|
|
522
|
+
/**
 * Run scoped QA for the capabilities touched by a set of changed files.
 *
 * Parses the arguments, loads the graph and config, resolves the changed paths
 * (from `args.changed`, else `git diff` against `args.baseRef`), runs
 * `runScopedQa`, writes the rendered comment to stdout and, when a gateway and
 * a PR number are available, upserts the same comment on the PR.
 *
 * @param {string[]} argv - Arguments for the scoped command.
 * @returns {Promise<number>} `EXIT_UNVERIFIED` when any driven capability
 *   errored or did not end up verified, otherwise `EXIT_OK`.
 * @throws {UsageError} When the config file cannot be read, when a repo is
 *   given without a GitHub token, or when the repo is not `owner/name`.
 */
async function runScopedCommand(argv) {
    const args = parseScopedArgs(argv);
    const model = resolveModel();
    const graph = args.graphFile
        ? await loadGraphFromFile(args.graphFile)
        : await loadGraphFromCorpus(args.corpus);
    let configText;
    try {
        configText = await readFile(args.config, "utf8");
    }
    catch (err) {
        throw new UsageError(`could not read config '${args.config}': ${err.message}`);
    }
    const config = parseConfig(configText);
    const changedPaths = args.changed ?? (await gitChangedFiles(args.baseRef));
    // A gateway is needed to propose write-backs and/or post the PR comment.
    let gateway;
    if (args.repo) {
        // `||` rather than `??`: an empty GITHUB_TOKEN must still fall back to GH_TOKEN.
        const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN;
        if (!token)
            throw new UsageError("--propose / --pr need a GitHub token in GITHUB_TOKEN.");
        const [owner, repo] = args.repo.split("/", 2);
        // Fail early instead of handing an undefined owner/repo to the gateway.
        if (!owner || !repo)
            throw new UsageError(`repo must be given as 'owner/name', got '${args.repo}'`);
        gateway = new GitHubRestGateway({ owner, repo, token });
    }
    const proposer = args.propose && gateway ? new GitHubWriteBackProposer(gateway) : undefined;
    // Per-capability isolated output so concurrent drives never clobber each other.
    const dirSlug = (id) => id.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "") || "capability";
    const learning = new FileLearningStore();
    const deps = {
        drive: browserDrive(model),
        makeCompiler: (id) => new CodegenCompiler({ outDir: `${args.outDir}/${dirSlug(id)}` }),
        makeRunner: (id) => new PlaywrightRunner({ outputDir: `test-results/${dirSlug(id)}` }),
        learning,
        ...(proposer ? { proposer } : {}),
    };
    const result = await runScopedQa(deps, {
        graph,
        config,
        changedPaths,
        targetName: args.targetName,
        ...(args.url !== undefined ? { defaultUrl: args.url } : {}),
        n: args.n,
        ...(args.maxSteps !== undefined ? { maxSteps: args.maxSteps } : {}),
        ...(args.plan ? { plan: true } : {}),
        ...(args.concurrency !== undefined ? { concurrency: args.concurrency } : {}),
        ...(args.propose ? { propose: { ...(args.base !== undefined ? { baseBranch: args.base } : {}) } } : {}),
    });
    // Failure-learning: surface recorded failure modes in the report (suggest_in_report).
    // The catalog-write strategies write outside the propose-only boundary — deferred.
    const strategy = config.failureLearning ?? "suggest_in_report";
    if (strategy !== "suggest_in_report") {
        process.stderr.write(`note: failureLearning '${strategy}' writes catalog updates outside the propose-only ` +
            "boundary and is not yet wired; surfacing failure modes in the report instead.\n");
    }
    const suggestions = await collectFailureSuggestions(result, learning);
    // Render once so stdout and the PR comment share the same body.
    const commentBody = renderScopedQaComment(toScopedComment(result, changedPaths.length, suggestions));
    process.stdout.write(commentBody + "\n");
    if (gateway && args.pr !== undefined) {
        await upsertComment(gateway, {
            number: args.pr,
            marker: SCOPED_QA_MARKER,
            body: commentBody,
        });
    }
    // Gate red if any touched-and-unverified capability did not become stable.
    const anyUnverified = result.driven.some((d) => d.error !== undefined || d.result?.verified !== true);
    return anyUnverified ? EXIT_UNVERIFIED : EXIT_OK;
}
|
|
589
|
+
// ---------------------------------------------------------------------------
|
|
590
|
+
// dispatch
|
|
591
|
+
// ---------------------------------------------------------------------------
|
|
592
|
+
/**
 * Read the package version from the `package.json` one directory above this
 * module (works from dist/ and via tsx).
 *
 * @returns {string} The manifest's `version`, or "0.0.0" when it is absent.
 */
function readVersion() {
    const manifestUrl = new URL("../package.json", import.meta.url);
    const manifestText = readFileSync(manifestUrl, "utf8");
    const { version } = JSON.parse(manifestText);
    return version ?? "0.0.0";
}
|
|
597
|
+
/**
 * CLI dispatcher: route the first argument to its command handler.
 *
 * With no command the usage text is printed and `EXIT_USAGE` is returned;
 * `--help`/`-h` print the usage text and `--version`/`-v` print the version,
 * both returning `EXIT_OK`. `UsageError`, `GraphParseError` and
 * `ConfigParseError` thrown by a handler are reported on stderr and turned into
 * `EXIT_USAGE`; any other error is rethrown.
 *
 * @param {string[]} argv - Arguments after the executable and script path.
 * @returns {Promise<number>} The process exit code.
 */
export async function main(argv) {
    const [command, ...rest] = argv;
    if (command === undefined) {
        process.stdout.write(USAGE);
        return EXIT_USAGE;
    }
    if (command === "--help" || command === "-h") {
        process.stdout.write(USAGE);
        return EXIT_OK;
    }
    if (command === "--version" || command === "-v") {
        process.stdout.write(readVersion() + "\n");
        return EXIT_OK;
    }
    try {
        if (command === "coverage") {
            return await runCoverage(rest);
        }
        if (command === "init") {
            return await runInit(rest);
        }
        // `verify` is an alias of `qa`.
        if (command === "qa" || command === "verify") {
            return await runQaCommand(rest);
        }
        process.stderr.write(`unknown command '${command}'\n\n${USAGE}`);
        return EXIT_USAGE;
    }
    catch (err) {
        const isUsageError = err instanceof UsageError;
        if (isUsageError || err instanceof GraphParseError || err instanceof ConfigParseError) {
            // Only usage errors repeat the usage text; parse errors print the message alone.
            const report = isUsageError
                ? `error: ${err.message}\n\n${USAGE}`
                : `error: ${err.message}\n`;
            process.stderr.write(report);
            return EXIT_USAGE;
        }
        throw err;
    }
}
|
|
633
|
+
// Entry point when invoked as a binary — guarded so importing `main` (e.g. in
// tests) does not execute the CLI.
import { realpathSync as resolveRealPath } from "node:fs";
import { argv } from "node:process";
import { fileURLToPath } from "node:url";
/**
 * Whether this module is the script Node was started with.
 *
 * `argv[1]` is compared both as given and after resolving symlinks, because a
 * launcher symlink (such as an npm `.bin` shim) leaves `argv[1]` pointing at
 * the link while `import.meta.url` points at the real file.
 *
 * @returns {boolean} True when the entry script is this file.
 */
function wasInvokedDirectly() {
    const entry = argv[1];
    if (entry === undefined) {
        return false;
    }
    const self = fileURLToPath(import.meta.url);
    if (entry === self) {
        return true;
    }
    try {
        return resolveRealPath(entry) === self;
    }
    catch {
        // The entry path could not be resolved, so it cannot be this file.
        return false;
    }
}
const invokedDirectly = wasInvokedDirectly();
if (invokedDirectly) {
    main(argv.slice(2))
        .then((code) => {
            process.exitCode = code;
        })
        .catch((err) => {
            // A rejection is not guaranteed to be an Error instance.
            const reason = err instanceof Error ? err.message : String(err);
            process.stderr.write(`fatal: ${reason}\n`);
            process.exitCode = 1;
        });
}
|
|
648
|
+
//# sourceMappingURL=cli.js.map
|