@tritard/waterbrother 0.9.8 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -62
- package/package.json +1 -1
- package/src/cli.js +35 -20
- package/src/config.js +1 -0
- package/src/verifier.js +166 -0
- package/src/workflow.js +45 -1
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
#
|
|
1
|
+
# waterbrother
|
|
2
2
|
|
|
3
3
|
A local coding CLI that connects to Grok (`api.x.ai`) with codex/claude-style interactive workflows, local tool calls, session persistence, and approval controls.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Web docs interface
|
|
6
6
|
|
|
7
7
|
This repo includes a static docs web interface:
|
|
8
8
|
|
|
@@ -17,18 +17,18 @@ This repo includes a static docs web interface:
|
|
|
17
17
|
|
|
18
18
|
It is Vercel-ready via `vercel.json` (clean URLs, no build step required).
|
|
19
19
|
|
|
20
|
-
##
|
|
20
|
+
## Implemented features
|
|
21
21
|
|
|
22
22
|
- Interactive and one-shot chat modes
|
|
23
23
|
- Codex-style non-interactive commands:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
- `waterbrother exec <prompt>`
|
|
25
|
+
- `waterbrother review <prompt>`
|
|
26
|
+
- `waterbrother resume [session-id] [prompt]`
|
|
27
|
+
- `waterbrother resume --last`
|
|
28
28
|
- First-run onboarding wizard in terminal
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
- asks for API key
|
|
30
|
+
- offers opening `https://console.x.ai/`
|
|
31
|
+
- prompts for default model and agent profile
|
|
32
32
|
- Grok API integration (`/chat/completions`)
|
|
33
33
|
- Vision command for local images: `waterbrother vision <image-path> <prompt>`
|
|
34
34
|
- Authenticated GitHub repo reading for GitHub URLs, including private repos when `gh` is logged in
|
|
@@ -41,33 +41,33 @@ It is Vercel-ready via `vercel.json` (clean URLs, no build step required).
|
|
|
41
41
|
- Local model catalog (`waterbrother models catalog`)
|
|
42
42
|
- Onboarding guide command (`waterbrother onboarding`)
|
|
43
43
|
- Local self-update command (`waterbrother update`)
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
- git-clone installs pull latest source, install deps, and run checks
|
|
45
|
+
- npm installs upgrade with `npm install -g @tritard/waterbrother@latest`
|
|
46
46
|
- Environment diagnostics (`waterbrother doctor`)
|
|
47
47
|
- Tool calling for file, shell, search, and git tasks
|
|
48
48
|
- Diff preview in approval prompts: see exactly what will change before approving file writes and replacements
|
|
49
49
|
- Fuzzy whitespace-tolerant matching in `replace_in_file` to reduce failed edits
|
|
50
50
|
- Shell working directory tracking (`cd` commands update the shell cwd for subsequent calls)
|
|
51
51
|
- Approval policy for mutating/shell tools: `auto`, `on-request`, `never`
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
52
|
+
- supports path-aware allow/ask/deny rules via config
|
|
53
|
+
- supports command-aware shell allow/ask/deny rules via config
|
|
54
|
+
- includes `apply_patch`, `make_directory`, and `restore_checkpoint` in approval-protected actions
|
|
55
|
+
- `restore_checkpoint` is treated as high-risk and always requires explicit approval
|
|
56
|
+
- On-request prompt supports keyboard-first actions:
|
|
57
|
+
- `↑/↓` changes the highlighted approval row
|
|
58
|
+
- `Enter` or `y` approve once
|
|
59
|
+
- `p` saves a session approval rule for the current shell-command prefix or tool
|
|
60
|
+
- `Esc` denies and optionally provides alternate guidance
|
|
61
|
+
- the chooser renders in a bordered block with the default action highlighted
|
|
62
|
+
- the footer shows a short rules indicator when session approval rules are active
|
|
63
63
|
- AI-powered commit command: `waterbrother commit [--push]`
|
|
64
64
|
- Split config layers:
|
|
65
|
-
|
|
66
|
-
|
|
65
|
+
- user config (`~/.waterbrother/config.json`)
|
|
66
|
+
- project overrides (`.waterbrother/config.json`)
|
|
67
67
|
- Session persistence (`~/.waterbrother/sessions/*.json`)
|
|
68
68
|
- Two-tier project memory:
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
- global instructions (`~/.waterbrother/WATERBROTHER.md`)
|
|
70
|
+
- project instructions (`WATERBROTHER.md`) — both merged into system prompt
|
|
71
71
|
- Accurate token tracking using API usage data when available (falls back to estimation)
|
|
72
72
|
- Manual compaction command (`/compact`) for long-running sessions
|
|
73
73
|
- Session forking with `/fork`
|
|
@@ -76,39 +76,39 @@ It is Vercel-ready via `vercel.json` (clean URLs, no build step required).
|
|
|
76
76
|
- Git-backed local checkpoints with restore support (`/checkpoints`, `/rewind [id]`)
|
|
77
77
|
- Deterministic patch application tool (`apply_patch`) with preflight validation
|
|
78
78
|
- Turn contracts (new)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
79
|
+
- agent must declare intended scope before edits or risky shell calls
|
|
80
|
+
- contract includes summary, allowed paths, expected commands, and verification commands
|
|
81
|
+
- runtime blocks out-of-scope mutations automatically
|
|
82
82
|
- Turn receipts (new)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
83
|
+
- every tool-heavy or mutating turn writes a local receipt under `.waterbrother/receipts/`
|
|
84
|
+
- receipt captures contract, files touched, checkpoint, verification results, and command/tool provenance
|
|
85
|
+
- inspect with `/receipts`, `/receipt last`, `/receipt <id>`
|
|
86
|
+
- summary printing is configurable with `receiptMode` / `--receipts auto|off|verbose`
|
|
87
|
+
- default `auto` suppresses noisy receipt lines for minimal read-only turns
|
|
88
88
|
- Automatic post-edit verification
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
- verification commands from the turn contract run automatically after edits
|
|
90
|
+
- optional default verification commands available through config
|
|
91
91
|
- Auto-compaction near context limits (`autoCompactThreshold`, default `0.9`)
|
|
92
92
|
- Interactive slash controls with command palette
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
93
|
+
- `/` opens command menu
|
|
94
|
+
- `↑/↓` changes selection
|
|
95
|
+
- `Enter` accepts selected command
|
|
96
96
|
- Read-only file tools can inspect common home folders such as ~/Desktop, ~/Downloads, and ~/Documents without falling back to shell; /desktop, /downloads, and /documents are treated as aliases for those locations on macOS
|
|
97
97
|
- Turn presentation improvements
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
98
|
+
- streaming assistant output for faster perceived response
|
|
99
|
+
- explicit run-state tracking (`planning`, `reading`, `editing`, `running`, `reviewing`, `done`, `error`) persisted in session metadata
|
|
100
|
+
- heartbeat/stuck detection with interrupt hint during long-running steps
|
|
101
|
+
- spinner/progress animation while model or tools are running
|
|
102
|
+
- live visible trace lines during turns for phases like thinking and tool use, with verbose-only run-state heartbeat details
|
|
103
|
+
- per-turn summary with duration, tool outcomes, and token usage when available
|
|
104
|
+
- compact trace grouping so tool retries do not spam the status line
|
|
105
|
+
- formatted code-fence rendering with line numbers
|
|
106
106
|
- Headless pipe mode for one-shot automation:
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
- `-p` reads prompt from stdin or `--prompt`
|
|
108
|
+
- `--output-format text|json|stream-json`
|
|
109
109
|
- Production-readiness tracking page for the active P0/P1/P2 release matrix
|
|
110
110
|
|
|
111
|
-
##
|
|
111
|
+
## Quick start
|
|
112
112
|
|
|
113
113
|
User install:
|
|
114
114
|
|
|
@@ -157,10 +157,10 @@ waterbrother vision ./mockup.png "Suggest concrete CSS and layout improvements"
|
|
|
157
157
|
Git workflow:
|
|
158
158
|
|
|
159
159
|
```bash
|
|
160
|
-
waterbrother commit
|
|
161
|
-
waterbrother commit --push
|
|
162
|
-
waterbrother pr
|
|
163
|
-
waterbrother pr --branch=my-feat
|
|
160
|
+
waterbrother commit # stage, diff, generate commit message, confirm
|
|
161
|
+
waterbrother commit --push # same as above, then push
|
|
162
|
+
waterbrother pr # commit, push, generate PR title+body, create via gh
|
|
163
|
+
waterbrother pr --branch=my-feat # create branch first if on main, then PR
|
|
164
164
|
```
|
|
165
165
|
|
|
166
166
|
Utility commands:
|
|
@@ -180,13 +180,13 @@ Web research examples:
|
|
|
180
180
|
waterbrother "Read https://console.x.ai and summarize how to create an API key"
|
|
181
181
|
waterbrother "Search the web for the latest xAI API docs about vision support and cite the sources"
|
|
182
182
|
|
|
183
|
-
#
|
|
183
|
+
# interactive
|
|
184
184
|
/read https://console.x.ai/
|
|
185
185
|
/search latest xAI vision docs
|
|
186
186
|
/open 1
|
|
187
187
|
```
|
|
188
188
|
|
|
189
|
-
##
|
|
189
|
+
## Release flow
|
|
190
190
|
|
|
191
191
|
Partners should ship updates by pushing a version tag, not by running `npm publish` locally.
|
|
192
192
|
|
|
@@ -218,7 +218,7 @@ This lets partners such as Umair and Austin ship releases without using the publ
|
|
|
218
218
|
Long-running session controls:
|
|
219
219
|
|
|
220
220
|
```bash
|
|
221
|
-
#
|
|
221
|
+
# inside interactive mode
|
|
222
222
|
/compact
|
|
223
223
|
/compact 32
|
|
224
224
|
/cost
|
|
@@ -236,7 +236,7 @@ Long-running session controls:
|
|
|
236
236
|
/memory add Always run tests before final answer.
|
|
237
237
|
/memory reload
|
|
238
238
|
|
|
239
|
-
#
|
|
239
|
+
# config tuning
|
|
240
240
|
waterbrother config set autoCompactThreshold 0.9
|
|
241
241
|
waterbrother config set traceMode verbose
|
|
242
242
|
waterbrother config set receiptMode verbose
|
|
@@ -267,11 +267,11 @@ waterbrother config set-json mcpServers '{"filesystem":{"command":"npx","args":[
|
|
|
267
267
|
waterbrother mcp list
|
|
268
268
|
```
|
|
269
269
|
|
|
270
|
-
##
|
|
270
|
+
## Task console
|
|
271
271
|
|
|
272
272
|
Waterbrother treats serious work as **tasks**, not chat turns.
|
|
273
273
|
|
|
274
|
-
###
|
|
274
|
+
### Commands
|
|
275
275
|
|
|
276
276
|
| Command | Description |
|
|
277
277
|
|---------|-------------|
|
|
@@ -290,7 +290,7 @@ Waterbrother treats serious work as **tasks**, not chat turns.
|
|
|
290
290
|
| `/close` | Close the active task |
|
|
291
291
|
| `/panel` | Show/toggle operator panel |
|
|
292
292
|
|
|
293
|
-
###
|
|
293
|
+
### Typical flow
|
|
294
294
|
|
|
295
295
|
```
|
|
296
296
|
/feature auth-rework
|
|
@@ -308,3 +308,4 @@ Supported in this release:
|
|
|
308
308
|
- automatic tool discovery at startup
|
|
309
309
|
- tool routing through normal approval + trace flow
|
|
310
310
|
- interactive inspection with `/mcp`
|
|
311
|
+
|
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -5987,6 +5987,26 @@ async function promptLoop(agent, session, context) {
|
|
|
5987
5987
|
onAssistant() {
|
|
5988
5988
|
markProgress();
|
|
5989
5989
|
turnSummary.events.push({ at: Date.now(), name: "responded" });
|
|
5990
|
+
},
|
|
5991
|
+
onVerifyStart(count) {
|
|
5992
|
+
markProgress();
|
|
5993
|
+
printRailTransition("verifying");
|
|
5994
|
+
spinner.setLabel(`running ${count} verifier${count > 1 ? "s" : ""}...`);
|
|
5995
|
+
},
|
|
5996
|
+
onVerifyResult(result) {
|
|
5997
|
+
markProgress();
|
|
5998
|
+
const icon = result.ok ? green("✓") : red("✗");
|
|
5999
|
+
const count = result.issueCount !== null ? ` (${result.issueCount} issues)` : "";
|
|
6000
|
+
console.log(`${dim(" ▸")} ${icon} ${result.name}${count}`);
|
|
6001
|
+
},
|
|
6002
|
+
onAutofixStart() {
|
|
6003
|
+
markProgress();
|
|
6004
|
+
printRailTransition("fixing");
|
|
6005
|
+
spinner.setLabel("autofixing...");
|
|
6006
|
+
},
|
|
6007
|
+
onAutofixEnd() {
|
|
6008
|
+
markProgress();
|
|
6009
|
+
spinner.setLabel("re-verifying...");
|
|
5990
6010
|
}
|
|
5991
6011
|
}
|
|
5992
6012
|
});
|
|
@@ -5996,7 +6016,7 @@ async function promptLoop(agent, session, context) {
|
|
|
5996
6016
|
spinner.stop();
|
|
5997
6017
|
|
|
5998
6018
|
// Verifying phase (impact + sentinel happened inside runBuildWorkflow)
|
|
5999
|
-
if (buildResult.receipt?.mutated) {
|
|
6019
|
+
if (buildResult.receipt?.mutated && !buildResult.verifierResults) {
|
|
6000
6020
|
printRailTransition("verifying");
|
|
6001
6021
|
}
|
|
6002
6022
|
if (buildResult.review) {
|
|
@@ -6028,6 +6048,11 @@ async function promptLoop(agent, session, context) {
|
|
|
6028
6048
|
}
|
|
6029
6049
|
}
|
|
6030
6050
|
|
|
6051
|
+
// Verifiers
|
|
6052
|
+
if (buildResult.verifierSummary) {
|
|
6053
|
+
lines.push(`${dim("verifiers:")} ${buildResult.verifierSummary}`);
|
|
6054
|
+
}
|
|
6055
|
+
|
|
6031
6056
|
// Impact
|
|
6032
6057
|
if (buildResult.impactSummary) {
|
|
6033
6058
|
const is = buildResult.impactSummary;
|
|
@@ -6217,25 +6242,15 @@ async function promptLoop(agent, session, context) {
|
|
|
6217
6242
|
continue;
|
|
6218
6243
|
}
|
|
6219
6244
|
|
|
6220
|
-
// Extract flags
|
|
6245
|
+
// Extract flags
|
|
6246
|
+
const metricMatch = rawArgs.match(/--metric\s+"([^"]+)"|--metric\s+(\S+)/);
|
|
6221
6247
|
const attemptsMatch = rawArgs.match(/--attempts\s+(\d+)/);
|
|
6222
6248
|
const timeMatch = rawArgs.match(/--time\s+(\d+)/);
|
|
6223
|
-
|
|
6224
|
-
|
|
6225
|
-
|
|
6226
|
-
|
|
6227
|
-
|
|
6228
|
-
const afterMetric = cleaned.slice(metricIdx + 8).trim();
|
|
6229
|
-
// If quoted, take the quoted content; otherwise take everything until end
|
|
6230
|
-
if (afterMetric.startsWith('"')) {
|
|
6231
|
-
const endQuote = afterMetric.indexOf('"', 1);
|
|
6232
|
-
metricCmd = endQuote > 0 ? afterMetric.slice(1, endQuote) : afterMetric.slice(1);
|
|
6233
|
-
} else {
|
|
6234
|
-
metricCmd = afterMetric;
|
|
6235
|
-
}
|
|
6236
|
-
cleaned = cleaned.slice(0, metricIdx).trim();
|
|
6237
|
-
}
|
|
6238
|
-
const goalArg = cleaned.trim();
|
|
6249
|
+
const goalArg = rawArgs
|
|
6250
|
+
.replace(/--metric\s+"[^"]+"|--metric\s+\S+/g, "")
|
|
6251
|
+
.replace(/--attempts\s+\d+/g, "")
|
|
6252
|
+
.replace(/--time\s+\d+/g, "")
|
|
6253
|
+
.trim();
|
|
6239
6254
|
|
|
6240
6255
|
if (!goalArg) {
|
|
6241
6256
|
console.log("experiment needs a goal");
|
|
@@ -6244,8 +6259,8 @@ async function promptLoop(agent, session, context) {
|
|
|
6244
6259
|
|
|
6245
6260
|
const charter = parseCharterFromGoal(goalArg);
|
|
6246
6261
|
|
|
6247
|
-
if (
|
|
6248
|
-
charter.metric.command =
|
|
6262
|
+
if (metricMatch) {
|
|
6263
|
+
charter.metric.command = (metricMatch[1] || metricMatch[2]).trim();
|
|
6249
6264
|
}
|
|
6250
6265
|
if (attemptsMatch) {
|
|
6251
6266
|
charter.budget.maxAttempts = parseInt(attemptsMatch[1], 10);
|
package/src/config.js
CHANGED
|
@@ -198,6 +198,7 @@ export function resolveRuntimeConfig(config, overrides = {}) {
|
|
|
198
198
|
: true,
|
|
199
199
|
decisionModel: overrides.decisionModel || config.decisionModel || "",
|
|
200
200
|
plannerModel: overrides.plannerModel || config.plannerModel || "",
|
|
201
|
+
verifiers: Array.isArray(overrides.verifiers) ? overrides.verifiers : Array.isArray(config.verifiers) ? config.verifiers : [],
|
|
201
202
|
taskDefaults: normalizeTaskDefaults(
|
|
202
203
|
overrides.taskDefaults !== undefined ? overrides.taskDefaults : config.taskDefaults
|
|
203
204
|
),
|
package/src/verifier.js
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { promisify } from "node:util";
|
|
3
|
+
|
|
4
|
+
const execFileAsync = promisify(execFile);
|
|
5
|
+
const MAX_OUTPUT_CHARS = 3000;
|
|
6
|
+
const MAX_AUTOFIX_ATTEMPTS = 2;
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Verifier config shape:
|
|
10
|
+
* {
|
|
11
|
+
* verifiers: [
|
|
12
|
+
* { command: "npx eslint . --format json", name: "eslint", autofix: true },
|
|
13
|
+
* { command: "npx tsc --noEmit", name: "typescript", autofix: false },
|
|
14
|
+
* { command: "npm audit --json", name: "security", autofix: false }
|
|
15
|
+
* ]
|
|
16
|
+
* }
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
 * Run a shell command string and capture its output without ever throwing.
 *
 * The command is passed to `powershell.exe -NoProfile -Command` on Windows and
 * to `/bin/sh -c` on every other platform. Output is limited to 8 MiB per
 * stream and the process is given a 120 second timeout.
 *
 * @param {string} command - Shell command line to execute.
 * @param {string} cwd - Working directory for the child process.
 * @returns {Promise<{ok: boolean, stdout: string, stderr: string, exitCode: number}>}
 *   `ok` is true only when the command exited with status 0. On failure,
 *   `stderr` falls back to the error message when the process produced none.
 */
async function runCommand(command, cwd) {
  const opts = { cwd, env: process.env, maxBuffer: 8 * 1024 * 1024, timeout: 120000 };
  const [shell, shellArgs] =
    process.platform === "win32"
      ? ["powershell.exe", ["-NoProfile", "-Command", command]]
      : ["/bin/sh", ["-c", command]];

  try {
    const result = await execFileAsync(shell, shellArgs, opts);
    return {
      ok: true,
      stdout: String(result.stdout || ""),
      stderr: String(result.stderr || ""),
      exitCode: 0
    };
  } catch (error) {
    return {
      ok: false,
      stdout: String(error.stdout || ""),
      stderr: String(error.stderr || error.message || ""),
      // `error.code` is a number only for a non-zero exit status. Spawn
      // failures carry string codes (e.g. "ENOENT") and killed/timed-out
      // processes carry null, so normalize those to a generic failure code.
      exitCode: typeof error.code === "number" ? error.code : 1
    };
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Extract the first issue count from tool output.
 *
 * Recognizes a standalone number followed by whitespace and a word starting
 * with "error", "warning", "problem", "issue" or "vulnerabilit"
 * (case-insensitive), e.g. "5 problems" or "3 errors". Only the first match
 * is used.
 *
 * @param {string} output - Combined stdout/stderr text of a verifier.
 * @returns {number|null} The parsed count, or null when no count is found or
 *   `output` is not a non-empty string.
 */
function parseIssueCount(output) {
  if (typeof output !== "string" || output.length === 0) return null;
  // The leading \b keeps digits that are part of an identifier (e.g. the
  // "2020" in "ES2020 error") from being mistaken for an issue count.
  const match = output.match(/\b(\d+)\s+(?:error|warning|problem|issue|vulnerabilit)/i);
  return match ? Number.parseInt(match[1], 10) : null;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Run every configured verifier command in order and collect one result each.
 *
 * Entries without a usable string `command` do not abort the pass: they are
 * reported as failed, non-autofixable results so the misconfiguration is
 * visible to the caller.
 *
 * @param {object} params
 * @param {Array<{command: string, name?: string, autofix?: boolean}>} params.verifiers
 *   Verifier definitions; a non-array or empty value yields `[]`.
 * @param {string} params.cwd - Working directory the commands run in.
 * @returns {Promise<Array<{name: string, command: string, ok: boolean, exitCode: number,
 *   output: string, issueCount: (number|null), autofix: boolean}>>}
 */
export async function runVerifiers({ verifiers, cwd }) {
  if (!Array.isArray(verifiers) || verifiers.length === 0) return [];

  const results = [];
  // Verifiers run one at a time, in configured order.
  for (const v of verifiers) {
    const commandText = typeof v?.command === "string" ? v.command.trim() : "";
    if (commandText === "") {
      // Malformed config entry: report it instead of throwing a TypeError
      // that would discard the results of every other verifier.
      results.push({
        name: typeof v?.name === "string" && v.name ? v.name : "(invalid verifier)",
        command: "",
        ok: false,
        exitCode: 1,
        output: "Invalid verifier entry: \"command\" must be a non-empty string.",
        issueCount: null,
        autofix: false
      });
      continue;
    }

    // Fall back to the first word of the command when no name is configured.
    const name = v.name || commandText.split(/\s+/)[0];
    const result = await runCommand(v.command, cwd);
    const combined = `${result.stdout}\n${result.stderr}`.trim();

    results.push({
      name,
      command: v.command,
      ok: result.ok,
      exitCode: result.exitCode,
      output: combined.slice(0, MAX_OUTPUT_CHARS),
      issueCount: parseIssueCount(combined),
      autofix: v.autofix === true
    });
  }

  return results;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build a compact one-line summary of verifier results for display.
 *
 * Each result becomes "✓ name" or "✗ name", followed by " (N issue[s])" when
 * a numeric issue count is available; entries are joined with a space.
 *
 * @param {Array<{ok: boolean, name: string, issueCount?: (number|null)}>} results
 * @returns {string} The summary, or "" when there are no results.
 */
export function formatVerifierResults(results) {
  if (!Array.isArray(results) || results.length === 0) return "";

  return results
    .map((r) => {
      const icon = r.ok ? "✓" : "✗";
      // Only print a count when one was parsed, and pluralize it correctly
      // ("1 issue", not "1 issues"; never "undefined issues").
      const count =
        typeof r.issueCount === "number"
          ? ` (${r.issueCount} ${r.issueCount === 1 ? "issue" : "issues"})`
          : "";
      return `${icon} ${r.name}${count}`;
    })
    .join(" ");
}
|
|
84
|
+
|
|
85
|
+
/**
 * Render verifier results as a multi-line message for the model.
 *
 * Starts with a fixed instruction header, then one line per verifier:
 * "✓ name: clean" for passing ones, or "✗ name: <output>" with at most the
 * first 1000 characters of the output for failing ones.
 *
 * @param {Array<{ok: boolean, name: string, output?: string}>} results
 * @returns {string} The message, or "" when there are no results.
 */
export function formatVerifierResultsForModel(results) {
  if (!Array.isArray(results) || results.length === 0) return "";

  const lines = ["Verifier results (fix any issues before continuing):"];
  for (const r of results) {
    if (r.ok) {
      lines.push(`✓ ${r.name}: clean`);
      continue;
    }
    // A failed result without captured output must not crash the formatter.
    const excerpt = String(r.output ?? "").slice(0, 1000);
    lines.push(`✗ ${r.name}: ${excerpt}`);
  }
  return lines.join("\n");
}
|
|
97
|
+
|
|
98
|
+
/**
 * Report whether any verifier result failed.
 *
 * @param {Array<{ok: boolean}>} results - Verifier results.
 * @returns {boolean} True when at least one result has a falsy `ok`; false
 *   for an empty or non-array value (matching how the formatters in this
 *   module treat missing results).
 */
export function hasFailures(results) {
  if (!Array.isArray(results)) return false;
  return results.some((r) => !r.ok);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Select the failed results whose verifier opted in to autofix.
 *
 * @param {Array<{ok: boolean, autofix: boolean}>} results - Verifier results.
 * @returns {Array<object>} Results with a falsy `ok` and a truthy `autofix`;
 *   an empty array for an empty or non-array value.
 */
export function getAutofixableFailures(results) {
  if (!Array.isArray(results)) return [];
  return results.filter((r) => !r.ok && r.autofix);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Build the prompt that asks the model to fix reported verifier failures.
 *
 * The prompt opens with a fixed instruction line, followed by one section per
 * failure: a "--- name (command) ---" header and at most the first 1500
 * characters of that verifier's output.
 *
 * @param {Array<{name: string, command: string, output?: string}>} failures
 * @returns {string} The prompt text; only the instruction line when
 *   `failures` is empty or not an array.
 */
export function buildAutofixPrompt(failures) {
  const lines = ["Fix the following issues. Only fix what the tools reported — do not make other changes."];
  const items = Array.isArray(failures) ? failures : [];
  for (const f of items) {
    lines.push(`\n--- ${f.name} (${f.command}) ---`);
    // Tolerate a failure record with no captured output.
    lines.push(String(f.output ?? "").slice(0, 1500));
  }
  return lines.join("\n");
}
|
|
114
|
+
|
|
115
|
+
/**
 * Run verifiers, optionally autofix, return final results.
 *
 * Flow: run all verifiers once; while failures remain, some of them are
 * autofixable, an `executeAutofix` handler exists and fewer than
 * MAX_AUTOFIX_ATTEMPTS attempts were made, ask the handler to fix them and
 * re-run the verifiers. Only the final results are reported through
 * `onVerifyResult`.
 *
 * handlers (all optional): {
 *   onVerifyStart(verifierCount)  — verification starting
 *   onVerifyResult(result)        — called once per final verifier result
 *   onAutofixStart(failures)      — about to autofix
 *   onAutofixEnd()                — autofix complete, about to re-verify
 *   executeAutofix(prompt)        — run the model to fix issues
 * }
 *
 * @param {object} params
 * @param {Array<object>} params.verifiers - Verifier definitions.
 * @param {string} params.cwd - Working directory for the verifier commands.
 * @param {object} [params.handlers] - Callbacks described above.
 * @returns {Promise<{results: Array<object>, fixed: boolean, attempts: number}>}
 *   `attempts` is the number of autofix attempts started; `fixed` is true only
 *   when at least one attempt was made and the final results have no failures.
 */
export async function runVerificationPass({ verifiers, cwd, handlers = {} }) {
  if (!Array.isArray(verifiers) || verifiers.length === 0) {
    return { results: [], fixed: false, attempts: 0 };
  }

  handlers.onVerifyStart?.(verifiers.length);

  let results = await runVerifiers({ verifiers, cwd });
  let attempts = 0;

  // Without an executeAutofix handler nothing can be fixed, so do not count
  // an attempt or announce a "fixing" phase that never happens.
  const canAutofix = typeof handlers.executeAutofix === "function";

  // Autofix loop
  while (canAutofix && hasFailures(results) && attempts < MAX_AUTOFIX_ATTEMPTS) {
    const fixable = getAutofixableFailures(results);
    if (fixable.length === 0) break;

    attempts++;
    handlers.onAutofixStart?.(fixable);

    try {
      await handlers.executeAutofix(buildAutofixPrompt(fixable));
    } catch {
      // Best effort: a failed autofix ends the loop and the last verifier
      // results are returned as they are.
      break;
    }

    handlers.onAutofixEnd?.();

    // Re-run verifiers to check if fixes worked
    results = await runVerifiers({ verifiers, cwd });
  }

  for (const r of results) {
    handlers.onVerifyResult?.(r);
  }

  // An attempt alone does not mean the problems are gone; require a clean run.
  return { results, fixed: attempts > 0 && !hasFailures(results), attempts };
}
|
package/src/workflow.js
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
shouldRunFrontendReview
|
|
16
16
|
} from "./frontend.js";
|
|
17
17
|
import { runPlannerPass, formatPlanForExecutor, formatPlanForDisplay } from "./planner.js";
|
|
18
|
+
import { runVerificationPass, formatVerifierResults, hasFailures } from "./verifier.js";
|
|
18
19
|
|
|
19
20
|
export async function runBuildWorkflow({
|
|
20
21
|
agent,
|
|
@@ -99,6 +100,47 @@ export async function runBuildWorkflow({
|
|
|
99
100
|
return { response, receipt: null, impact: null, review: null };
|
|
100
101
|
}
|
|
101
102
|
|
|
103
|
+
// Verification pass: run configured linters/analyzers, autofix if possible
|
|
104
|
+
const verifiers = context.runtime?.verifiers;
|
|
105
|
+
let verifierResults = null;
|
|
106
|
+
if (receipt.mutated && Array.isArray(verifiers) && verifiers.length > 0) {
|
|
107
|
+
try {
|
|
108
|
+
const vResult = await runVerificationPass({
|
|
109
|
+
verifiers,
|
|
110
|
+
cwd: context.cwd,
|
|
111
|
+
handlers: {
|
|
112
|
+
onVerifyStart(count) {
|
|
113
|
+
if (handlers.onStateChange) handlers.onStateChange("verifying");
|
|
114
|
+
if (handlers.onVerifyStart) handlers.onVerifyStart(count);
|
|
115
|
+
},
|
|
116
|
+
onVerifyResult(result) {
|
|
117
|
+
if (handlers.onVerifyResult) handlers.onVerifyResult(result);
|
|
118
|
+
},
|
|
119
|
+
onAutofixStart(failures) {
|
|
120
|
+
if (handlers.onStateChange) handlers.onStateChange("fixing");
|
|
121
|
+
if (handlers.onAutofixStart) handlers.onAutofixStart(failures);
|
|
122
|
+
},
|
|
123
|
+
onAutofixEnd() {
|
|
124
|
+
if (handlers.onAutofixEnd) handlers.onAutofixEnd();
|
|
125
|
+
},
|
|
126
|
+
async executeAutofix(prompt) {
|
|
127
|
+
response = await agent.runBuildTurn(prompt, handlers);
|
|
128
|
+
const fixReceipt = await agent.toolRuntime.completeTurn({ signal: handlers.signal });
|
|
129
|
+
if (fixReceipt) {
|
|
130
|
+
// Merge fix receipt into main receipt
|
|
131
|
+
receipt.changedFiles = [...new Set([...(receipt.changedFiles || []), ...(fixReceipt.changedFiles || [])])];
|
|
132
|
+
if (fixReceipt.diff) receipt.diff = (receipt.diff || "") + "\n" + fixReceipt.diff;
|
|
133
|
+
if (fixReceipt.diffStat) receipt.diffStat = fixReceipt.diffStat;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
});
|
|
138
|
+
verifierResults = vResult.results;
|
|
139
|
+
} catch {
|
|
140
|
+
// Verification failure is non-fatal
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
102
144
|
async function analyze(activeReceipt, activeResponse) {
|
|
103
145
|
let impact = null;
|
|
104
146
|
if (activeReceipt.mutated && context.runtime.impact?.enabled !== false) {
|
|
@@ -276,7 +318,9 @@ export async function runBuildWorkflow({
|
|
|
276
318
|
review,
|
|
277
319
|
designReview,
|
|
278
320
|
screenshotReview,
|
|
279
|
-
impactSummary: impact ? summarizeImpactMap(impact) : null
|
|
321
|
+
impactSummary: impact ? summarizeImpactMap(impact) : null,
|
|
322
|
+
verifierResults,
|
|
323
|
+
verifierSummary: verifierResults ? formatVerifierResults(verifierResults) : null
|
|
280
324
|
};
|
|
281
325
|
}
|
|
282
326
|
|