@slowcook-ai/cli 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +7 -1
- package/dist/cli.js.map +1 -1
- package/dist/commands/brew/agent.d.ts +49 -0
- package/dist/commands/brew/agent.d.ts.map +1 -0
- package/dist/commands/brew/agent.js +741 -0
- package/dist/commands/brew/agent.js.map +1 -0
- package/dist/commands/brew/halt.d.ts +51 -0
- package/dist/commands/brew/halt.d.ts.map +1 -0
- package/dist/commands/brew/halt.js +133 -0
- package/dist/commands/brew/halt.js.map +1 -0
- package/dist/commands/brew/index.d.ts +2 -0
- package/dist/commands/brew/index.d.ts.map +1 -0
- package/dist/commands/brew/index.js +192 -0
- package/dist/commands/brew/index.js.map +1 -0
- package/dist/commands/brew/prompts.d.ts +108 -0
- package/dist/commands/brew/prompts.d.ts.map +1 -0
- package/dist/commands/brew/prompts.js +174 -0
- package/dist/commands/brew/prompts.js.map +1 -0
- package/dist/commands/init/templates.d.ts +1 -1
- package/dist/commands/init/templates.js +1 -1
- package/package.json +3 -3
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, statSync, } from "node:fs";
import { join, resolve, relative, isAbsolute, dirname } from "node:path";
import { execFileSync, execSync } from "node:child_process";
import YAML from "yaml";
import { runTests, validateStackConfig, } from "@slowcook-ai/stack-ts";
import { readSpec } from "../refine/spec-yaml.js";
import { BREW_SYSTEM, BREW_TOOLS, turnPrompt, } from "./prompts.js";
import { writeHaltReport, haltReportToMarkdown, defaultSuggestedActions, } from "./halt.js";
|
|
9
|
+
/** Soft cap on total changed lines (added + removed) in one agent turn. */
const DIFF_LINE_CAP = 200;
/** Soft cap on the number of files changed in one agent turn. */
const DIFF_FILE_CAP = 5;
/** Number of consecutive non-progress iterations tolerated before halting. */
const STAGNATION_CAP = 15;
/**
 * USD price per million tokens, keyed by model id (also matched by prefix).
 * NOTE(review): figures are hard-coded — verify against current published pricing.
 */
const PRICING_PER_M_TOKENS = {
    "claude-opus-4-7": { input: 15, output: 75 },
    "claude-sonnet-4-5": { input: 3, output: 15 },
    "claude-haiku-4-5": { input: 0.8, output: 4 },
};
|
|
17
|
+
/**
 * Run the brew loop for one story.
 *
 * Flow: load the story manifest, take a baseline test run, then repeatedly ask
 * the agent for one turn of edits. A turn is kept (committed as a checkpoint)
 * only when at least one test turns green and no previously-green test breaks;
 * otherwise the working tree is reverted. The loop stops on success, budget,
 * wall-clock, stagnation, iteration cap, or a broken test runner.
 *
 * @param {object} ctx - Brew context (repoRoot, storyId, budgets, paths, clients, clock).
 * @returns {Promise<object>} `{ kind: "success", iterations, checkpoints, spendUsd }`
 *   or whatever `haltFor` returns for a halted run.
 */
export async function runBrew(ctx) {
    const startMs = ctx.now().getTime();
    const manifestPath = join(ctx.repoRoot, ".brewing/manifests", `story-${ctx.storyId}.json`);
    // Pre-flight halts share one shape: nothing has run and nothing was spent.
    const haltBeforeStart = (reason, totalCount, summary) => haltFor(ctx, {
        reason,
        iterations: 0,
        checkpoints: 0,
        greenCount: 0,
        totalCount,
        spendUsd: 0,
        summary,
    });
    if (!existsSync(manifestPath)) {
        return haltBeforeStart("TEST_RUNNER_BROKEN", 0, `No manifest found at \`.brewing/manifests/story-${ctx.storyId}.json\`. Run testgen first.`);
    }
    // A corrupt manifest yields a halt report rather than an uncaught exception.
    const manifest = loadManifestTests(manifestPath);
    if (manifest.error !== undefined) {
        return haltBeforeStart("TEST_RUNNER_BROKEN", 0, `Manifest at \`.brewing/manifests/story-${ctx.storyId}.json\` could not be used: ${manifest.error}. Re-run testgen.`);
    }
    const expectedTestIds = new Set(manifest.tests.map((t) => t.id));
    // Baseline: run tests once to see starting state
    console.log("→ baseline test run…");
    const baseline = runTestSuite(ctx);
    if (!baseline.ran) {
        return haltBeforeStart("TEST_RUNNER_BROKEN", expectedTestIds.size, `Test runner failed to produce usable output on the baseline run. Error: ${baseline.error ?? "(unknown)"}. Fix the runner before brewing.`);
    }
    let greenSet = new Set(baseline.tests.filter((t) => t.status === "passed").map((t) => t.id));
    let redSet = new Set(baseline.tests.filter((t) => t.status !== "passed").map((t) => t.id));
    console.log(`→ baseline: ${greenSet.size} green, ${redSet.size} red / ${baseline.tests.length} total`);
    if (redSet.size === 0) {
        return {
            kind: "success",
            iterations: 0,
            checkpoints: 0,
            spendUsd: 0,
        };
    }
    // Story-scoped target pool: only consider red tests from this story's manifest
    const storyRedSet = () => new Set([...redSet].filter((t) => expectedTestIds.has(t)));
    let spendUsd = 0;
    let stagnation = 0;
    const iterationLogs = [];
    const priorAttempts = [];
    const checkpointCount = () => iterationLogs.filter((l) => l.outcome === "checkpoint").length;
    // Halt with the loop's current counters attached to the report.
    const haltWithProgress = (reason, iterations, summary) => haltFor(ctx, {
        reason,
        iterations,
        checkpoints: checkpointCount(),
        greenCount: greenSet.size,
        totalCount: expectedTestIds.size,
        spendUsd,
        iterationLogs,
        summary,
    });
    let currentTarget = pickTarget(storyRedSet(), null);
    if (!currentTarget) {
        return haltFor(ctx, {
            reason: "TESTS_NEVER_GREEN",
            iterations: 0,
            checkpoints: 0,
            greenCount: greenSet.size,
            totalCount: expectedTestIds.size,
            spendUsd,
            summary: `No red tests for story-${ctx.storyId} found in baseline. Either the story's tests are passing already (nothing to brew), or the manifest doesn't match what vitest discovers. Check the story's manifest vs actual test file.`,
        });
    }
    for (let iteration = 1; iteration <= ctx.maxIterations; iteration++) {
        console.log(`\n=== iteration ${iteration}/${ctx.maxIterations} — target: ${currentTarget} ===`);
        const completed = iteration - 1;
        // Budget + time checks before spending
        if (spendUsd >= ctx.budgetUsd) {
            return haltWithProgress("BUDGET_EXHAUSTED", completed, `Spent $${spendUsd.toFixed(2)} of $${ctx.budgetUsd.toFixed(2)} budget across ${completed} iterations. ${checkpointCount()} checkpoints advanced the green set. ${generateDiagnosis(iterationLogs, greenSet, expectedTestIds)}`);
        }
        if (ctx.now().getTime() - startMs > ctx.wallClockMs) {
            return haltWithProgress("WALL_CLOCK", completed, `Wall-clock budget exceeded after ${completed} iterations.`);
        }
        if (stagnation >= STAGNATION_CAP) {
            return haltWithProgress("STAGNATION_CAP", completed, `${STAGNATION_CAP} consecutive iterations made no progress. ${generateDiagnosis(iterationLogs, greenSet, expectedTestIds)}`);
        }
        // Snapshot before turn (for revert)
        const snapshot = snapshotAllowedPaths(ctx);
        // Run one agent turn
        const turnResult = await runTurn(ctx, {
            iteration,
            target: currentTarget,
            greenList: [...greenSet],
            redList: [...redSet],
            priorAttempts,
            spendUsd,
        });
        spendUsd += turnResult.spendDelta;
        // Append one entry to the per-iteration log for this turn.
        const record = (diff, outcome, note, extra = {}) => {
            iterationLogs.push({
                iteration,
                target_test_id: currentTarget,
                outcome,
                note,
                files_touched: diff.changedPaths,
                lines_added: diff.linesAdded,
                lines_removed: diff.linesRemoved,
                spend_delta_usd: turnResult.spendDelta,
                rationale: turnResult.rationale,
                ...extra,
            });
        };
        // Log a failed turn, remember it for the next prompt, and count it as stagnation.
        const reject = (diff, outcome, note, priorOutcome, priorNote, extra = {}) => {
            record(diff, outcome, note, extra);
            priorAttempts.push({
                iteration,
                outcome: priorOutcome,
                note: priorNote,
                files_touched: diff.changedPaths,
            });
            stagnation += 1;
        };
        if (turnResult.filesTouched.length === 0 && !turnResult.overflowJustification) {
            // Agent did nothing. Log and continue (nothing to revert).
            reject({ changedPaths: [], linesAdded: 0, linesRemoved: 0 }, "reverted-no-progress", "agent made no edits this turn", "reverted-no-progress", "agent made no edits");
            continue;
        }
        // Constraint checks on the applied diff
        const diff = computeDiff(snapshot);
        const frozenHit = diff.changedPaths.find((p) => isFrozenPath(p, ctx.frozenPaths));
        if (frozenHit) {
            revertToSnapshot(ctx, snapshot);
            reject(diff, "rejected-frozen-path", `agent wrote to frozen path: ${frozenHit}`, "reverted-no-progress", `rejected: wrote to frozen path ${frozenHit}`);
            continue;
        }
        // Reads are always allowed; writes outside allowed_paths are rejected.
        const scopeHit = diff.changedPaths.find((p) => !isAllowedPath(p, ctx.allowedPaths));
        if (scopeHit && ctx.allowedPaths.length > 0) {
            revertToSnapshot(ctx, snapshot);
            // Scope violations are logged under the "rejected-frozen-path" outcome.
            reject(diff, "rejected-frozen-path", `agent wrote outside allowed_paths: ${scopeHit}`, "reverted-no-progress", `rejected: scope violation (${scopeHit})`);
            continue;
        }
        const overflowed = diff.linesTotal > DIFF_LINE_CAP || diff.changedPaths.length > DIFF_FILE_CAP;
        if (overflowed && !turnResult.overflowJustification) {
            revertToSnapshot(ctx, snapshot);
            reject(diff, "rejected-overflow", `diff (${diff.linesTotal} lines, ${diff.changedPaths.length} files) exceeded soft cap without justification`, "rejected-overflow", `diff exceeded graduality cap without justify_diff_overflow call`);
            continue;
        }
        // Run tests to see the outcome of this turn
        const result = runTestSuite(ctx);
        if (!result.ran) {
            revertToSnapshot(ctx, snapshot);
            record(diff, "test-runner-broken", `test runner failed: ${result.error ?? "(unknown)"}`);
            return haltWithProgress("TEST_RUNNER_BROKEN", iteration, `Test runner broke mid-brew after iteration ${iteration}. Error: ${result.error ?? "(unknown)"}.`);
        }
        const newGreen = new Set(result.tests.filter((t) => t.status === "passed").map((t) => t.id));
        const newRed = new Set(result.tests.filter((t) => t.status !== "passed").map((t) => t.id));
        const regressions = [...greenSet].filter((t) => !newGreen.has(t));
        const gains = [...newGreen].filter((t) => !greenSet.has(t));
        if (regressions.length > 0) {
            // Regression — revert
            revertToSnapshot(ctx, snapshot);
            reject(diff, "reverted-regression", `broke ${regressions.length} previously-green test(s): ${regressions.slice(0, 3).join(", ")}${regressions.length > 3 ? ` (+${regressions.length - 3} more)` : ""}`, "reverted-regression", `broke ${regressions.length} green test(s)`, { broken_tests: regressions });
            continue;
        }
        if (gains.length === 0) {
            // No progress — revert
            revertToSnapshot(ctx, snapshot);
            reject(diff, "reverted-no-progress", "no test changed from red to green", "reverted-no-progress", "no test went from red to green");
            continue;
        }
        // Progress! checkpoint
        commitCheckpoint(ctx, {
            iteration,
            target: currentTarget,
            gains,
            filesTouched: diff.changedPaths,
        });
        greenSet = newGreen;
        redSet = newRed;
        stagnation = 0;
        record(diff, "checkpoint", `+${gains.length} green`);
        // Earlier failed attempts no longer apply once the tree has moved forward.
        priorAttempts.length = 0;
        // Pick next target from story scope, if any remain
        currentTarget = pickTarget(storyRedSet(), currentTarget);
        if (!currentTarget) {
            break;
        }
    }
    // Loop exited
    const allStoryGreen = [...expectedTestIds].every((id) => greenSet.has(id));
    if (allStoryGreen) {
        await pushBranch(ctx);
        return {
            kind: "success",
            iterations: iterationLogs.length,
            checkpoints: checkpointCount(),
            spendUsd,
        };
    }
    if (!currentTarget) {
        // The loop ran out of red story tests, yet some manifest ids never showed up
        // as passing — this is a manifest/runner mismatch, not an iteration cap.
        const missing = [...expectedTestIds].filter((id) => !greenSet.has(id)).length;
        return haltWithProgress("TESTS_NEVER_GREEN", iterationLogs.length, `No red tests remain for story-${ctx.storyId}, but ${missing} of ${expectedTestIds.size} manifest test(s) were never reported as passing. The manifest likely doesn't match what the test runner discovers. Check the story's manifest vs actual test file.`);
    }
    return haltWithProgress("ITERATION_CAP", iterationLogs.length, `Reached the ${ctx.maxIterations}-iteration cap with ${checkpointCount()} checkpoint(s). ${generateDiagnosis(iterationLogs, greenSet, expectedTestIds)}`);
}
/**
 * Read a story manifest and check it carries a `tests` array.
 *
 * @param {string} manifestPath - Path to the manifest JSON file.
 * @returns {{ tests: object[] } | { error: string }} The test entries, or a
 *   human-readable reason the manifest is unusable.
 */
function loadManifestTests(manifestPath) {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(manifestPath, "utf8"));
    }
    catch (err) {
        return { error: err instanceof Error ? err.message : String(err) };
    }
    if (!Array.isArray(parsed?.tests)) {
        return { error: "manifest has no `tests` array" };
    }
    return { tests: parsed.tests };
}
|
|
350
|
+
/**
 * Run one agent turn: send the turn prompt, then loop executing the model's
 * tool calls and feeding results back until it answers with text only, stops
 * requesting tools, or the per-turn round cap is hit.
 *
 * @param {object} ctx - Brew context (anthropic client, model, spec, budgets, paths).
 * @param {object} args - `{ iteration, target, greenList, redList, priorAttempts, spendUsd }`.
 * @returns {Promise<object>} `{ filesTouched, rationale, spendDelta }` plus
 *   `overflowJustification` when the agent called `justify_diff_overflow`.
 */
async function runTurn(ctx, args) {
    const specYaml = YAML.stringify(ctx.spec);
    const fallbackLocation = ctx.spec.story_id
        ? "(see manifest file for target test location)"
        : "(unknown)";
    const targetFilePath = findTargetTestFile(ctx, args.target) ?? fallbackLocation;
    const userMessage = turnPrompt({
        iteration: args.iteration,
        max_iterations: ctx.maxIterations,
        target_test_id: args.target,
        target_test_file: targetFilePath,
        spec_yaml: specYaml,
        currently_green: args.greenList,
        currently_red: args.redList,
        allowed_paths: ctx.allowedPaths,
        budget_spent_usd: args.spendUsd,
        budget_cap_usd: ctx.budgetUsd,
        // Only the three most recent attempts are shown to the model.
        previous_attempts: args.priorAttempts.slice(-3),
    });
    const filesTouched = new Set();
    let rationale = "";
    let overflowJustification;
    let spendDelta = 0;
    // Tool-use loop: call the model, execute tool_use blocks, feed tool_results back, repeat
    const messages = [
        { role: "user", content: userMessage },
    ];
    // Safety cap: 12 tool rounds within a single turn (should be plenty; prevents runaway)
    const maxToolRounds = 12;
    for (let round = 0; round < maxToolRounds; round++) {
        const response = await ctx.anthropic.messages.create({
            model: ctx.model,
            max_tokens: 4096,
            // cache_control is accepted at runtime but older SDK type defs don't
            // expose it on TextBlockParam.
            system: [
                {
                    type: "text",
                    text: BREW_SYSTEM,
                    cache_control: { type: "ephemeral" },
                },
            ],
            tools: BREW_TOOLS,
            messages,
        });
        spendDelta += costUsdForResponse(response, ctx.model);
        // Capture the assistant turn + any final text
        messages.push({ role: "assistant", content: response.content });
        const toolBlocks = response.content.filter((b) => b.type === "tool_use");
        if (toolBlocks.length === 0) {
            // Text-only ending → extract rationale (capped at 2000 characters)
            const text = response.content
                .filter((b) => b.type === "text")
                .map((b) => b.text)
                .join("\n")
                .trim();
            rationale = text.slice(0, 2000);
            break;
        }
        const toolResults = [];
        for (const tool of toolBlocks) {
            const result = handleToolUse(ctx, tool);
            // Record a path only when the write actually succeeded. A rejected path
            // (e.g. one escaping the repo) would make normalizeRepoPath throw here,
            // outside handleToolUse's try/catch, and abort the whole brew.
            if (tool.name === "write_file" && !result.is_error && tool.input?.path) {
                filesTouched.add(normalizeRepoPath(ctx, String(tool.input.path)));
            }
            if (tool.name === "justify_diff_overflow" && tool.input) {
                overflowJustification = tool.input;
            }
            toolResults.push({
                type: "tool_result",
                tool_use_id: tool.id,
                content: result.content,
                is_error: result.is_error,
            });
        }
        messages.push({ role: "user", content: toolResults });
        if (response.stop_reason !== "tool_use")
            break;
    }
    return {
        filesTouched: [...filesTouched],
        rationale,
        spendDelta,
        ...(overflowJustification ? { overflowJustification } : {}),
    };
}
|
|
440
|
+
/**
 * Execute a single tool call requested by the model.
 *
 * Supported tools: `read_file`, `list_directory`, `write_file`,
 * `justify_diff_overflow`. Any exception raised while executing a tool is
 * converted into an error result instead of propagating.
 *
 * @param {object} ctx - Brew context; `repoRoot` anchors all paths.
 * @param {object} tool - Tool-use block with `name` and `input`.
 * @returns {{ content: string, is_error: boolean }} Payload for the tool_result block.
 */
function handleToolUse(ctx, tool) {
    const succeed = (content) => ({ content, is_error: false });
    const fail = (content) => ({ content, is_error: true });
    const params = tool.input;
    // Evaluated lazily so tools that take no path never touch `params`.
    const requestedPath = () => String(params["path"] ?? "");
    try {
        if (tool.name === "read_file") {
            const requested = requestedPath();
            const absolute = resolveRepoPath(ctx, requested);
            if (!existsSync(absolute)) {
                return fail(`File not found: ${requested}`);
            }
            if (!statSync(absolute).isFile()) {
                return fail(`Not a file: ${requested}`);
            }
            const body = readFileSync(absolute, "utf8");
            // Large files are cut at 20000 characters to bound the prompt size.
            const shown = body.length > 20000 ? body.slice(0, 20000) + "\n…(truncated)" : body;
            return succeed(shown);
        }
        if (tool.name === "list_directory") {
            const requested = requestedPath();
            const absolute = resolveRepoPath(ctx, requested);
            if (!existsSync(absolute)) {
                return fail(`Not found: ${requested}`);
            }
            if (!statSync(absolute).isDirectory()) {
                return fail(`Not a directory: ${requested}`);
            }
            const names = [];
            for (const entry of readdirSync(absolute, { withFileTypes: true })) {
                // Directories are marked with a trailing slash.
                names.push(`${entry.name}${entry.isDirectory() ? "/" : ""}`);
            }
            names.sort();
            return succeed(names.join("\n"));
        }
        if (tool.name === "write_file") {
            const requested = requestedPath();
            const contents = String(params["contents"] ?? "");
            const absolute = resolveRepoPath(ctx, requested);
            mkdirSync(dirname(absolute), { recursive: true });
            writeFileSync(absolute, contents, "utf8");
            return succeed(`Wrote ${contents.split("\n").length} lines to ${requested}`);
        }
        if (tool.name === "justify_diff_overflow") {
            return succeed("justification recorded");
        }
        return fail(`Unknown tool: ${tool.name}`);
    }
    catch (e) {
        return fail(`Tool error: ${e.message}`);
    }
}
|
|
486
|
+
/** ------------------------- Path helpers ------------------------- */
|
|
487
|
+
/**
 * Resolve a tool-supplied path against the repository root and refuse anything
 * that lands outside it.
 *
 * Both absolute and relative inputs are checked: a relative path such as
 * `../../etc/passwd` resolves outside the root just as an absolute one can.
 *
 * @param {object} ctx - Brew context; only `repoRoot` is read.
 * @param {string} p - Absolute or repo-relative path.
 * @returns {string} Absolute, normalized path inside the repository.
 * @throws {Error} `path escapes repo: <p>` when the target is outside `repoRoot`.
 */
function resolveRepoPath(ctx, p) {
    const root = resolve(ctx.repoRoot);
    const full = resolve(root, p);
    const rel = relative(root, full);
    // Compare the first path segment, not a raw prefix, so names like "..hidden"
    // are accepted. An absolute `rel` means a different drive/root on Windows.
    const escapes = rel === ".." ||
        rel.startsWith("../") ||
        rel.startsWith("..\\") ||
        isAbsolute(rel);
    if (escapes) {
        throw new Error(`path escapes repo: ${p}`);
    }
    return full;
}
|
|
497
|
+
/**
 * Express a tool-supplied path relative to the repository root.
 *
 * @param {object} ctx - Brew context; only `repoRoot` is read.
 * @param {string} p - Absolute or repo-relative path.
 * @returns {string} The path relative to `ctx.repoRoot`.
 */
function normalizeRepoPath(ctx, p) {
    return relative(ctx.repoRoot, resolveRepoPath(ctx, p));
}
|
|
501
|
+
/**
 * Tell whether a repo-relative path is frozen (must not be written).
 *
 * A path is frozen when it is listed in `frozen.files`, equals a frozen
 * directory, or lies underneath one. Directory matching respects path-segment
 * boundaries: `src/core` freezes `src/core/x.ts` but not `src/core-utils/x.ts`.
 *
 * @param {string} path - Repo-relative path using `/` separators.
 * @param {{ files: string[], directories: string[] }} frozen - Frozen-path config.
 * @returns {boolean} True when the path is frozen.
 */
function isFrozenPath(path, frozen) {
    if (frozen.files.includes(path)) {
        return true;
    }
    return frozen.directories.some((dir) => {
        // An empty entry has always matched every path; keep that behaviour.
        if (dir === "") {
            return true;
        }
        const base = dir.replace(/\/$/, "");
        return path === base || path.startsWith(base + "/");
    });
}
|
|
512
|
+
/**
 * Tell whether a repo-relative path falls inside the allowed write scope.
 *
 * An empty `allowedPaths` list means no restriction. Otherwise the path must
 * equal an entry or lie underneath it, respecting path-segment boundaries:
 * `src/app` allows `src/app/page.ts` but not `src/app-secrets/key.ts`.
 *
 * @param {string} path - Repo-relative path using `/` separators.
 * @param {string[]} allowedPaths - Allowed files or directories.
 * @returns {boolean} True when writing to the path is in scope.
 */
function isAllowedPath(path, allowedPaths) {
    if (allowedPaths.length === 0) {
        return true;
    }
    return allowedPaths.some((entry) => {
        // An empty entry has always matched every path; keep that behaviour.
        if (entry === "") {
            return true;
        }
        const base = entry.replace(/\/$/, "");
        return path === base || path.startsWith(base + "/");
    });
}
|
|
523
|
+
/**
 * Produce the pre-turn snapshot handle.
 *
 * This first cut records nothing: it returns empty containers, and the diff
 * and revert steps work from git state instead of this object.
 *
 * @param {object} ctx - Brew context (currently unused).
 * @returns {{ files: Map, trackedPaths: Set }} Empty snapshot placeholder.
 */
function snapshotAllowedPaths(ctx) {
    void ctx;
    const files = new Map();
    const trackedPaths = new Set();
    return { files, trackedPaths };
}
|
|
531
|
+
/**
 * Discard everything the agent changed this turn.
 *
 * Hard-resets tracked files to HEAD and deletes untracked files and
 * directories. This relies on prior state having been committed before the
 * turn started. Git is invoked without a shell so a repo path containing
 * quotes, `$` or backticks cannot alter the command.
 *
 * @param {object} ctx - Brew context; only `repoRoot` is read.
 * @param {object} _snapshot - Unused; the revert is git-based.
 * @throws {Error} When a git command exits non-zero.
 */
function revertToSnapshot(ctx, _snapshot) {
    const options = { stdio: "ignore" };
    execFileSync("git", ["-C", ctx.repoRoot, "reset", "--hard", "HEAD"], options);
    execFileSync("git", ["-C", ctx.repoRoot, "clean", "-fd"], options);
}
|
|
537
|
+
/**
 * Summarize working-tree changes relative to HEAD.
 *
 * Tracked changes come from `git diff --numstat HEAD`; untracked files come
 * from `git ls-files --others --exclude-standard`, each counted as added
 * lines. A failing git command is treated as "no changes". This relies on
 * prior state having been committed before the turn started.
 *
 * @param {object} _snapshot - Unused; the diff is git-based.
 * @param {string} [cwd=process.cwd()] - Directory to run git in and to resolve
 *   untracked paths against.
 * @returns {{ changedPaths: string[], linesAdded: number, linesRemoved: number, linesTotal: number }}
 */
function computeDiff(_snapshot, cwd = process.cwd()) {
    const numstat = execSync("git diff --numstat HEAD 2>/dev/null || echo ''", {
        encoding: "utf8",
        cwd,
    }).trim();
    const changedPaths = [];
    const seen = new Set();
    let linesAdded = 0;
    let linesRemoved = 0;
    // Binary files report "-" for both counts; treat those as zero.
    const toCount = (field) => {
        if (field === "-") {
            return 0;
        }
        const value = Number.parseInt(field ?? "0", 10);
        return Number.isNaN(value) ? 0 : value;
    };
    for (const line of numstat.split("\n")) {
        if (!line) {
            continue;
        }
        // numstat fields are tab-separated; splitting on tabs keeps spaces
        // inside a path intact.
        const [addedField, removedField, ...pathFields] = line.split("\t");
        const path = pathFields.join("\t");
        if (!path) {
            continue;
        }
        changedPaths.push(path);
        seen.add(path);
        linesAdded += toCount(addedField);
        linesRemoved += toCount(removedField);
    }
    // Also include untracked new files
    const untracked = execSync("git ls-files --others --exclude-standard 2>/dev/null || echo ''", {
        encoding: "utf8",
        cwd,
    }).trim();
    for (const p of untracked.split("\n").filter(Boolean)) {
        if (seen.has(p)) {
            continue;
        }
        seen.add(p);
        changedPaths.push(p);
        try {
            // git prints these paths relative to the directory it ran in.
            const content = readFileSync(join(cwd, p), "utf8");
            linesAdded += content.split("\n").length;
        }
        catch {
            // Unreadable file: still reported as changed, contributes no lines.
        }
    }
    return {
        changedPaths,
        linesAdded,
        linesRemoved,
        linesTotal: linesAdded + linesRemoved,
    };
}
|
|
580
|
+
/**
 * Commit the current working tree as a brew checkpoint.
 *
 * Stages everything (`git add -A`) and commits with a message naming the
 * iteration, the number of newly green tests, and the target test. Git is
 * invoked without a shell: the message embeds the target test id, and under a
 * shell's double quotes any backticks or `$(...)` in that id would be executed.
 *
 * @param {object} ctx - Brew context; only `repoRoot` is read.
 * @param {{ iteration: number, gains: string[], target: string }} args - Checkpoint details.
 * @throws {Error} When a git command exits non-zero.
 */
function commitCheckpoint(ctx, args) {
    const options = { stdio: "ignore" };
    const msg = `slowcook/brew iter ${args.iteration}: +${args.gains.length} green — target ${args.target}`;
    execFileSync("git", ["-C", ctx.repoRoot, "add", "-A"], options);
    execFileSync("git", ["-C", ctx.repoRoot, "commit", "-m", msg], options);
}
|
|
585
|
+
/**
 * Push the brew branch to `origin` and set its upstream.
 *
 * Git is invoked without a shell so the repo path and branch name are passed
 * as literal arguments rather than being parsed as shell text.
 *
 * @param {object} ctx - Brew context; reads `repoRoot` and `branchName`.
 * @returns {Promise<void>}
 * @throws {Error} When the push exits non-zero.
 */
async function pushBranch(ctx) {
    execFileSync("git", ["-C", ctx.repoRoot, "push", "--set-upstream", "origin", ctx.branchName], { stdio: "ignore" });
    // `forge` is not used here; it is only referenced, as in the original.
    void ctx.forge;
}
|
|
589
|
+
/** ------------------------- Runner + parsers ------------------------- */
|
|
590
|
+
/**
 * Run the project's test suite from the repository root.
 *
 * @param {object} ctx - Brew context; reads `stackConfig` and `repoRoot`.
 * @returns {object} Whatever `runTests` returns; callers read `ran`, `tests`, `error`.
 */
function runTestSuite(ctx) {
    const { stackConfig, repoRoot } = ctx;
    return runTests(stackConfig, { cwd: repoRoot });
}
|
|
593
|
+
/** ------------------------- Target selection ------------------------- */
|
|
594
|
+
/**
 * Choose which red test the agent should work on next.
 *
 * Keeps the previous target while it is still red; otherwise takes the first
 * id in default sort order, so the choice is deterministic.
 *
 * @param {Set<string>} redTests - Ids of currently failing tests.
 * @param {string|null} previous - The target of the last iteration, if any.
 * @returns {string|null} The chosen test id, or null when nothing is red.
 */
function pickTarget(redTests, previous) {
    if (redTests.size === 0) {
        return null;
    }
    const previousStillRed = Boolean(previous) && redTests.has(previous);
    if (previousStillRed) {
        return previous;
    }
    const [first] = Array.from(redTests).sort();
    return first ?? null;
}
|
|
603
|
+
/**
 * Look up the test file recorded for a test id in the story manifest.
 *
 * @param {object} ctx - Brew context; reads `repoRoot` and `storyId`.
 * @param {string} testId - Id of the test to locate.
 * @returns {string|null} The manifest's `file` for that test, or null when the
 *   manifest is missing, unreadable, or has no file for the id.
 */
function findTargetTestFile(ctx, testId) {
    const manifestPath = join(ctx.repoRoot, ".brewing/manifests", `story-${ctx.storyId}.json`);
    if (!existsSync(manifestPath)) {
        return null;
    }
    try {
        const { tests } = JSON.parse(readFileSync(manifestPath, "utf8"));
        const entry = tests.find((candidate) => candidate.id === testId);
        return entry?.file ?? null;
    }
    catch {
        // Any read/parse/shape problem is treated as "location unknown".
        return null;
    }
}
|
|
615
|
+
/** ------------------------- Cost accounting ------------------------- */
|
|
616
|
+
/**
 * Estimate the USD cost of one model response from its token usage.
 *
 * The API reports `input_tokens` exclusive of cached tokens: cache reads and
 * cache writes arrive in their own usage fields. They are therefore added on
 * top of `input_tokens` at their respective rates — subtracting them from it
 * under-counts spend and can even go negative on cache-heavy turns.
 *
 * @param {object} response - Messages API response; `usage` is read if present.
 * @param {string} model - Model id used for the request.
 * @returns {number} Estimated cost in USD; 0 when the model has no known pricing.
 */
function costUsdForResponse(response, model) {
    const pricing = matchPricing(model);
    if (!pricing) {
        // Unknown model: nothing is charged against the budget.
        return 0;
    }
    const usage = response.usage;
    const uncachedInput = usage?.input_tokens ?? 0;
    const output = usage?.output_tokens ?? 0;
    const cacheRead = usage?.cache_read_input_tokens ?? 0;
    const cacheCreate = usage?.cache_creation_input_tokens ?? 0;
    // Approximation: cache reads bill at ~10% of the input rate, cache writes at ~125%.
    const effectiveInput = uncachedInput + cacheRead * 0.1 + cacheCreate * 1.25;
    return (effectiveInput / 1_000_000) * pricing.input + (output / 1_000_000) * pricing.output;
}
|
|
631
|
+
/**
 * Find the pricing entry for a model id.
 *
 * Tries an exact key first, then the first table key the id starts with
 * (e.g. "claude-opus-4-7-20250912" → "claude-opus-4-7").
 *
 * @param {string} model - Model id.
 * @returns {{ input: number, output: number }|null} Pricing per million tokens, or null.
 */
function matchPricing(model) {
    const exact = PRICING_PER_M_TOKENS[model];
    if (exact) {
        return exact;
    }
    const prefixKey = Object.keys(PRICING_PER_M_TOKENS).find((key) => model.startsWith(key));
    return prefixKey === undefined ? null : PRICING_PER_M_TOKENS[prefixKey];
}
|
|
642
|
+
/**
 * Build and persist a halt report, then make best-effort attempts to publish it.
 *
 * Steps: summarize the last three iterations, write the report JSON under
 * `ctx.haltDir`, push the branch if any checkpoint was committed, and comment
 * on the source issue when the spec names one. Push and comment failures are
 * deliberately ignored so they cannot mask the halt itself.
 *
 * @param {object} ctx - Brew context (storyId, now, budgetUsd, model, haltDir,
 *   repoRoot, branchName, spec, forge).
 * @param {object} args - `{ reason, iterations, checkpoints, greenCount,
 *   totalCount, spendUsd, summary, iterationLogs? }`.
 * @returns {{ kind: "halted", report: object }}
 */
function haltFor(ctx, args) {
    const logs = args.iterationLogs ?? [];
    // The report keeps only these outcomes; every other one is reported as
    // "reverted-no-progress".
    const reportedOutcomes = new Set(["checkpoint", "reverted-regression", "rejected-overflow"]);
    const last3 = logs.slice(-3).map((l) => ({
        iteration: l.iteration,
        files_changed: l.files_touched.length,
        lines_added: l.lines_added,
        lines_removed: l.lines_removed,
        outcome: reportedOutcomes.has(l.outcome) ? l.outcome : "reverted-no-progress",
    }));
    const lastRationale = logs.slice(-1)[0]?.rationale;
    const report = {
        story_id: ctx.storyId,
        halt_reason: args.reason,
        halt_timestamp: ctx.now().toISOString(),
        iterations_run: args.iterations,
        checkpoints_committed: args.checkpoints,
        tests_green: args.greenCount,
        tests_total: args.totalCount,
        tokens_spent_usd: args.spendUsd,
        budget_usd: ctx.budgetUsd,
        model: ctx.model,
        summary_plain_english: args.summary,
        last_three_diffs: last3.length > 0 ? last3 : undefined,
        last_agent_rationale: lastRationale,
        suggested_actions: defaultSuggestedActions(args.reason, {
            budget_usd: ctx.budgetUsd,
            iterations_run: args.iterations,
        }),
    };
    // ":" and "." in the ISO timestamp are replaced to keep the file name portable.
    const reportPath = join(ctx.haltDir, `story-${ctx.storyId}-${report.halt_timestamp.replace(/[:.]/g, "-")}.json`);
    writeHaltReport(reportPath, report);
    // Attempt to push partial progress (if any checkpoints exist) so the operator can see what was tried
    if (report.checkpoints_committed > 0) {
        try {
            // No shell: repo path and branch name are passed as literal arguments.
            execFileSync("git", ["-C", ctx.repoRoot, "push", "--set-upstream", "origin", ctx.branchName], { stdio: "ignore" });
        }
        catch {
            // best effort
        }
    }
    // Post comment to the source issue if present.
    // NOTE(review): takes the first digit run in source_issue — confirm the field's format.
    const sourceIssue = ctx.spec.source_issue?.match(/#?(\d+)/)?.[1];
    if (sourceIssue) {
        ctx.forge
            .createIssueComment(parseInt(sourceIssue, 10), haltReportToMarkdown(report))
            .catch(() => {
            /* best effort */
        });
    }
    return { kind: "halted", report };
}
|
|
700
|
+
/**
 * Turn the iteration history into a one-paragraph diagnosis for the halt summary.
 *
 * @param {Array<{ outcome: string }>} iterationLogs - Per-iteration log entries.
 * @param {Set<string>} greenSet - Ids of currently passing tests.
 * @param {Set<string>} expected - Ids of the story's tests from the manifest.
 * @returns {string} Human-readable diagnosis.
 */
function generateDiagnosis(iterationLogs, greenSet, expected) {
    let storyGreen = 0;
    for (const id of greenSet) {
        if (expected.has(id)) {
            storyGreen += 1;
        }
    }
    const total = iterationLogs.length;
    if (total === 0) {
        return "No iterations ran.";
    }
    // Tally the three outcomes the diagnosis distinguishes in a single pass.
    const tally = { "checkpoint": 0, "reverted-regression": 0, "reverted-no-progress": 0 };
    for (const { outcome } of iterationLogs) {
        if (Object.hasOwn(tally, outcome)) {
            tally[outcome] += 1;
        }
    }
    const checkpoints = tally["checkpoint"];
    const regressions = tally["reverted-regression"];
    const noProgress = tally["reverted-no-progress"];
    const nothingEverMoved = checkpoints === 0 && regressions === 0 && noProgress > 0;
    if (nothingEverMoved) {
        // Classic "wrong layer" signal: agent tried, nothing moved
        return `All ${iterationLogs.length} iterations reverted for no-progress (no test changed from red to green). The target test is likely unreachable from the code the agent is editing — a layer mismatch. Common cause: HTTP-loopback tests that fetch a URL with no running server. See \`.brewing/context.md\` → Testing conventions for the tier-1 (vi.mock) style that brewing can actually ratchet against.`;
    }
    const mostlyRegressions = regressions > total / 2;
    if (mostlyRegressions) {
        return `${regressions} iterations regressed (broke a previously-green test). Agent may be misunderstanding an invariant — consider clarifying the spec.`;
    }
    const partialProgress = checkpoints > 0 && storyGreen < expected.size;
    if (partialProgress) {
        return `${checkpoints} checkpoints committed; ${storyGreen}/${expected.size} story tests green. Progress was real but incomplete.`;
    }
    return `${checkpoints} checkpoint(s), ${noProgress} no-progress, ${regressions} regression(s).`;
}
|
|
720
|
+
/** ------------------------- Entry helpers ------------------------- */
|
|
721
|
+
/**
 * Load the frozen-path configuration from `.brewing/frozen-paths.json`.
 *
 * @param {string} repoRoot - Repository root directory.
 * @returns {{ directories: string[], files: string[], partial: object }} The
 *   configuration, with empty defaults for a missing file or missing keys.
 * @throws {SyntaxError} When the file exists but is not valid JSON.
 */
export function readFrozenPaths(repoRoot) {
    const configPath = join(repoRoot, ".brewing/frozen-paths.json");
    const config = { directories: [], files: [], partial: {} };
    if (!existsSync(configPath)) {
        return config;
    }
    const parsed = JSON.parse(readFileSync(configPath, "utf8"));
    config.directories = parsed.directories ?? [];
    config.files = parsed.files ?? [];
    config.partial = parsed.partial ?? {};
    return config;
}
|
|
733
|
+
/**
 * Load and validate the stack configuration from `.brewing/stack.json`.
 *
 * @param {string} repoRoot - Repository root directory.
 * @returns {object} The result of `validateStackConfig` on the parsed file.
 * @throws {Error} When the file is missing or is not valid JSON.
 */
export function readStackConfig(repoRoot) {
    const configText = readFileSync(join(repoRoot, ".brewing/stack.json"), "utf8");
    return validateStackConfig(JSON.parse(configText));
}
|
|
738
|
+
/**
 * Load the spec for a story by delegating to `readSpec`.
 *
 * @param {string} repoRoot - Repository root directory.
 * @param {string} storyId - Story identifier.
 * @returns {object} Whatever `readSpec` returns for that story.
 */
export function loadSpec(repoRoot, storyId) {
    const spec = readSpec(repoRoot, storyId);
    return spec;
}
|
|
741
|
+
//# sourceMappingURL=agent.js.map
|