@slowcook-ai/cli 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,741 @@
1
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, statSync, } from "node:fs";
import { join, resolve, relative, isAbsolute, dirname } from "node:path";
import { execFileSync, execSync } from "node:child_process";
import YAML from "yaml";
import { runTests, validateStackConfig, } from "@slowcook-ai/stack-ts";
import { readSpec } from "../refine/spec-yaml.js";
import { BREW_SYSTEM, BREW_TOOLS, turnPrompt, } from "./prompts.js";
import { writeHaltReport, haltReportToMarkdown, defaultSuggestedActions, } from "./halt.js";
9
/** Largest total of added + removed lines a single turn may produce without a justification. */
const DIFF_LINE_CAP = 200;

/** Largest number of changed files a single turn may produce without a justification. */
const DIFF_FILE_CAP = 5;

/** Number of consecutive iterations without progress after which the brew halts. */
const STAGNATION_CAP = 15;

/**
 * Model pricing table, keyed by model-name prefix.
 * Values are multiplied by (tokens / 1_000_000) when computing spend.
 */
const PRICING_PER_M_TOKENS = {
  "claude-opus-4-7": {
    input: 15,
    output: 75,
  },
  "claude-sonnet-4-5": {
    input: 3,
    output: 15,
  },
  "claude-haiku-4-5": {
    input: 0.8,
    output: 4,
  },
};
17
/** Counts the logged iterations that ended in a committed checkpoint. */
function countCheckpoints(iterationLogs) {
  return iterationLogs.filter((l) => l.outcome === "checkpoint").length;
}

/**
 * Builds one iteration-log record from a turn's diff stats and agent result.
 * `extra` is spread last so additional keys (e.g. `broken_tests`) come after the common ones.
 */
function buildIterationLog({ iteration, target, outcome, note, diff, turnResult, extra = {} }) {
  return {
    iteration,
    target_test_id: target,
    outcome,
    note,
    files_touched: diff.changedPaths,
    lines_added: diff.linesAdded,
    lines_removed: diff.linesRemoved,
    spend_delta_usd: turnResult.spendDelta,
    rationale: turnResult.rationale,
    ...extra,
  };
}

/**
 * Drives the brew loop for one story: repeatedly asks the agent to turn a red
 * test green, keeps a turn only when it gains at least one green test without
 * breaking a previously-green one, and reverts the working tree otherwise.
 *
 * @param ctx Run context. Fields read here: `now`, `repoRoot`, `storyId`,
 *   `maxIterations`, `budgetUsd`, `wallClockMs`, `frozenPaths`, `allowedPaths`
 *   (plus whatever the helpers it is passed to read).
 * @returns `{ kind: "success", iterations, checkpoints, spendUsd }` when nothing
 *   is red at baseline or every manifest test ends up green; otherwise the value
 *   returned by `haltFor` (a halt report for the specific stop reason).
 * @throws Propagates errors from manifest parsing, git commands and the model call.
 */
export async function runBrew(ctx) {
  const startMs = ctx.now().getTime();
  const manifestPath = join(ctx.repoRoot, ".brewing/manifests", `story-${ctx.storyId}.json`);
  if (!existsSync(manifestPath)) {
    return haltFor(ctx, {
      reason: "TEST_RUNNER_BROKEN",
      iterations: 0,
      checkpoints: 0,
      greenCount: 0,
      totalCount: 0,
      spendUsd: 0,
      summary: `No manifest found at \`.brewing/manifests/story-${ctx.storyId}.json\`. Run testgen first.`,
    });
  }
  const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
  const expectedTestIds = new Set(manifest.tests.map((t) => t.id));

  // Baseline: run tests once to see the starting state.
  console.log("→ baseline test run…");
  const baseline = runTestSuite(ctx);
  if (!baseline.ran) {
    return haltFor(ctx, {
      reason: "TEST_RUNNER_BROKEN",
      iterations: 0,
      checkpoints: 0,
      greenCount: 0,
      totalCount: expectedTestIds.size,
      spendUsd: 0,
      summary: `Test runner failed to produce usable output on the baseline run. Error: ${baseline.error ?? "(unknown)"}. Fix the runner before brewing.`,
    });
  }
  let greenSet = new Set(baseline.tests.filter((t) => t.status === "passed").map((t) => t.id));
  let redSet = new Set(baseline.tests.filter((t) => t.status !== "passed").map((t) => t.id));
  console.log(`→ baseline: ${greenSet.size} green, ${redSet.size} red / ${baseline.tests.length} total`);
  if (redSet.size === 0) {
    return {
      kind: "success",
      iterations: 0,
      checkpoints: 0,
      spendUsd: 0,
    };
  }

  // Story-scoped target pool: only red tests listed in this story's manifest.
  const storyRedSet = () => new Set([...redSet].filter((t) => expectedTestIds.has(t)));
  let spendUsd = 0;
  let stagnation = 0;
  const iterationLogs = [];
  const priorAttempts = [];

  // Halts with the counters that every in-loop halt reports identically.
  const halt = (reason, iterations, summary) => haltFor(ctx, {
    reason,
    iterations,
    checkpoints: countCheckpoints(iterationLogs),
    greenCount: greenSet.size,
    totalCount: expectedTestIds.size,
    spendUsd,
    iterationLogs,
    summary,
  });

  // Books a turn that did not advance the green set: one log record, one
  // prior-attempt hint for the next prompt, and one stagnation tick.
  const recordFailure = ({ iteration, diff, turnResult, outcome, note, attemptOutcome = outcome, attemptNote, extra }) => {
    iterationLogs.push(buildIterationLog({
      iteration,
      target: currentTarget,
      outcome,
      note,
      diff,
      turnResult,
      extra,
    }));
    priorAttempts.push({
      iteration,
      outcome: attemptOutcome,
      note: attemptNote,
      files_touched: diff.changedPaths,
    });
    stagnation += 1;
  };

  let currentTarget = pickTarget(storyRedSet(), null);
  if (!currentTarget) {
    return halt("TESTS_NEVER_GREEN", 0, `No red tests for story-${ctx.storyId} found in baseline. Either the story's tests are passing already (nothing to brew), or the manifest doesn't match what vitest discovers. Check the story's manifest vs actual test file.`);
  }

  for (let iteration = 1; iteration <= ctx.maxIterations; iteration++) {
    console.log(`\n=== iteration ${iteration}/${ctx.maxIterations} — target: ${currentTarget} ===`);
    const completed = iteration - 1;

    // Budget, wall-clock and stagnation checks run before spending on another turn.
    if (spendUsd >= ctx.budgetUsd) {
      return halt("BUDGET_EXHAUSTED", completed, `Spent $${spendUsd.toFixed(2)} of $${ctx.budgetUsd.toFixed(2)} budget across ${completed} iterations. ${countCheckpoints(iterationLogs)} checkpoints advanced the green set. ${generateDiagnosis(iterationLogs, greenSet, expectedTestIds)}`);
    }
    if (ctx.now().getTime() - startMs > ctx.wallClockMs) {
      return halt("WALL_CLOCK", completed, `Wall-clock budget exceeded after ${completed} iterations.`);
    }
    if (stagnation >= STAGNATION_CAP) {
      return halt("STAGNATION_CAP", completed, `${STAGNATION_CAP} consecutive iterations made no progress. ${generateDiagnosis(iterationLogs, greenSet, expectedTestIds)}`);
    }

    // Snapshot handle taken before the turn; passed to the diff and revert helpers.
    const snapshot = snapshotAllowedPaths(ctx);

    // Run one agent turn.
    const turnResult = await runTurn(ctx, {
      iteration,
      target: currentTarget,
      greenList: [...greenSet],
      redList: [...redSet],
      priorAttempts,
      spendUsd,
    });
    spendUsd += turnResult.spendDelta;

    if (turnResult.filesTouched.length === 0 && !turnResult.overflowJustification) {
      // Agent did nothing: nothing to diff or revert.
      recordFailure({
        iteration,
        turnResult,
        diff: { changedPaths: [], linesAdded: 0, linesRemoved: 0 },
        outcome: "reverted-no-progress",
        note: "agent made no edits this turn",
        attemptNote: "agent made no edits",
      });
      continue;
    }

    // Constraint checks on the applied diff.
    const diff = computeDiff(snapshot);

    const frozenHit = diff.changedPaths.find((p) => isFrozenPath(p, ctx.frozenPaths));
    if (frozenHit) {
      revertToSnapshot(ctx, snapshot);
      recordFailure({
        iteration,
        turnResult,
        diff,
        outcome: "rejected-frozen-path",
        note: `agent wrote to frozen path: ${frozenHit}`,
        attemptOutcome: "reverted-no-progress",
        attemptNote: `rejected: wrote to frozen path ${frozenHit}`,
      });
      continue;
    }

    // Reading anywhere is fine; only writes outside allowed_paths are rejected.
    const scopeHit = diff.changedPaths.find((p) => !isAllowedPath(p, ctx.allowedPaths));
    if (scopeHit && ctx.allowedPaths.length > 0) {
      revertToSnapshot(ctx, snapshot);
      // Scope violations are logged under the same outcome label as frozen-path hits.
      recordFailure({
        iteration,
        turnResult,
        diff,
        outcome: "rejected-frozen-path",
        note: `agent wrote outside allowed_paths: ${scopeHit}`,
        attemptOutcome: "reverted-no-progress",
        attemptNote: `rejected: scope violation (${scopeHit})`,
      });
      continue;
    }

    const overflowed = diff.linesTotal > DIFF_LINE_CAP || diff.changedPaths.length > DIFF_FILE_CAP;
    if (overflowed && !turnResult.overflowJustification) {
      revertToSnapshot(ctx, snapshot);
      recordFailure({
        iteration,
        turnResult,
        diff,
        outcome: "rejected-overflow",
        note: `diff (${diff.linesTotal} lines, ${diff.changedPaths.length} files) exceeded soft cap without justification`,
        attemptNote: `diff exceeded graduality cap without justify_diff_overflow call`,
      });
      continue;
    }

    // Run tests to see the outcome of this turn.
    const result = runTestSuite(ctx);
    if (!result.ran) {
      revertToSnapshot(ctx, snapshot);
      iterationLogs.push(buildIterationLog({
        iteration,
        target: currentTarget,
        outcome: "test-runner-broken",
        note: `test runner failed: ${result.error ?? "(unknown)"}`,
        diff,
        turnResult,
      }));
      return halt("TEST_RUNNER_BROKEN", iteration, `Test runner broke mid-brew after iteration ${iteration}. Error: ${result.error ?? "(unknown)"}.`);
    }

    const newGreen = new Set(result.tests.filter((t) => t.status === "passed").map((t) => t.id));
    const newRed = new Set(result.tests.filter((t) => t.status !== "passed").map((t) => t.id));
    const regressions = [...greenSet].filter((t) => !newGreen.has(t));
    const gains = [...newGreen].filter((t) => !greenSet.has(t));

    if (regressions.length > 0) {
      // A previously-green test is no longer green: discard the turn.
      revertToSnapshot(ctx, snapshot);
      recordFailure({
        iteration,
        turnResult,
        diff,
        outcome: "reverted-regression",
        note: `broke ${regressions.length} previously-green test(s): ${regressions.slice(0, 3).join(", ")}${regressions.length > 3 ? ` (+${regressions.length - 3} more)` : ""}`,
        attemptNote: `broke ${regressions.length} green test(s)`,
        extra: { broken_tests: regressions },
      });
      continue;
    }

    if (gains.length === 0) {
      // Nothing moved from red to green: discard the turn.
      revertToSnapshot(ctx, snapshot);
      recordFailure({
        iteration,
        turnResult,
        diff,
        outcome: "reverted-no-progress",
        note: "no test changed from red to green",
        attemptNote: "no test went from red to green",
      });
      continue;
    }

    // Progress: commit a checkpoint, then adopt the new green/red sets.
    commitCheckpoint(ctx, {
      iteration,
      target: currentTarget,
      gains,
      filesTouched: diff.changedPaths,
    });
    greenSet = newGreen;
    redSet = newRed;
    stagnation = 0;
    iterationLogs.push(buildIterationLog({
      iteration,
      target: currentTarget,
      outcome: "checkpoint",
      note: `+${gains.length} green`,
      diff,
      turnResult,
    }));
    // Earlier failed attempts no longer apply once the tree has advanced.
    priorAttempts.length = 0;

    // Pick the next target from the story scope, if any remain.
    currentTarget = pickTarget(storyRedSet(), currentTarget);
    if (!currentTarget) {
      break;
    }
  }

  // Loop exited: either no story test is red any more or the iteration cap was hit.
  const allStoryGreen = [...expectedTestIds].every((id) => greenSet.has(id));
  if (allStoryGreen) {
    await pushBranch(ctx);
    return {
      kind: "success",
      iterations: iterationLogs.length,
      checkpoints: countCheckpoints(iterationLogs),
      spendUsd,
    };
  }
  return halt("ITERATION_CAP", iterationLogs.length, `Reached the ${ctx.maxIterations}-iteration cap with ${countCheckpoints(iterationLogs)} checkpoint(s). ${generateDiagnosis(iterationLogs, greenSet, expectedTestIds)}`);
}
350
/**
 * Runs one agent turn: sends the turn prompt to the model, executes any
 * tool_use blocks it returns, feeds the tool results back, and repeats until
 * the model answers with text only, stops for a reason other than tool use,
 * or the per-turn round cap is reached.
 *
 * @param ctx Run context (`spec`, `maxIterations`, `allowedPaths`, `budgetUsd`,
 *   `anthropic`, `model` are read here).
 * @param args `{ iteration, target, greenList, redList, priorAttempts, spendUsd }`.
 * @returns `{ filesTouched, rationale, spendDelta }` plus `overflowJustification`
 *   when the agent called `justify_diff_overflow`. `filesTouched` holds
 *   repo-relative paths of writes that succeeded.
 */
async function runTurn(ctx, args) {
  // Safety cap on tool rounds within a single turn; prevents a runaway loop.
  const MAX_TOOL_ROUNDS = 12;
  const specYaml = YAML.stringify(ctx.spec);
  const fallbackTargetFile = ctx.spec.story_id
    ? "(see manifest file for target test location)"
    : "(unknown)";
  const targetFilePath = findTargetTestFile(ctx, args.target) ?? fallbackTargetFile;
  const userMessage = turnPrompt({
    iteration: args.iteration,
    max_iterations: ctx.maxIterations,
    target_test_id: args.target,
    target_test_file: targetFilePath,
    spec_yaml: specYaml,
    currently_green: args.greenList,
    currently_red: args.redList,
    allowed_paths: ctx.allowedPaths,
    budget_spent_usd: args.spendUsd,
    budget_cap_usd: ctx.budgetUsd,
    // Only the three most recent attempts are shown to the model.
    previous_attempts: args.priorAttempts.slice(-3),
  });
  const filesTouched = new Set();
  let rationale = "";
  let overflowJustification;
  let spendDelta = 0;

  // Tool-use loop: call the model, execute tool_use blocks, feed tool_results back, repeat.
  const messages = [
    { role: "user", content: userMessage },
  ];
  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const response = await ctx.anthropic.messages.create({
      model: ctx.model,
      max_tokens: 4096,
      system: [
        {
          type: "text",
          text: BREW_SYSTEM,
          // Marks the system prompt as cacheable across rounds.
          cache_control: { type: "ephemeral" },
        },
      ],
      tools: BREW_TOOLS,
      messages,
    });
    spendDelta += costUsdForResponse(response, ctx.model);

    // Keep the assistant turn in the transcript so tool results can refer to it.
    messages.push({ role: "assistant", content: response.content });
    const toolBlocks = response.content.filter((b) => b.type === "tool_use");
    if (toolBlocks.length === 0) {
      // Text-only ending: the text is the agent's rationale (capped at 2000 chars).
      const text = response.content
        .filter((b) => b.type === "text")
        .map((b) => b.text)
        .join("\n")
        .trim();
      rationale = text.slice(0, 2000);
      break;
    }

    const toolResults = [];
    for (const tool of toolBlocks) {
      const result = handleToolUse(ctx, tool);
      if (tool.name === "write_file" && !result.is_error) {
        // Only successful writes count as touched. A failed write (for example
        // a path that escapes the repo) has already been reported to the model
        // as a tool error, and normalizing its path here would throw and abort
        // the whole turn.
        const writtenPath = tool.input?.path;
        if (writtenPath) {
          filesTouched.add(normalizeRepoPath(ctx, String(writtenPath)));
        }
      }
      if (tool.name === "justify_diff_overflow") {
        const input = tool.input;
        if (input)
          overflowJustification = input;
      }
      toolResults.push({
        type: "tool_result",
        tool_use_id: tool.id,
        content: result.content,
        is_error: result.is_error,
      });
    }
    messages.push({ role: "user", content: toolResults });
    if (response.stop_reason !== "tool_use")
      break;
  }

  return {
    filesTouched: [...filesTouched],
    rationale,
    spendDelta,
    ...(overflowJustification ? { overflowJustification } : {}),
  };
}
440
/**
 * Executes a single tool_use block requested by the model.
 *
 * Supported tool names: `read_file`, `list_directory`, `write_file`,
 * `justify_diff_overflow`. Any exception raised while handling the tool is
 * converted into an error result instead of propagating.
 *
 * @param ctx Run context; passed to `resolveRepoPath` to resolve tool paths.
 * @param tool The tool_use block (`name` and `input` are read).
 * @returns `{ content: string, is_error: boolean }`.
 */
function handleToolUse(ctx, tool) {
  const input = tool.input;
  const ok = (content) => ({ content, is_error: false });
  const fail = (content) => ({ content, is_error: true });
  try {
    const toolName = tool.name;

    if (toolName === "read_file") {
      const requested = String(input["path"] ?? "");
      const absolute = resolveRepoPath(ctx, requested);
      if (!existsSync(absolute)) {
        return fail(`File not found: ${requested}`);
      }
      if (!statSync(absolute).isFile()) {
        return fail(`Not a file: ${requested}`);
      }
      const body = readFileSync(absolute, "utf8");
      // Long files are cut at 20000 characters before being returned to the model.
      if (body.length > 20000) {
        return ok(body.slice(0, 20000) + "\n…(truncated)");
      }
      return ok(body);
    }

    if (toolName === "list_directory") {
      const requested = String(input["path"] ?? "");
      const absolute = resolveRepoPath(ctx, requested);
      if (!existsSync(absolute)) {
        return fail(`Not found: ${requested}`);
      }
      if (!statSync(absolute).isDirectory()) {
        return fail(`Not a directory: ${requested}`);
      }
      const names = [];
      for (const entry of readdirSync(absolute, { withFileTypes: true })) {
        // Directories are listed with a trailing slash.
        names.push(`${entry.name}${entry.isDirectory() ? "/" : ""}`);
      }
      names.sort();
      return ok(names.join("\n"));
    }

    if (toolName === "write_file") {
      const requested = String(input["path"] ?? "");
      const text = String(input["contents"] ?? "");
      const absolute = resolveRepoPath(ctx, requested);
      // Create missing parent directories before writing.
      mkdirSync(dirname(absolute), { recursive: true });
      writeFileSync(absolute, text, "utf8");
      return ok(`Wrote ${text.split("\n").length} lines to ${requested}`);
    }

    if (toolName === "justify_diff_overflow") {
      return ok("justification recorded");
    }

    return fail(`Unknown tool: ${tool.name}`);
  }
  catch (e) {
    return fail(`Tool error: ${e.message}`);
  }
}
486
+ /** ------------------------- Path helpers ------------------------- */
487
/**
 * Resolves a tool-supplied path to an absolute path inside the repository.
 *
 * Both absolute and relative inputs are checked for containment, so a relative
 * path such as `../../etc/passwd` is rejected just like an absolute path
 * outside the repo. Entries whose name merely begins with two dots
 * (e.g. `..hidden`) are inside the repo and are accepted.
 *
 * @param ctx Run context; `ctx.repoRoot` is the containment root.
 * @param p Path from the agent, absolute or relative to the repo root.
 * @returns The normalized absolute path.
 * @throws {Error} `path escapes repo: <p>` when the path resolves outside `ctx.repoRoot`.
 */
function resolveRepoPath(ctx, p) {
  const root = resolve(ctx.repoRoot);
  const full = resolve(root, p);
  const rel = relative(root, full);
  // `relative` yields ".." or "../…" for anything above the root, and an
  // absolute path when the target is on a different drive (Windows).
  const escapes = rel === ".." ||
    rel.startsWith("../") ||
    rel.startsWith("..\\") ||
    isAbsolute(rel);
  if (escapes) {
    throw new Error(`path escapes repo: ${p}`);
  }
  return full;
}

/**
 * Converts a tool-supplied path to its repo-relative form.
 *
 * @param ctx Run context; `ctx.repoRoot` is the base.
 * @param p Path from the agent, absolute or relative to the repo root.
 * @returns The path relative to `ctx.repoRoot`.
 * @throws {Error} When the path resolves outside the repository.
 */
function normalizeRepoPath(ctx, p) {
  const full = resolveRepoPath(ctx, p);
  return relative(ctx.repoRoot, full);
}
501
/**
 * Tells whether a repo-relative path is frozen.
 *
 * A path is frozen when it is listed verbatim in `frozen.files`, or when it is
 * one of the `frozen.directories` or lies beneath one. Directory matching
 * respects path-segment boundaries: `src/lib` freezes `src/lib/x.ts` but not
 * `src/library/x.ts`. Trailing slashes on directory entries are ignored, and
 * entries that are empty after stripping them match nothing.
 *
 * @param path Repo-relative path using `/` separators.
 * @param frozen `{ files: string[], directories: string[] }`.
 * @returns `true` when the path is frozen.
 */
function isFrozenPath(path, frozen) {
  if (frozen.files.includes(path)) {
    return true;
  }
  return frozen.directories.some((dir) => {
    const base = dir.replace(/\/+$/, "");
    if (base === "") {
      return false;
    }
    return path === base || path.startsWith(`${base}/`);
  });
}
512
/**
 * Tells whether a repo-relative path falls inside the allowed write scope.
 *
 * An empty `allowedPaths` list means "no restriction". Otherwise the path must
 * equal one of the entries or lie beneath it. Matching respects path-segment
 * boundaries: `src/app` allows `src/app/page.ts` but not
 * `src/app-secrets/key.ts`. Trailing slashes on entries are ignored, and
 * entries that are empty after stripping them match nothing.
 *
 * @param path Repo-relative path using `/` separators.
 * @param allowedPaths List of allowed files or directories.
 * @returns `true` when writing to `path` is in scope.
 */
function isAllowedPath(path, allowedPaths) {
  if (allowedPaths.length === 0) {
    return true;
  }
  return allowedPaths.some((entry) => {
    const base = entry.replace(/\/+$/, "");
    if (base === "") {
      return false;
    }
    return path === base || path.startsWith(`${base}/`);
  });
}
523
/**
 * Creates the pre-turn snapshot handle.
 *
 * No file contents are captured: diffing and reverting are done with git
 * against HEAD, which relies on the prior state having been committed before
 * the turn. The handle records the repo root so `computeDiff` can run git in
 * the repository regardless of the process working directory.
 *
 * @param ctx Run context; `ctx.repoRoot` is stored on the handle.
 * @returns `{ files: Map, trackedPaths: Set, repoRoot }` (the two collections are empty).
 */
function snapshotAllowedPaths(ctx) {
  return { files: new Map(), trackedPaths: new Set(), repoRoot: ctx.repoRoot };
}

/**
 * Discards everything the agent did this turn: hard-resets tracked files to
 * HEAD and deletes untracked files and directories.
 *
 * @param ctx Run context; git runs in `ctx.repoRoot`.
 * @param _snapshot Unused; kept for the call signature.
 * @throws When either git command fails.
 */
function revertToSnapshot(ctx, _snapshot) {
  // Running with `cwd` keeps the repo path out of the shell command line.
  execSync("git reset --hard HEAD", { cwd: ctx.repoRoot, stdio: "ignore" });
  execSync("git clean -fd", { cwd: ctx.repoRoot, stdio: "ignore" });
}

/**
 * Summarizes what changed in the working tree since HEAD.
 *
 * Tracked changes come from `git diff --numstat HEAD`; untracked files from
 * `git ls-files --others --exclude-standard`, each counted as all-added lines.
 * If a git command fails, its output is treated as empty.
 *
 * @param _snapshot Snapshot handle; `_snapshot.repoRoot`, when present, is the
 *   directory git runs in. Without it the process working directory is used.
 * @returns `{ changedPaths, linesAdded, linesRemoved, linesTotal }`.
 */
function computeDiff(_snapshot) {
  const cwd = _snapshot?.repoRoot ?? process.cwd();
  const numstat = execSync("git diff --numstat HEAD 2>/dev/null || echo ''", {
    cwd,
    encoding: "utf8",
  }).trim();
  const changedPaths = [];
  let linesAdded = 0;
  let linesRemoved = 0;
  for (const line of numstat.split("\n")) {
    if (!line)
      continue;
    // numstat rows are "<added>\t<removed>\t<path>"; binary files show "-" counts.
    const [addedRaw, removedRaw, ...pathParts] = line.split("\t");
    const path = pathParts.join("\t");
    if (!path)
      continue;
    const added = Number.parseInt(addedRaw ?? "", 10);
    const removed = Number.parseInt(removedRaw ?? "", 10);
    changedPaths.push(path);
    linesAdded += Number.isNaN(added) ? 0 : added;
    linesRemoved += Number.isNaN(removed) ? 0 : removed;
  }
  // Also include untracked new files.
  const untracked = execSync("git ls-files --others --exclude-standard 2>/dev/null || echo ''", {
    cwd,
    encoding: "utf8",
  }).trim();
  for (const p of untracked.split("\n").filter(Boolean)) {
    if (changedPaths.includes(p))
      continue;
    changedPaths.push(p);
    try {
      // git reports paths relative to the directory it ran in.
      const content = readFileSync(join(cwd, p), "utf8");
      linesAdded += content.split("\n").length;
    }
    catch {
      // Unreadable file: still listed as changed, but contributes no line count.
    }
  }
  return {
    changedPaths,
    linesAdded,
    linesRemoved,
    linesTotal: linesAdded + linesRemoved,
  };
}
580
/**
 * Stages every change in the repository and commits it as a brew checkpoint.
 *
 * Git is invoked without a shell, so the commit message (which embeds the
 * target test id) is passed verbatim: backticks, `$(...)` or quotes in a test
 * id cannot be interpreted as shell syntax.
 *
 * @param ctx Run context; git runs against `ctx.repoRoot`.
 * @param args `{ iteration, target, gains }` — used to build the commit message.
 * @throws When `git add` or `git commit` fails.
 */
function commitCheckpoint(ctx, args) {
  execFileSync("git", ["-C", ctx.repoRoot, "add", "-A"], { stdio: "ignore" });
  const msg = `slowcook/brew iter ${args.iteration}: +${args.gains.length} green — target ${args.target}`;
  execFileSync("git", ["-C", ctx.repoRoot, "commit", "-m", msg], { stdio: "ignore" });
}
585
/**
 * Pushes the working branch to `origin` and sets it as upstream.
 *
 * Git is invoked without a shell so the branch name is passed as a single
 * argument and cannot inject shell syntax.
 *
 * @param ctx Run context; `ctx.repoRoot` and `ctx.branchName` are used.
 * @returns A promise that resolves once the (synchronous) push has finished.
 * @throws (as a rejection) when the push fails.
 */
async function pushBranch(ctx) {
  execFileSync("git", ["-C", ctx.repoRoot, "push", "--set-upstream", "origin", ctx.branchName], { stdio: "ignore" });
  // The forge client is not used here yet; the reference is kept as a no-op.
  void ctx.forge;
}
589
+ /** ------------------------- Runner + parsers ------------------------- */
590
/**
 * Runs the project's test suite from the repository root.
 *
 * @param ctx Run context; `ctx.stackConfig` and `ctx.repoRoot` are forwarded to `runTests`.
 * @returns Whatever `runTests` returns.
 */
function runTestSuite(ctx) {
  const { stackConfig, repoRoot } = ctx;
  const options = { cwd: repoRoot };
  return runTests(stackConfig, options);
}
593
+ /** ------------------------- Target selection ------------------------- */
594
/**
 * Chooses which red test the next turn should aim at.
 *
 * @param redTests Set of red test ids to choose from.
 * @param previous The previous target id, or null/empty when there is none.
 * @returns `previous` when it is still red; otherwise the first id in default
 *   sort order (deterministic); `null` when nothing is red.
 */
function pickTarget(redTests, previous) {
  if (redTests.size === 0) {
    return null;
  }
  // Stay on the same target for as long as it remains red.
  const previousStillRed = Boolean(previous) && redTests.has(previous);
  if (previousStillRed) {
    return previous;
  }
  const [first] = Array.from(redTests).sort();
  return first ?? null;
}
603
/**
 * Looks up the test file recorded for a test id in the story manifest
 * (`.brewing/manifests/story-<storyId>.json` under the repo root).
 *
 * @param ctx Run context; `ctx.repoRoot` and `ctx.storyId` locate the manifest.
 * @param testId Id of the test to look up.
 * @returns The manifest entry's `file`, or `null` when the manifest is missing,
 *   cannot be read or parsed, or has no matching entry with a `file`.
 */
function findTargetTestFile(ctx, testId) {
  const manifestFile = join(ctx.repoRoot, ".brewing/manifests", `story-${ctx.storyId}.json`);
  if (!existsSync(manifestFile)) {
    return null;
  }
  let entry;
  try {
    const parsed = JSON.parse(readFileSync(manifestFile, "utf8"));
    entry = parsed.tests.find((candidate) => candidate.id === testId);
  }
  catch {
    // Unreadable or malformed manifest is treated the same as "not found".
    return null;
  }
  return entry?.file ?? null;
}
615
+ /** ------------------------- Cost accounting ------------------------- */
616
/**
 * Estimates the USD cost of one model response from its token usage.
 *
 * Per the Anthropic usage documentation, `input_tokens`,
 * `cache_read_input_tokens` and `cache_creation_input_tokens` are disjoint
 * counts: `input_tokens` covers only the uncached part of the prompt. The
 * three are therefore summed (with their price multipliers), not subtracted
 * from one another; subtracting under-counts spend and can even go negative
 * on cache-heavy requests, which would weaken the budget guard.
 *
 * @param response Model response; `response.usage` token counts are read
 *   (missing fields count as 0).
 * @param model Model name used to look up pricing.
 * @returns Estimated cost in USD, or 0 when no pricing entry matches the model.
 */
function costUsdForResponse(response, model) {
  const pricing = matchPricing(model);
  if (!pricing)
    return 0;
  const usage = response.usage;
  const input = usage?.input_tokens ?? 0;
  const output = usage?.output_tokens ?? 0;
  const cacheRead = usage?.cache_read_input_tokens ?? 0;
  const cacheCreate = usage?.cache_creation_input_tokens ?? 0;
  // Approximate multipliers: cache reads ~10% of the input price, cache
  // writes ~125%. Exact factors vary by model.
  const effectiveInput = input + cacheRead * 0.1 + cacheCreate * 1.25;
  return (effectiveInput / 1_000_000) * pricing.input + (output / 1_000_000) * pricing.output;
}
631
/**
 * Finds the pricing entry for a model name.
 *
 * @param model Model name, possibly carrying a suffix after a known key
 *   (e.g. "claude-opus-4-7-20250912" matches the "claude-opus-4-7" entry).
 * @returns The entry from `PRICING_PER_M_TOKENS` for an exact key match, else
 *   for the first key that `model` starts with, else `null`.
 */
function matchPricing(model) {
  // An exact key wins over any prefix match.
  const exact = PRICING_PER_M_TOKENS[model];
  if (exact) {
    return exact;
  }
  const prefixKey = Object.keys(PRICING_PER_M_TOKENS).find((key) => model.startsWith(key));
  if (prefixKey === undefined) {
    return null;
  }
  return PRICING_PER_M_TOKENS[prefixKey];
}
642
/**
 * Builds and persists a halt report, then makes best-effort attempts to push
 * partial progress and to comment on the source issue.
 *
 * @param ctx Run context (`storyId`, `now`, `budgetUsd`, `model`, `haltDir`,
 *   `repoRoot`, `branchName`, `spec.source_issue`, `forge` are read).
 * @param args `{ reason, iterations, checkpoints, greenCount, totalCount,
 *   spendUsd, summary, iterationLogs? }`.
 * @returns `{ kind: "halted", report }`.
 * @throws When writing the report fails; push and comment failures are ignored.
 */
function haltFor(ctx, args) {
  const logs = args.iterationLogs ?? [];
  // Outcomes copied into the report as-is; any other outcome is reported as
  // "reverted-no-progress".
  const reportedOutcomes = new Set(["checkpoint", "reverted-regression", "rejected-overflow"]);
  const last3 = logs.slice(-3).map((l) => ({
    iteration: l.iteration,
    files_changed: l.files_touched.length,
    lines_added: l.lines_added,
    lines_removed: l.lines_removed,
    outcome: reportedOutcomes.has(l.outcome) ? l.outcome : "reverted-no-progress",
  }));
  const lastRationale = logs.slice(-1)[0]?.rationale;
  const report = {
    story_id: ctx.storyId,
    halt_reason: args.reason,
    halt_timestamp: ctx.now().toISOString(),
    iterations_run: args.iterations,
    checkpoints_committed: args.checkpoints,
    tests_green: args.greenCount,
    tests_total: args.totalCount,
    tokens_spent_usd: args.spendUsd,
    budget_usd: ctx.budgetUsd,
    model: ctx.model,
    summary_plain_english: args.summary,
    last_three_diffs: last3.length > 0 ? last3 : undefined,
    last_agent_rationale: lastRationale,
    suggested_actions: defaultSuggestedActions(args.reason, {
      budget_usd: ctx.budgetUsd,
      iterations_run: args.iterations,
    }),
  };
  // ":" and "." in the ISO timestamp are replaced so it is usable in a file name.
  const reportPath = join(ctx.haltDir, `story-${ctx.storyId}-${report.halt_timestamp.replace(/[:.]/g, "-")}.json`);
  writeHaltReport(reportPath, report);

  // Push partial progress (if any checkpoints exist) so the operator can see what was tried.
  if (report.checkpoints_committed > 0) {
    try {
      // No shell is involved, so the branch name cannot inject shell syntax.
      execFileSync("git", ["-C", ctx.repoRoot, "push", "--set-upstream", "origin", ctx.branchName], { stdio: "ignore" });
    }
    catch {
      // Best effort: a failed push must not mask the halt report.
    }
  }

  // Post a comment on the source issue if the spec names one.
  const sourceIssue = ctx.spec.source_issue?.match(/#?(\d+)/)?.[1];
  if (sourceIssue) {
    // Deliberately not awaited; rejections are swallowed because the comment is best effort.
    ctx.forge
      .createIssueComment(Number.parseInt(sourceIssue, 10), haltReportToMarkdown(report))
      .catch(() => {
        /* best effort */
      });
  }
  return { kind: "halted", report };
}
700
/**
 * Produces a one-paragraph, human-readable diagnosis of how the brew went,
 * based on the mix of iteration outcomes.
 *
 * @param iterationLogs Iteration records; only `outcome` is read.
 * @param greenSet Set of currently green test ids.
 * @param expected Set of test ids listed in the story manifest.
 * @returns The diagnosis sentence(s).
 */
function generateDiagnosis(iterationLogs, greenSet, expected) {
  let storyGreen = 0;
  for (const id of greenSet) {
    if (expected.has(id)) {
      storyGreen += 1;
    }
  }
  const total = iterationLogs.length;
  if (total === 0) {
    return "No iterations ran.";
  }

  // Tally outcomes in a single pass.
  const tally = new Map();
  for (const { outcome } of iterationLogs) {
    tally.set(outcome, (tally.get(outcome) ?? 0) + 1);
  }
  const checkpoints = tally.get("checkpoint") ?? 0;
  const regressions = tally.get("reverted-regression") ?? 0;
  const noProgress = tally.get("reverted-no-progress") ?? 0;

  const onlyNoProgress = checkpoints === 0 && noProgress > 0 && regressions === 0;
  if (onlyNoProgress) {
    // The agent kept trying and nothing moved: typical of editing the wrong layer.
    return `All ${total} iterations reverted for no-progress (no test changed from red to green). The target test is likely unreachable from the code the agent is editing — a layer mismatch. Common cause: HTTP-loopback tests that fetch a URL with no running server. See \`.brewing/context.md\` → Testing conventions for the tier-1 (vi.mock) style that brewing can actually ratchet against.`;
  }
  if (regressions > total / 2) {
    return `${regressions} iterations regressed (broke a previously-green test). Agent may be misunderstanding an invariant — consider clarifying the spec.`;
  }
  if (checkpoints > 0 && storyGreen < expected.size) {
    return `${checkpoints} checkpoints committed; ${storyGreen}/${expected.size} story tests green. Progress was real but incomplete.`;
  }
  return `${checkpoints} checkpoint(s), ${noProgress} no-progress, ${regressions} regression(s).`;
}
720
+ /** ------------------------- Entry helpers ------------------------- */
721
/**
 * Loads the frozen-path configuration from `.brewing/frozen-paths.json`.
 *
 * @param repoRoot Repository root directory.
 * @returns `{ directories, files, partial }`. Keys that are missing or null in
 *   the file default to `[]`, `[]` and `{}`; the same defaults are returned
 *   when the file does not exist.
 * @throws When the file exists but cannot be read or parsed as JSON.
 */
export function readFrozenPaths(repoRoot) {
  const configFile = join(repoRoot, ".brewing/frozen-paths.json");
  if (!existsSync(configFile)) {
    return { directories: [], files: [], partial: {} };
  }
  const { directories, files, partial } = JSON.parse(readFileSync(configFile, "utf8"));
  return {
    directories: directories ?? [],
    files: files ?? [],
    partial: partial ?? {},
  };
}
733
/**
 * Loads `.brewing/stack.json` and passes the parsed JSON through `validateStackConfig`.
 *
 * @param repoRoot Repository root directory.
 * @returns Whatever `validateStackConfig` returns for the parsed file.
 * @throws When the file is missing, unreadable or not valid JSON.
 */
export function readStackConfig(repoRoot) {
  const configText = readFileSync(join(repoRoot, ".brewing/stack.json"), "utf8");
  const parsed = JSON.parse(configText);
  return validateStackConfig(parsed);
}
738
/**
 * Loads the spec for a story by delegating to `readSpec`.
 *
 * @param repoRoot Repository root directory.
 * @param storyId Story identifier.
 * @returns Whatever `readSpec` returns.
 */
export function loadSpec(repoRoot, storyId) {
  const spec = readSpec(repoRoot, storyId);
  return spec;
}
741
+ //# sourceMappingURL=agent.js.map