@towles/tool 0.0.62 → 0.0.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/package.json +50 -57
  2. package/src/commands/agentboard.ts +176 -0
  3. package/src/commands/{auto-claude.ts → auto-claude/index.ts} +18 -28
  4. package/src/commands/auto-claude/list.ts +114 -0
  5. package/src/commands/auto-claude/retry.test.ts +138 -0
  6. package/src/commands/auto-claude/retry.ts +139 -0
  7. package/src/commands/auto-claude/status.test.ts +147 -0
  8. package/src/commands/auto-claude/status.ts +123 -0
  9. package/src/commands/base.ts +7 -2
  10. package/src/commands/config.ts +5 -7
  11. package/src/commands/doctor.ts +111 -12
  12. package/src/commands/gh/branch.ts +4 -4
  13. package/src/commands/gh/pr.ts +1 -0
  14. package/src/commands/graph/index.ts +169 -0
  15. package/src/commands/graph.test.ts +1 -1
  16. package/src/commands/install.ts +40 -68
  17. package/src/commands/journal/daily-notes.ts +3 -3
  18. package/src/commands/journal/meeting.ts +3 -3
  19. package/src/commands/journal/note.ts +3 -3
  20. package/src/lib/auto-claude/claude-cli.ts +183 -0
  21. package/src/lib/auto-claude/config.test.ts +6 -8
  22. package/src/lib/auto-claude/config.ts +3 -4
  23. package/src/lib/auto-claude/index.ts +2 -3
  24. package/src/lib/auto-claude/labels.test.ts +85 -0
  25. package/src/lib/auto-claude/labels.ts +42 -0
  26. package/src/lib/auto-claude/pipeline-execution.test.ts +129 -33
  27. package/src/lib/auto-claude/pipeline.test.ts +2 -2
  28. package/src/lib/auto-claude/pipeline.ts +120 -36
  29. package/src/lib/auto-claude/prompt-templates/01_plan.prompt.md +68 -0
  30. package/src/lib/auto-claude/prompt-templates/{05_implement.prompt.md → 02_implement.prompt.md} +3 -2
  31. package/src/lib/auto-claude/prompt-templates/03_simplify.prompt.md +52 -0
  32. package/src/lib/auto-claude/prompt-templates/{06_review.prompt.md → 04_review.prompt.md} +29 -6
  33. package/src/lib/auto-claude/prompt-templates/index.test.ts +9 -42
  34. package/src/lib/auto-claude/prompt-templates/index.ts +13 -28
  35. package/src/lib/auto-claude/run-claude.test.ts +48 -68
  36. package/src/lib/auto-claude/shell.ts +6 -0
  37. package/src/lib/auto-claude/steps/create-pr.ts +89 -25
  38. package/src/lib/auto-claude/steps/fetch-issues.ts +4 -1
  39. package/src/lib/auto-claude/steps/implement.ts +9 -16
  40. package/src/lib/auto-claude/steps/simple-steps.ts +34 -0
  41. package/src/lib/auto-claude/steps/steps.test.ts +68 -63
  42. package/src/lib/auto-claude/templates.test.ts +91 -0
  43. package/src/lib/auto-claude/templates.ts +34 -0
  44. package/src/lib/auto-claude/test-helpers.ts +2 -1
  45. package/src/lib/auto-claude/utils-execution.test.ts +9 -57
  46. package/src/lib/auto-claude/utils.test.ts +5 -9
  47. package/src/lib/auto-claude/utils.ts +27 -253
  48. package/src/lib/graph/analyzer.test.ts +451 -0
  49. package/src/lib/graph/analyzer.ts +165 -0
  50. package/src/lib/graph/index.ts +24 -0
  51. package/src/lib/graph/labels.ts +87 -0
  52. package/src/lib/graph/parser.test.ts +150 -0
  53. package/src/lib/graph/parser.ts +65 -0
  54. package/src/lib/graph/render.ts +25 -0
  55. package/src/lib/graph/server.ts +70 -0
  56. package/src/lib/graph/sessions.ts +104 -0
  57. package/src/lib/graph/tools.ts +90 -0
  58. package/src/lib/graph/treemap.ts +211 -0
  59. package/src/lib/graph/types.ts +80 -0
  60. package/src/lib/install/claude-settings.ts +64 -0
  61. package/src/lib/journal/editor.ts +33 -0
  62. package/src/lib/journal/fs.ts +13 -0
  63. package/src/lib/journal/index.ts +11 -0
  64. package/src/lib/journal/paths.ts +106 -0
  65. package/src/lib/journal/{utils.ts → templates.ts} +3 -151
  66. package/src/utils/fs.ts +19 -0
  67. package/src/utils/git/exec.ts +18 -0
  68. package/src/utils/git/gh-cli-wrapper.test.ts +47 -8
  69. package/src/utils/git/gh-cli-wrapper.ts +31 -19
  70. package/src/utils/render.ts +3 -1
  71. package/src/commands/graph.ts +0 -970
  72. package/src/lib/auto-claude/prompt-templates/01_research.prompt.md +0 -21
  73. package/src/lib/auto-claude/prompt-templates/02_plan.prompt.md +0 -27
  74. package/src/lib/auto-claude/prompt-templates/03_plan-annotations.prompt.md +0 -15
  75. package/src/lib/auto-claude/prompt-templates/04_plan-implementation.prompt.md +0 -35
  76. package/src/lib/auto-claude/prompt-templates/07_refresh.prompt.md +0 -30
  77. package/src/lib/auto-claude/steps/plan-annotations.ts +0 -54
  78. package/src/lib/auto-claude/steps/plan-implementation.ts +0 -14
  79. package/src/lib/auto-claude/steps/plan.ts +0 -14
  80. package/src/lib/auto-claude/steps/refresh.ts +0 -114
  81. package/src/lib/auto-claude/steps/remove-label.ts +0 -22
  82. package/src/lib/auto-claude/steps/research.ts +0 -21
  83. package/src/lib/auto-claude/steps/review.ts +0 -14
@@ -10,11 +10,9 @@ export const AutoClaudeConfigSchema = z.object({
10
10
  remote: z.string().default("origin"),
11
11
  maxImplementIterations: z.number().default(5),
12
12
  maxTurns: z.number().optional(),
13
+ model: z.string().default("opus"),
14
+ maxReviewRetries: z.number().default(2),
13
15
  loopIntervalMinutes: z.number().default(30),
14
- loopRetryEnabled: z.boolean().default(false),
15
- maxRetries: z.number().default(5),
16
- retryDelayMs: z.number().default(30_000),
17
- maxRetryDelayMs: z.number().default(300_000),
18
16
  });
19
17
 
20
18
  export type AutoClaudeConfig = z.infer<typeof AutoClaudeConfigSchema>;
@@ -49,6 +47,7 @@ export async function initConfig(
49
47
  });
50
48
  mainBranch = result.stdout.trim().replace("refs/remotes/origin/", "");
51
49
  } catch {
50
+ consola.debug("Could not detect default branch from origin/HEAD, defaulting to 'main'");
52
51
  mainBranch = "main";
53
52
  }
54
53
  }
@@ -1,15 +1,14 @@
1
1
  export { type AutoClaudeConfig, AutoClaudeConfigSchema, getConfig, initConfig } from "./config.js";
2
2
  export { STEP_NAMES, runPipeline } from "./pipeline.js";
3
3
  export type { StepName } from "./prompt-templates/index.js";
4
+ export { git } from "../../utils/git/exec.js";
5
+ export { sleep } from "./shell.js";
4
6
  export { fetchIssue, fetchIssues } from "./steps/fetch-issues.js";
5
- export { stepRefresh } from "./steps/refresh.js";
6
7
  export {
7
8
  type IssueContext,
8
9
  buildContextFromArtifacts,
9
10
  buildIssueContext,
10
11
  ensureBranch,
11
- git,
12
12
  log,
13
13
  logBanner,
14
- sleep,
15
14
  } from "./utils.js";
@@ -0,0 +1,85 @@
1
+ import { describe, expect, it, vi, beforeEach } from "vitest";
2
+
3
+ import { execSafe } from "../../utils/git/exec.js";
4
+ import { ensureLabelsExist, LABELS, removeLabel, setLabel } from "./labels";
5
+
6
+ vi.mock("../../utils/git/exec.js", () => ({
7
+ execSafe: vi.fn().mockResolvedValue({ stdout: "", ok: true }),
8
+ }));
9
+
10
+ const mockedExecSafe = vi.mocked(execSafe);
11
+
12
// Pins the public label names: GitHub issues are tagged with these exact strings.
describe("LABELS", () => {
  it("has expected label values", () => {
    const expected = {
      inProgress: "auto-claude-in-progress",
      review: "auto-claude-review",
      failed: "auto-claude-failed",
      success: "auto-claude-success",
    } as const;

    for (const [key, value] of Object.entries(expected)) {
      expect(LABELS[key as keyof typeof LABELS]).toBe(value);
    }
  });

  it("has exactly 4 labels", () => {
    const labelKeys = Object.keys(LABELS);
    expect(labelKeys).toHaveLength(4);
  });
});
24
+
25
// ensureLabelsExist should issue one `gh label create … --force` call per known label.
describe("ensureLabelsExist", () => {
  beforeEach(() => {
    // Drop call history recorded by earlier tests.
    vi.clearAllMocks();
  });

  it("creates all labels with --force", async () => {
    const repo = "owner/repo";

    await ensureLabelsExist(repo);

    expect(mockedExecSafe).toHaveBeenCalledTimes(4);
    Object.values(LABELS).forEach((label) => {
      const expectedArgs = ["label", "create", label, "--repo", repo, "--force"];
      expect(mockedExecSafe).toHaveBeenCalledWith("gh", expectedArgs);
    });
  });
});
46
+
47
// setLabel should translate into a single `gh issue edit … --add-label` invocation.
describe("setLabel", () => {
  beforeEach(() => {
    // Drop call history recorded by earlier tests.
    vi.clearAllMocks();
  });

  it("calls gh issue edit with --add-label", async () => {
    const expectedArgs = [
      "issue",
      "edit",
      "42",
      "--repo",
      "owner/repo",
      "--add-label",
      "auto-claude-in-progress",
    ];

    await setLabel("owner/repo", 42, "auto-claude-in-progress");

    expect(mockedExecSafe).toHaveBeenCalledWith("gh", expectedArgs);
  });
});
66
+
67
// removeLabel should translate into a single `gh issue edit … --remove-label` invocation.
describe("removeLabel", () => {
  beforeEach(() => {
    // Drop call history recorded by earlier tests.
    vi.clearAllMocks();
  });

  it("calls gh issue edit with --remove-label", async () => {
    const expectedArgs = [
      "issue",
      "edit",
      "42",
      "--repo",
      "owner/repo",
      "--remove-label",
      "auto-claude-failed",
    ];

    await removeLabel("owner/repo", 42, "auto-claude-failed");

    expect(mockedExecSafe).toHaveBeenCalledWith("gh", expectedArgs);
  });
});
@@ -0,0 +1,42 @@
1
+ import { execSafe } from "../../utils/git/exec.js";
2
+
3
+ // ── Label helpers ──
4
+
5
/**
 * GitHub issue labels auto-claude uses to publish pipeline state.
 * Key order is significant only in that `Object.values(LABELS)` is iterated
 * when the labels are created.
 */
export const LABELS = {
  /** Applied while a pipeline run is working on the issue. */
  inProgress: "auto-claude-in-progress",
  /** Applied together with `success` once the review passes. */
  review: "auto-claude-review",
  /** Applied when a step fails or the review retries are exhausted. */
  failed: "auto-claude-failed",
  /** Applied once the review passes and the PR step has run. */
  success: "auto-claude-success",
} as const;
11
+
12
/**
 * Creates every label in `LABELS` on the given repository.
 *
 * All `gh label create … --force` calls are started before any is awaited, so
 * they run concurrently. The results returned by `execSafe` are not inspected.
 *
 * @param repo Repository in the form passed to `gh --repo` (e.g. `owner/repo`).
 */
export async function ensureLabelsExist(repo: string): Promise<void> {
  const pending = [];
  for (const label of Object.values(LABELS)) {
    pending.push(execSafe("gh", ["label", "create", label, "--repo", repo, "--force"]));
  }
  await Promise.all(pending);
}
19
+
20
/**
 * Adds a label to an issue via `gh issue edit --add-label`.
 *
 * The result returned by `execSafe` is not inspected.
 *
 * @param repo Repository passed to `gh --repo`.
 * @param issueNumber Issue to edit.
 * @param label Label name to add.
 */
export async function setLabel(repo: string, issueNumber: number, label: string): Promise<void> {
  const ghArgs = ["issue", "edit", `${issueNumber}`, "--repo", repo, "--add-label", label];
  await execSafe("gh", ghArgs);
}
31
+
32
/**
 * Removes a label from an issue via `gh issue edit --remove-label`.
 *
 * The result returned by `execSafe` is not inspected.
 *
 * @param repo Repository passed to `gh --repo`.
 * @param issueNumber Issue to edit.
 * @param label Label name to remove.
 */
export async function removeLabel(repo: string, issueNumber: number, label: string): Promise<void> {
  const ghArgs = ["issue", "edit", `${issueNumber}`, "--repo", repo, "--remove-label", label];
  await execSafe("gh", ghArgs);
}
@@ -22,9 +22,8 @@ consola.level = -999;
22
22
  let mockClaudeImpl: MockClaudeImpl = null;
23
23
  vi.mock("./spawn-claude", () => createSpawnClaudeMock(() => mockClaudeImpl));
24
24
 
25
- // ── Mock tinyexec: intercept "gh" calls, pass through git ──
26
-
27
- let mockGhImpl: ((args: string[]) => Promise<{ stdout: string; exitCode: number }>) | null = null;
25
+ // Track gh calls for label assertions
26
+ let ghCalls: string[][] = [];
28
27
 
29
28
  vi.mock("tinyexec", async (importOriginal) => {
30
29
  const original = await importOriginal<typeof import("tinyexec")>();
@@ -36,11 +35,10 @@ vi.mock("tinyexec", async (importOriginal) => {
36
35
  args: string[],
37
36
  opts?: Record<string, unknown>,
38
37
  ): Promise<{ stdout: string; exitCode: number }> => {
39
- if (cmd === "gh" && mockGhImpl) {
40
- return mockGhImpl(args);
41
- }
42
38
  if (cmd === "gh") {
43
- throw new Error("Unexpected gh call -- set mockGhImpl");
39
+ ghCalls.push(args);
40
+ // Return empty success for label/issue/pr commands
41
+ return { stdout: "[]", exitCode: 0 };
44
42
  }
45
43
  return original.x(cmd, args, opts as never) as unknown as Promise<{
46
44
  stdout: string;
@@ -63,11 +61,11 @@ describe("runPipeline", () => {
63
61
  await initConfig({
64
62
  repo: "test/repo",
65
63
  mainBranch: "main",
66
- maxImplementIterations: 2,
64
+ maxReviewRetries: 2,
67
65
  });
68
66
  ctx = buildTestContext(repo.dir);
69
67
  mockClaudeImpl = null;
70
- mockGhImpl = null;
68
+ ghCalls = [];
71
69
  });
72
70
 
73
71
  afterEach(() => {
@@ -109,16 +107,16 @@ describe("runPipeline", () => {
109
107
  const { runPipeline } = await import("./pipeline");
110
108
 
111
109
  let claudeCallCount = 0;
112
- const researchPath = join(ctx.issueDir, ARTIFACTS.research);
110
+ const planPath = join(ctx.issueDir, ARTIFACTS.plan);
113
111
 
114
112
  mockClaudeImpl = () => {
115
113
  claudeCallCount++;
116
114
  mkdirSync(ctx.issueDir, { recursive: true });
117
- writeFileSync(researchPath, "x".repeat(250));
115
+ writeFileSync(planPath, "# Plan\n\nDetailed plan.");
118
116
  return { stdout: successClaudeJson(), exitCode: 0 };
119
117
  };
120
118
 
121
- await runPipeline(ctx, "research");
119
+ await runPipeline(ctx, "plan");
122
120
 
123
121
  expect(claudeCallCount).toBe(1);
124
122
  });
@@ -137,7 +135,7 @@ describe("runPipeline", () => {
137
135
  expect(currentBranch).toBe("main");
138
136
  });
139
137
 
140
- it("runs all steps in order when all succeed", async () => {
138
+ it("runs all 4 steps in order when review passes", async () => {
141
139
  const { runPipeline } = await import("./pipeline");
142
140
 
143
141
  let claudeCallCount = 0;
@@ -147,45 +145,143 @@ describe("runPipeline", () => {
147
145
 
148
146
  switch (claudeCallCount) {
149
147
  case 1:
150
- writeFileSync(join(ctx.issueDir, ARTIFACTS.research), "x".repeat(250));
148
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
151
149
  break;
152
150
  case 2:
153
- writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
151
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
154
152
  break;
155
153
  case 3:
156
- writeFileSync(join(ctx.issueDir, ARTIFACTS.planImplementation), "# Impl Plan");
154
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
157
155
  break;
158
156
  case 4:
157
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "PASS\n\nLooks good.");
158
+ break;
159
+ }
160
+ return { stdout: successClaudeJson(), exitCode: 0 };
161
+ };
162
+
163
+ await runPipeline(ctx);
164
+
165
+ expect(claudeCallCount).toBe(4);
166
+
167
+ // Verify auto-claude-success and auto-claude-review labels were set
168
+ const successLabelCall = ghCalls.find(
169
+ (args) => args.includes("--add-label") && args.includes("auto-claude-success"),
170
+ );
171
+ expect(successLabelCall).toBeDefined();
172
+ const reviewLabelCall = ghCalls.find(
173
+ (args) => args.includes("--add-label") && args.includes("auto-claude-review"),
174
+ );
175
+ expect(reviewLabelCall).toBeDefined();
176
+ });
177
+
178
+ it("retries implement→simplify→review on review fail then pass", async () => {
179
+ const { runPipeline } = await import("./pipeline");
180
+
181
+ let claudeCallCount = 0;
182
+ mockClaudeImpl = () => {
183
+ claudeCallCount++;
184
+ mkdirSync(ctx.issueDir, { recursive: true });
185
+
186
+ switch (claudeCallCount) {
187
+ case 1: // plan
188
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
189
+ break;
190
+ case 2: // implement (attempt 1)
159
191
  writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
160
192
  break;
161
- case 5:
162
- writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "# Review\nLooks good.");
193
+ case 3: // simplify (attempt 1)
194
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
195
+ break;
196
+ case 4: // review (attempt 1 - FAIL)
197
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "FAIL\n\nNeeds work.");
198
+ break;
199
+ case 5: // implement (attempt 2)
200
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done v2");
201
+ break;
202
+ case 6: // simplify (attempt 2)
203
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified v2");
204
+ break;
205
+ case 7: // review (attempt 2 - PASS)
206
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "PASS\n\nGood now.");
163
207
  break;
164
208
  }
165
209
  return { stdout: successClaudeJson(), exitCode: 0 };
166
210
  };
167
211
 
168
- let ghCallCount = 0;
169
- mockGhImpl = async (args: string[]) => {
170
- ghCallCount++;
171
- if (args[0] === "pr" && args[1] === "list") {
172
- return { stdout: "[]", exitCode: 0 };
212
+ await runPipeline(ctx);
213
+
214
+ // 1 plan + 3 steps * 2 attempts = 7
215
+ expect(claudeCallCount).toBe(7);
216
+ });
217
+
218
+ it("sets auto-claude-failed label after max retries exhausted", async () => {
219
+ const { runPipeline } = await import("./pipeline");
220
+
221
+ let claudeCallCount = 0;
222
+ mockClaudeImpl = () => {
223
+ claudeCallCount++;
224
+ mkdirSync(ctx.issueDir, { recursive: true });
225
+
226
+ // Plan
227
+ if (claudeCallCount === 1) {
228
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
229
+ return { stdout: successClaudeJson(), exitCode: 0 };
173
230
  }
174
- if (args[0] === "pr" && args[1] === "create") {
175
- return { stdout: "https://github.com/test/repo/pull/1", exitCode: 0 };
231
+
232
+ // Each retry cycle: implement, simplify, review (always FAIL)
233
+ const stepInCycle = (claudeCallCount - 2) % 3;
234
+ switch (stepInCycle) {
235
+ case 0:
236
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
237
+ break;
238
+ case 1:
239
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
240
+ break;
241
+ case 2:
242
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "FAIL\n\nStill bad.");
243
+ break;
176
244
  }
177
- return { stdout: "", exitCode: 0 };
245
+ return { stdout: successClaudeJson(), exitCode: 0 };
178
246
  };
179
247
 
180
248
  await runPipeline(ctx);
181
249
 
182
- const prUrlPath = join(ctx.issueDir, ARTIFACTS.prUrl);
183
- if (existsSync(prUrlPath)) {
184
- const prUrl = readFileSync(prUrlPath, "utf-8");
185
- expect(prUrl).toContain("github.com");
186
- }
250
+ // 1 plan + 3 steps * 3 attempts (maxReviewRetries=2 → 3 total) = 10
251
+ expect(claudeCallCount).toBe(10);
252
+
253
+ // Verify auto-claude-failed label was set
254
+ const failedLabelCall = ghCalls.find(
255
+ (args) => args.includes("--add-label") && args.includes("auto-claude-failed"),
256
+ );
257
+ expect(failedLabelCall).toBeDefined();
258
+
259
+ // Verify issue comment was posted
260
+ const commentCall = ghCalls.find((args) => args[0] === "issue" && args[1] === "comment");
261
+ expect(commentCall).toBeDefined();
262
+ });
263
+
264
+ it("--until implement stops after implement step", async () => {
265
+ const { runPipeline } = await import("./pipeline");
266
+
267
+ let claudeCallCount = 0;
268
+ mockClaudeImpl = () => {
269
+ claudeCallCount++;
270
+ mkdirSync(ctx.issueDir, { recursive: true });
271
+
272
+ switch (claudeCallCount) {
273
+ case 1:
274
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
275
+ break;
276
+ case 2:
277
+ writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
278
+ break;
279
+ }
280
+ return { stdout: successClaudeJson(), exitCode: 0 };
281
+ };
282
+
283
+ await runPipeline(ctx, "implement");
187
284
 
188
- expect(claudeCallCount).toBe(5);
189
- expect(ghCallCount).toBeGreaterThanOrEqual(2);
285
+ expect(claudeCallCount).toBe(2);
190
286
  });
191
287
  });
@@ -8,7 +8,7 @@ describe("STEP_NAMES", () => {
8
8
  expect(STEP_NAMES).toEqual(PIPELINE_STEPS.map((s) => s.name));
9
9
  });
10
10
 
11
- it("should have 8 steps", () => {
12
- expect(STEP_NAMES).toHaveLength(8);
11
+ it("should have 4 steps", () => {
12
+ expect(STEP_NAMES).toHaveLength(4);
13
13
  });
14
14
  });
@@ -1,33 +1,23 @@
1
+ import { rmSync } from "node:fs";
1
2
  import { join } from "node:path";
2
3
 
3
4
  import { getConfig } from "./config.js";
4
- import { ARTIFACTS, PIPELINE_STEPS } from "./prompt-templates/index.js";
5
+ import { ARTIFACTS } from "./prompt-templates/index.js";
5
6
  import type { StepName } from "./prompt-templates/index.js";
6
- import { stepCreatePR } from "./steps/create-pr.js";
7
+ import { createPr } from "./steps/create-pr.js";
7
8
  import { stepImplement } from "./steps/implement.js";
8
- import { stepPlanAnnotations } from "./steps/plan-annotations.js";
9
- import { stepPlanImplementation } from "./steps/plan-implementation.js";
10
- import { stepPlan } from "./steps/plan.js";
11
- import { stepRemoveLabel } from "./steps/remove-label.js";
12
- import { stepResearch } from "./steps/research.js";
13
- import { stepReview } from "./steps/review.js";
14
- import { ensureDir, fileExists, git, log, readFile, writeFile } from "./utils.js";
9
+ import { stepPlan, stepReview, stepSimplify } from "./steps/simple-steps.js";
10
+ import { LABELS, ensureLabelsExist, removeLabel, setLabel } from "./labels.js";
11
+ import { ensureDir, fileExists, readFile, writeFile } from "../../utils/fs.js";
12
+ import { execSafe, git } from "../../utils/git/exec.js";
13
+ import { ghRaw } from "../../utils/git/gh-cli-wrapper.js";
14
+ import { log } from "./utils.js";
15
15
  import type { IssueContext } from "./utils.js";
16
16
 
17
- const STEP_RUNNERS: Record<StepName, (ctx: IssueContext) => Promise<boolean>> = {
18
- research: stepResearch,
19
- plan: stepPlan,
20
- "plan-annotations": stepPlanAnnotations,
21
- "plan-implementation": stepPlanImplementation,
22
- implement: stepImplement,
23
- review: stepReview,
24
- "create-pr": stepCreatePR,
25
- "remove-label": stepRemoveLabel,
26
- };
27
-
28
17
  export { type StepName, STEP_NAMES } from "./prompt-templates/index.js";
29
18
 
30
19
  export async function runPipeline(ctx: IssueContext, untilStep?: StepName): Promise<void> {
20
+ const cfg = getConfig();
31
21
  log(`Pipeline starting for ${ctx.repo}#${ctx.number}: ${ctx.title}`);
32
22
 
33
23
  ensureDir(ctx.issueDir);
@@ -38,30 +28,124 @@ export async function runPipeline(ctx: IssueContext, untilStep?: StepName): Prom
38
28
  log("Saved initial-ramblings.md");
39
29
  }
40
30
 
41
- for (const step of PIPELINE_STEPS) {
42
- const runner = STEP_RUNNERS[step.name];
43
- const success = await runner(ctx);
31
+ // Label management
32
+ await ensureLabelsExist(ctx.repo);
33
+ await removeLabel(ctx.repo, ctx.number, cfg.triggerLabel);
34
+ await setLabel(ctx.repo, ctx.number, LABELS.inProgress);
44
35
 
45
- if (!success) {
46
- log(`Pipeline stopped at "${step.name}" for ${ctx.repo}#${ctx.number}`);
47
- await checkoutMain();
36
+ try {
37
+ // Step 1: Plan (runs once)
38
+ if (!(await stepPlan(ctx))) {
39
+ await handleFailure(ctx, "plan");
48
40
  return;
49
41
  }
50
-
51
- if (untilStep && step.name === untilStep) {
52
- log(`Pipeline paused after "${step.name}" (--until ${untilStep})`);
53
- await checkoutMain();
42
+ if (untilStep === "plan") {
43
+ log(`Pipeline paused after "plan" (--until plan)`);
54
44
  return;
55
45
  }
46
+
47
+ // Steps 2-4: Implement → Simplify → Review loop
48
+ const maxRetries = cfg.maxReviewRetries;
49
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
50
+ // Clear previous iteration artifacts (except plan)
51
+ if (attempt > 0) {
52
+ clearArtifact(ctx, ARTIFACTS.completedSummary);
53
+ clearArtifact(ctx, ARTIFACTS.simplifySummary);
54
+ clearArtifact(ctx, ARTIFACTS.review);
55
+ }
56
+
57
+ // Implement
58
+ if (!(await stepImplement(ctx))) {
59
+ await handleFailure(ctx, "implement");
60
+ return;
61
+ }
62
+ if (untilStep === "implement") {
63
+ log(`Pipeline paused after "implement" (--until implement)`);
64
+ return;
65
+ }
66
+
67
+ // Simplify
68
+ if (!(await stepSimplify(ctx))) {
69
+ await handleFailure(ctx, "simplify");
70
+ return;
71
+ }
72
+ if (untilStep === "simplify") {
73
+ log(`Pipeline paused after "simplify" (--until simplify)`);
74
+ return;
75
+ }
76
+
77
+ // Review
78
+ if (!(await stepReview(ctx))) {
79
+ await handleFailure(ctx, "review");
80
+ return;
81
+ }
82
+ if (untilStep === "review") {
83
+ log(`Pipeline paused after "review" (--until review)`);
84
+ return;
85
+ }
86
+
87
+ // Check review result
88
+ if (isReviewPass(ctx)) {
89
+ const prUrl = await createPr(ctx);
90
+ await removeLabel(ctx.repo, ctx.number, LABELS.inProgress);
91
+ await setLabel(ctx.repo, ctx.number, LABELS.success);
92
+ await setLabel(ctx.repo, ctx.number, LABELS.review);
93
+ log(`Pipeline complete for ${ctx.repo}#${ctx.number} — ${prUrl}`);
94
+ return;
95
+ }
96
+
97
+ // Review failed
98
+ if (attempt < maxRetries) {
99
+ log(
100
+ `Review did not pass (attempt ${attempt + 1}/${maxRetries + 1}), retrying implement→simplify→review…`,
101
+ );
102
+ }
103
+ }
104
+
105
+ // All retries exhausted
106
+ await handleFailure(
107
+ ctx,
108
+ "review",
109
+ `auto-claude: review did not pass after ${maxRetries + 1} attempts. Labelled \`${LABELS.failed}\`.`,
110
+ );
111
+ } finally {
112
+ await checkoutMain();
56
113
  }
114
+ }
115
+
116
/**
 * Deletes one artifact file from the issue's working directory.
 *
 * `force: true` makes this a no-op when the file does not exist.
 *
 * @param ctx Issue context; only `issueDir` is read.
 * @param artifact File name of the artifact, relative to `ctx.issueDir`.
 */
function clearArtifact(ctx: IssueContext, artifact: string): void {
  const target = join(ctx.issueDir, artifact);
  rmSync(target, { force: true });
}
57
119
 
58
- const prUrlPath = join(ctx.issueDir, ARTIFACTS.prUrl);
59
- const prUrl = fileExists(prUrlPath) ? readFile(prUrlPath).trim() : "";
60
- const prSuffix = prUrl ? ` — ${prUrl}` : "";
61
- log(`Pipeline complete for ${ctx.repo}#${ctx.number}${prSuffix}`);
62
- await checkoutMain();
120
/**
 * Reports whether the review artifact records a passing verdict.
 *
 * The verdict is the first line of the review file; it passes only when that
 * line, trimmed and compared case-insensitively, is exactly `PASS`.
 *
 * @param ctx Issue context; only `issueDir` is read.
 * @returns `true` for a passing review, `false` when the file is missing or
 *   its first line is anything else.
 */
function isReviewPass(ctx: IssueContext): boolean {
  const reviewFile = join(ctx.issueDir, ARTIFACTS.review);
  if (fileExists(reviewFile)) {
    const [verdictLine] = readFile(reviewFile).split("\n");
    return verdictLine.trim().toUpperCase() === "PASS";
  }
  return false;
}
127
+
128
/**
 * Marks the issue as failed and logs where the pipeline stopped.
 *
 * Swaps the in-progress label for the failed label, then optionally posts an
 * explanatory comment on the issue.
 *
 * @param ctx Issue context (`repo` and `number` are read).
 * @param stepName Name of the step the pipeline stopped at; used in the log line only.
 * @param comment Optional issue comment body; nothing is posted when it is omitted or empty.
 */
async function handleFailure(ctx: IssueContext, stepName: string, comment?: string): Promise<void> {
  const { repo, number: issueNumber } = ctx;

  await removeLabel(repo, issueNumber, LABELS.inProgress);
  await setLabel(repo, issueNumber, LABELS.failed);

  if (comment) {
    const commentArgs = ["issue", "comment", String(issueNumber), "--repo", repo, "--body", comment];
    await ghRaw(commentArgs);
  }

  log(`Pipeline stopped at "${stepName}" for ${repo}#${issueNumber}`);
}
64
136
 
65
137
/**
 * Switches the working tree back to the configured main branch and restores
 * the stash auto-claude left behind, if there is one.
 *
 * Both steps are best-effort and never throw from here: a failed checkout is
 * ignored, and a failed stash lookup simply skips the restore. The stash is
 * recognised by its message prefix "auto-claude: before switching to"
 * (presumably written when the issue branch was created — confirm against the
 * branch-switching helper).
 */
async function checkoutMain(): Promise<void> {
  await git(["checkout", getConfig().mainBranch]).catch(() => {
    // Best-effort checkout — may fail if branch doesn't exist locally yet
  });

  const stashList = await execSafe("git", ["stash", "list"]);
  if (!stashList.ok) return;

  // `git stash list` prints one entry per line starting at stash@{0}, so the
  // position of the matching line is the index of the stash to pop.
  const idx = stashList.stdout
    .split("\n")
    .findIndex((l) => l.includes("auto-claude: before switching to"));
  if (idx < 0) return;

  // Only report success when the pop actually succeeded; a conflicting pop
  // leaves the stash in place and the user needs to know about it.
  const popped = await execSafe("git", ["stash", "pop", `stash@{${idx}}`]);
  if (popped.ok) {
    log("Restored stashed changes");
  } else {
    log(`Could not restore stashed changes from stash@{${idx}} — resolve manually with "git stash pop"`);
  }
}
@@ -0,0 +1,68 @@
1
+ You are a planning agent. Your job is to research the issue, explore the codebase, and produce a detailed implementation plan.
2
+
3
+ The issue is in @{{ISSUE_DIR}}/initial-ramblings.md.
4
+ The code lives primarily at `{{SCOPE_PATH}}/`.
5
+ The base branch is `{{MAIN_BRANCH}}`.
6
+
7
+ ## Phase 1: Research
8
+
9
+ 1. Read the issue description thoroughly — understand the problem, requirements, and constraints
10
+ 2. Explore the relevant areas of the codebase:
11
+ - Find files related to the feature/bug area
12
+ - Read existing implementations of similar patterns
13
+ - Identify dependencies, imports, and shared utilities
14
+ - Check for existing tests in the area
15
+ 3. Read the project's CLAUDE.md for coding conventions, test commands, and architecture guidance
16
+
17
+ ## Phase 2: Design
18
+
19
+ 1. Identify the approach — what needs to change and why
20
+ 2. Consider alternatives and trade-offs — pick the simplest path that meets requirements
21
+ 3. Identify risks, edge cases, and things that could go wrong
22
+ 4. Determine test strategy — what needs testing and how
23
+
24
+ ## Phase 3: Write the Plan
25
+
26
+ Write the plan to @{{ISSUE_DIR}}/plan.md with this structure:
27
+
28
+ ```markdown
29
+ # Plan: <concise title>
30
+
31
+ ## Summary
32
+
33
+ 1-3 sentence description of the change and why it's needed.
34
+
35
+ ## Approach
36
+
37
+ High-level description of the solution strategy.
38
+
39
+ ## Files to Change
40
+
41
+ - `path/to/file.ext` — what changes and why
42
+ - `path/to/new-file.ext` — (new) purpose
43
+ - `path/to/deleted.ext` — (delete) reason
44
+
45
+ ## Implementation Checklist
46
+
47
+ - [ ] Task 1 — specific, actionable description
48
+ - [ ] Task 2 — include file paths where relevant
49
+ - [ ] Task 3 — tests: describe what to test
50
+ - [ ] ...
51
+
52
+ ## Test Strategy
53
+
54
+ How to verify the implementation is correct. Which behaviors need test coverage.
55
+
56
+ ## Risks / Edge Cases
57
+
58
+ Anything the implementer should watch out for.
59
+ ```
60
+
61
+ ## Guidelines
62
+
63
+ - The checklist is the implementer's single source of truth — make every task actionable and unambiguous
64
+ - Include file paths in tasks so the implementer doesn't have to search
65
+ - Order tasks logically — dependencies before dependents, types/interfaces before implementations
66
+ - Include test tasks inline (not as a separate phase) — test each behavior near the task that creates it
67
+ - Keep it focused — don't over-plan. If a task is straightforward, a single line is enough
68
+ - Follow the project's coding conventions from CLAUDE.md
@@ -1,8 +1,9 @@
1
- You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan-implementation.md task by task.
1
+ You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan.md task by task.
2
2
 
3
- The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist.
3
+ The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist in plan.md.
4
4
 
5
5
  The code lives primarily at `{{SCOPE_PATH}}/`.
6
+ {{REVIEW_FEEDBACK}}
6
7
 
7
8
  ## How to work
8
9