npm - @towles/tool - Versions diffs - 0.0.62 → 0.0.63 - Mend

@towles/tool 0.0.62 → 0.0.63

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (83)

package/package.json +50 -57
package/src/commands/agentboard.ts +176 -0
package/src/commands/{auto-claude.ts → auto-claude/index.ts} +18 -28
package/src/commands/auto-claude/list.ts +114 -0
package/src/commands/auto-claude/retry.test.ts +138 -0
package/src/commands/auto-claude/retry.ts +139 -0
package/src/commands/auto-claude/status.test.ts +147 -0
package/src/commands/auto-claude/status.ts +123 -0
package/src/commands/base.ts +7 -2
package/src/commands/config.ts +5 -7
package/src/commands/doctor.ts +111 -12
package/src/commands/gh/branch.ts +4 -4
package/src/commands/gh/pr.ts +1 -0
package/src/commands/graph/index.ts +169 -0
package/src/commands/graph.test.ts +1 -1
package/src/commands/install.ts +40 -68
package/src/commands/journal/daily-notes.ts +3 -3
package/src/commands/journal/meeting.ts +3 -3
package/src/commands/journal/note.ts +3 -3
package/src/lib/auto-claude/claude-cli.ts +183 -0
package/src/lib/auto-claude/config.test.ts +6 -8
package/src/lib/auto-claude/config.ts +3 -4
package/src/lib/auto-claude/index.ts +2 -3
package/src/lib/auto-claude/labels.test.ts +85 -0
package/src/lib/auto-claude/labels.ts +42 -0
package/src/lib/auto-claude/pipeline-execution.test.ts +129 -33
package/src/lib/auto-claude/pipeline.test.ts +2 -2
package/src/lib/auto-claude/pipeline.ts +120 -36
package/src/lib/auto-claude/prompt-templates/01_plan.prompt.md +68 -0
package/src/lib/auto-claude/prompt-templates/{05_implement.prompt.md → 02_implement.prompt.md} +3 -2
package/src/lib/auto-claude/prompt-templates/03_simplify.prompt.md +52 -0
package/src/lib/auto-claude/prompt-templates/{06_review.prompt.md → 04_review.prompt.md} +29 -6
package/src/lib/auto-claude/prompt-templates/index.test.ts +9 -42
package/src/lib/auto-claude/prompt-templates/index.ts +13 -28
package/src/lib/auto-claude/run-claude.test.ts +48 -68
package/src/lib/auto-claude/shell.ts +6 -0
package/src/lib/auto-claude/steps/create-pr.ts +89 -25
package/src/lib/auto-claude/steps/fetch-issues.ts +4 -1
package/src/lib/auto-claude/steps/implement.ts +9 -16
package/src/lib/auto-claude/steps/simple-steps.ts +34 -0
package/src/lib/auto-claude/steps/steps.test.ts +68 -63
package/src/lib/auto-claude/templates.test.ts +91 -0
package/src/lib/auto-claude/templates.ts +34 -0
package/src/lib/auto-claude/test-helpers.ts +2 -1
package/src/lib/auto-claude/utils-execution.test.ts +9 -57
package/src/lib/auto-claude/utils.test.ts +5 -9
package/src/lib/auto-claude/utils.ts +27 -253
package/src/lib/graph/analyzer.test.ts +451 -0
package/src/lib/graph/analyzer.ts +165 -0
package/src/lib/graph/index.ts +24 -0
package/src/lib/graph/labels.ts +87 -0
package/src/lib/graph/parser.test.ts +150 -0
package/src/lib/graph/parser.ts +65 -0
package/src/lib/graph/render.ts +25 -0
package/src/lib/graph/server.ts +70 -0
package/src/lib/graph/sessions.ts +104 -0
package/src/lib/graph/tools.ts +90 -0
package/src/lib/graph/treemap.ts +211 -0
package/src/lib/graph/types.ts +80 -0
package/src/lib/install/claude-settings.ts +64 -0
package/src/lib/journal/editor.ts +33 -0
package/src/lib/journal/fs.ts +13 -0
package/src/lib/journal/index.ts +11 -0
package/src/lib/journal/paths.ts +106 -0
package/src/lib/journal/{utils.ts → templates.ts} +3 -151
package/src/utils/fs.ts +19 -0
package/src/utils/git/exec.ts +18 -0
package/src/utils/git/gh-cli-wrapper.test.ts +47 -8
package/src/utils/git/gh-cli-wrapper.ts +31 -19
package/src/utils/render.ts +3 -1
package/src/commands/graph.ts +0 -970
package/src/lib/auto-claude/prompt-templates/01_research.prompt.md +0 -21
package/src/lib/auto-claude/prompt-templates/02_plan.prompt.md +0 -27
package/src/lib/auto-claude/prompt-templates/03_plan-annotations.prompt.md +0 -15
package/src/lib/auto-claude/prompt-templates/04_plan-implementation.prompt.md +0 -35
package/src/lib/auto-claude/prompt-templates/07_refresh.prompt.md +0 -30
package/src/lib/auto-claude/steps/plan-annotations.ts +0 -54
package/src/lib/auto-claude/steps/plan-implementation.ts +0 -14
package/src/lib/auto-claude/steps/plan.ts +0 -14
package/src/lib/auto-claude/steps/refresh.ts +0 -114
package/src/lib/auto-claude/steps/remove-label.ts +0 -22
package/src/lib/auto-claude/steps/research.ts +0 -21
package/src/lib/auto-claude/steps/review.ts +0 -14

package/src/lib/auto-claude/config.ts CHANGED Viewed

@@ -10,11 +10,9 @@ export const AutoClaudeConfigSchema = z.object({
   remote: z.string().default("origin"),
   maxImplementIterations: z.number().default(5),
   maxTurns: z.number().optional(),
+  model: z.string().default("opus"),
+  maxReviewRetries: z.number().default(2),
   loopIntervalMinutes: z.number().default(30),
-  loopRetryEnabled: z.boolean().default(false),
-  maxRetries: z.number().default(5),
-  retryDelayMs: z.number().default(30_000),
-  maxRetryDelayMs: z.number().default(300_000),
 });
 export type AutoClaudeConfig = z.infer<typeof AutoClaudeConfigSchema>;
@@ -49,6 +47,7 @@ export async function initConfig(
       });
       mainBranch = result.stdout.trim().replace("refs/remotes/origin/", "");
     } catch {
+      consola.debug("Could not detect default branch from origin/HEAD, defaulting to 'main'");
       mainBranch = "main";
     }
   }

package/src/lib/auto-claude/index.ts CHANGED Viewed

@@ -1,15 +1,14 @@
 export { type AutoClaudeConfig, AutoClaudeConfigSchema, getConfig, initConfig } from "./config.js";
 export { STEP_NAMES, runPipeline } from "./pipeline.js";
 export type { StepName } from "./prompt-templates/index.js";
+export { git } from "../../utils/git/exec.js";
+export { sleep } from "./shell.js";
 export { fetchIssue, fetchIssues } from "./steps/fetch-issues.js";
-export { stepRefresh } from "./steps/refresh.js";
 export {
   type IssueContext,
   buildContextFromArtifacts,
   buildIssueContext,
   ensureBranch,
-  git,
   log,
   logBanner,
-  sleep,
 } from "./utils.js";

package/src/lib/auto-claude/labels.test.ts ADDED Viewed

@@ -0,0 +1,85 @@
+import { describe, expect, it, vi, beforeEach } from "vitest";
+import { execSafe } from "../../utils/git/exec.js";
+import { ensureLabelsExist, LABELS, removeLabel, setLabel } from "./labels";
+vi.mock("../../utils/git/exec.js", () => ({
+  execSafe: vi.fn().mockResolvedValue({ stdout: "", ok: true }),
+}));
+const mockedExecSafe = vi.mocked(execSafe);
+describe("LABELS", () => {
+  it("has expected label values", () => {
+    expect(LABELS.inProgress).toBe("auto-claude-in-progress");
+    expect(LABELS.review).toBe("auto-claude-review");
+    expect(LABELS.failed).toBe("auto-claude-failed");
+    expect(LABELS.success).toBe("auto-claude-success");
+  });
+  it("has exactly 4 labels", () => {
+    expect(Object.keys(LABELS)).toHaveLength(4);
+  });
+});
+describe("ensureLabelsExist", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+  it("creates all labels with --force", async () => {
+    await ensureLabelsExist("owner/repo");
+    expect(mockedExecSafe).toHaveBeenCalledTimes(4);
+    for (const label of Object.values(LABELS)) {
+      expect(mockedExecSafe).toHaveBeenCalledWith("gh", [
+        "label",
+        "create",
+        label,
+        "--repo",
+        "owner/repo",
+        "--force",
+      ]);
+    }
+  });
+});
+describe("setLabel", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+  it("calls gh issue edit with --add-label", async () => {
+    await setLabel("owner/repo", 42, "auto-claude-in-progress");
+    expect(mockedExecSafe).toHaveBeenCalledWith("gh", [
+      "issue",
+      "edit",
+      "42",
+      "--repo",
+      "owner/repo",
+      "--add-label",
+      "auto-claude-in-progress",
+    ]);
+  });
+});
+describe("removeLabel", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+  it("calls gh issue edit with --remove-label", async () => {
+    await removeLabel("owner/repo", 42, "auto-claude-failed");
+    expect(mockedExecSafe).toHaveBeenCalledWith("gh", [
+      "issue",
+      "edit",
+      "42",
+      "--repo",
+      "owner/repo",
+      "--remove-label",
+      "auto-claude-failed",
+    ]);
+  });
+});

package/src/lib/auto-claude/labels.ts ADDED Viewed

@@ -0,0 +1,42 @@
+import { execSafe } from "../../utils/git/exec.js";
+// ── Label helpers ──
+export const LABELS = {
+  inProgress: "auto-claude-in-progress",
+  review: "auto-claude-review",
+  failed: "auto-claude-failed",
+  success: "auto-claude-success",
+} as const;
+export async function ensureLabelsExist(repo: string): Promise<void> {
+  await Promise.all(
+    Object.values(LABELS).map((label) =>
+      execSafe("gh", ["label", "create", label, "--repo", repo, "--force"]),
+    ),
+  );
+}
+export async function setLabel(repo: string, issueNumber: number, label: string): Promise<void> {
+  await execSafe("gh", [
+    "issue",
+    "edit",
+    String(issueNumber),
+    "--repo",
+    repo,
+    "--add-label",
+    label,
+  ]);
+}
+export async function removeLabel(repo: string, issueNumber: number, label: string): Promise<void> {
+  await execSafe("gh", [
+    "issue",
+    "edit",
+    String(issueNumber),
+    "--repo",
+    repo,
+    "--remove-label",
+    label,
+  ]);
+}

package/src/lib/auto-claude/pipeline-execution.test.ts CHANGED Viewed

@@ -22,9 +22,8 @@ consola.level = -999;
 let mockClaudeImpl: MockClaudeImpl = null;
 vi.mock("./spawn-claude", () => createSpawnClaudeMock(() => mockClaudeImpl));
-// ── Mock tinyexec: intercept "gh" calls, pass through git ──
-let mockGhImpl: ((args: string[]) => Promise<{ stdout: string; exitCode: number }>) | null = null;
+// Track gh calls for label assertions
+let ghCalls: string[][] = [];
 vi.mock("tinyexec", async (importOriginal) => {
   const original = await importOriginal<typeof import("tinyexec")>();
@@ -36,11 +35,10 @@ vi.mock("tinyexec", async (importOriginal) => {
         args: string[],
         opts?: Record<string, unknown>,
       ): Promise<{ stdout: string; exitCode: number }> => {
-        if (cmd === "gh" && mockGhImpl) {
-          return mockGhImpl(args);
-        }
         if (cmd === "gh") {
-          throw new Error("Unexpected gh call -- set mockGhImpl");
+          ghCalls.push(args);
+          // Return empty success for label/issue/pr commands
+          return { stdout: "[]", exitCode: 0 };
         }
         return original.x(cmd, args, opts as never) as unknown as Promise<{
           stdout: string;
@@ -63,11 +61,11 @@ describe("runPipeline", () => {
     await initConfig({
       repo: "test/repo",
       mainBranch: "main",
-      maxImplementIterations: 2,
+      maxReviewRetries: 2,
     });
     ctx = buildTestContext(repo.dir);
     mockClaudeImpl = null;
-    mockGhImpl = null;
+    ghCalls = [];
   });
   afterEach(() => {
@@ -109,16 +107,16 @@ describe("runPipeline", () => {
     const { runPipeline } = await import("./pipeline");
     let claudeCallCount = 0;
-    const researchPath = join(ctx.issueDir, ARTIFACTS.research);
+    const planPath = join(ctx.issueDir, ARTIFACTS.plan);
     mockClaudeImpl = () => {
       claudeCallCount++;
       mkdirSync(ctx.issueDir, { recursive: true });
-      writeFileSync(researchPath, "x".repeat(250));
+      writeFileSync(planPath, "# Plan\n\nDetailed plan.");
       return { stdout: successClaudeJson(), exitCode: 0 };
     };
-    await runPipeline(ctx, "research");
+    await runPipeline(ctx, "plan");
     expect(claudeCallCount).toBe(1);
   });
@@ -137,7 +135,7 @@ describe("runPipeline", () => {
     expect(currentBranch).toBe("main");
   });
-  it("runs all steps in order when all succeed", async () => {
+  it("runs all 4 steps in order when review passes", async () => {
     const { runPipeline } = await import("./pipeline");
     let claudeCallCount = 0;
@@ -147,45 +145,143 @@ describe("runPipeline", () => {
       switch (claudeCallCount) {
         case 1:
-          writeFileSync(join(ctx.issueDir, ARTIFACTS.research), "x".repeat(250));
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
           break;
         case 2:
-          writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
           break;
         case 3:
-          writeFileSync(join(ctx.issueDir, ARTIFACTS.planImplementation), "# Impl Plan");
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
           break;
         case 4:
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "PASS\n\nLooks good.");
+          break;
+      }
+      return { stdout: successClaudeJson(), exitCode: 0 };
+    };
+    await runPipeline(ctx);
+    expect(claudeCallCount).toBe(4);
+    // Verify auto-claude-success and auto-claude-review labels were set
+    const successLabelCall = ghCalls.find(
+      (args) => args.includes("--add-label") && args.includes("auto-claude-success"),
+    );
+    expect(successLabelCall).toBeDefined();
+    const reviewLabelCall = ghCalls.find(
+      (args) => args.includes("--add-label") && args.includes("auto-claude-review"),
+    );
+    expect(reviewLabelCall).toBeDefined();
+  });
+  it("retries implement→simplify→review on review fail then pass", async () => {
+    const { runPipeline } = await import("./pipeline");
+    let claudeCallCount = 0;
+    mockClaudeImpl = () => {
+      claudeCallCount++;
+      mkdirSync(ctx.issueDir, { recursive: true });
+      switch (claudeCallCount) {
+        case 1: // plan
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
+          break;
+        case 2: // implement (attempt 1)
           writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
           break;
-        case 5:
-          writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "# Review\nLooks good.");
+        case 3: // simplify (attempt 1)
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
+          break;
+        case 4: // review (attempt 1 - FAIL)
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "FAIL\n\nNeeds work.");
+          break;
+        case 5: // implement (attempt 2)
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done v2");
+          break;
+        case 6: // simplify (attempt 2)
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified v2");
+          break;
+        case 7: // review (attempt 2 - PASS)
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "PASS\n\nGood now.");
           break;
       }
       return { stdout: successClaudeJson(), exitCode: 0 };
     };
-    let ghCallCount = 0;
-    mockGhImpl = async (args: string[]) => {
-      ghCallCount++;
-      if (args[0] === "pr" && args[1] === "list") {
-        return { stdout: "[]", exitCode: 0 };
+    await runPipeline(ctx);
+    // 1 plan + 3 steps * 2 attempts = 7
+    expect(claudeCallCount).toBe(7);
+  });
+  it("sets auto-claude-failed label after max retries exhausted", async () => {
+    const { runPipeline } = await import("./pipeline");
+    let claudeCallCount = 0;
+    mockClaudeImpl = () => {
+      claudeCallCount++;
+      mkdirSync(ctx.issueDir, { recursive: true });
+      // Plan
+      if (claudeCallCount === 1) {
+        writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
+        return { stdout: successClaudeJson(), exitCode: 0 };
       }
-      if (args[0] === "pr" && args[1] === "create") {
-        return { stdout: "https://github.com/test/repo/pull/1", exitCode: 0 };
+      // Each retry cycle: implement, simplify, review (always FAIL)
+      const stepInCycle = (claudeCallCount - 2) % 3;
+      switch (stepInCycle) {
+        case 0:
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
+          break;
+        case 1:
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
+          break;
+        case 2:
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "FAIL\n\nStill bad.");
+          break;
       }
-      return { stdout: "", exitCode: 0 };
+      return { stdout: successClaudeJson(), exitCode: 0 };
     };
     await runPipeline(ctx);
-    const prUrlPath = join(ctx.issueDir, ARTIFACTS.prUrl);
-    if (existsSync(prUrlPath)) {
-      const prUrl = readFileSync(prUrlPath, "utf-8");
-      expect(prUrl).toContain("github.com");
-    }
+    // 1 plan + 3 steps * 3 attempts (maxReviewRetries=2 → 3 total) = 10
+    expect(claudeCallCount).toBe(10);
+    // Verify auto-claude-failed label was set
+    const failedLabelCall = ghCalls.find(
+      (args) => args.includes("--add-label") && args.includes("auto-claude-failed"),
+    );
+    expect(failedLabelCall).toBeDefined();
+    // Verify issue comment was posted
+    const commentCall = ghCalls.find((args) => args[0] === "issue" && args[1] === "comment");
+    expect(commentCall).toBeDefined();
+  });
+  it("--until implement stops after implement step", async () => {
+    const { runPipeline } = await import("./pipeline");
+    let claudeCallCount = 0;
+    mockClaudeImpl = () => {
+      claudeCallCount++;
+      mkdirSync(ctx.issueDir, { recursive: true });
+      switch (claudeCallCount) {
+        case 1:
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
+          break;
+        case 2:
+          writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
+          break;
+      }
+      return { stdout: successClaudeJson(), exitCode: 0 };
+    };
+    await runPipeline(ctx, "implement");
-    expect(claudeCallCount).toBe(5);
-    expect(ghCallCount).toBeGreaterThanOrEqual(2);
+    expect(claudeCallCount).toBe(2);
   });
 });

package/src/lib/auto-claude/pipeline.test.ts CHANGED Viewed

@@ -8,7 +8,7 @@ describe("STEP_NAMES", () => {
     expect(STEP_NAMES).toEqual(PIPELINE_STEPS.map((s) => s.name));
   });
-  it("should have 8 steps", () => {
-    expect(STEP_NAMES).toHaveLength(8);
+  it("should have 4 steps", () => {
+    expect(STEP_NAMES).toHaveLength(4);
   });
 });

package/src/lib/auto-claude/pipeline.ts CHANGED Viewed

@@ -1,33 +1,23 @@
+import { rmSync } from "node:fs";
 import { join } from "node:path";
 import { getConfig } from "./config.js";
-import { ARTIFACTS, PIPELINE_STEPS } from "./prompt-templates/index.js";
+import { ARTIFACTS } from "./prompt-templates/index.js";
 import type { StepName } from "./prompt-templates/index.js";
-import { stepCreatePR } from "./steps/create-pr.js";
+import { createPr } from "./steps/create-pr.js";
 import { stepImplement } from "./steps/implement.js";
-import { stepPlanAnnotations } from "./steps/plan-annotations.js";
-import { stepPlanImplementation } from "./steps/plan-implementation.js";
-import { stepPlan } from "./steps/plan.js";
-import { stepRemoveLabel } from "./steps/remove-label.js";
-import { stepResearch } from "./steps/research.js";
-import { stepReview } from "./steps/review.js";
-import { ensureDir, fileExists, git, log, readFile, writeFile } from "./utils.js";
+import { stepPlan, stepReview, stepSimplify } from "./steps/simple-steps.js";
+import { LABELS, ensureLabelsExist, removeLabel, setLabel } from "./labels.js";
+import { ensureDir, fileExists, readFile, writeFile } from "../../utils/fs.js";
+import { execSafe, git } from "../../utils/git/exec.js";
+import { ghRaw } from "../../utils/git/gh-cli-wrapper.js";
+import { log } from "./utils.js";
 import type { IssueContext } from "./utils.js";
-const STEP_RUNNERS: Record<StepName, (ctx: IssueContext) => Promise<boolean>> = {
-  research: stepResearch,
-  plan: stepPlan,
-  "plan-annotations": stepPlanAnnotations,
-  "plan-implementation": stepPlanImplementation,
-  implement: stepImplement,
-  review: stepReview,
-  "create-pr": stepCreatePR,
-  "remove-label": stepRemoveLabel,
-};
 export { type StepName, STEP_NAMES } from "./prompt-templates/index.js";
 export async function runPipeline(ctx: IssueContext, untilStep?: StepName): Promise<void> {
+  const cfg = getConfig();
   log(`Pipeline starting for ${ctx.repo}#${ctx.number}: ${ctx.title}`);
   ensureDir(ctx.issueDir);
@@ -38,30 +28,124 @@ export async function runPipeline(ctx: IssueContext, untilStep?: StepName): Prom
     log("Saved initial-ramblings.md");
   }
-  for (const step of PIPELINE_STEPS) {
-    const runner = STEP_RUNNERS[step.name];
-    const success = await runner(ctx);
+  // Label management
+  await ensureLabelsExist(ctx.repo);
+  await removeLabel(ctx.repo, ctx.number, cfg.triggerLabel);
+  await setLabel(ctx.repo, ctx.number, LABELS.inProgress);
-    if (!success) {
-      log(`Pipeline stopped at "${step.name}" for ${ctx.repo}#${ctx.number}`);
-      await checkoutMain();
+  try {
+    // Step 1: Plan (runs once)
+    if (!(await stepPlan(ctx))) {
+      await handleFailure(ctx, "plan");
       return;
     }
-    if (untilStep && step.name === untilStep) {
-      log(`Pipeline paused after "${step.name}" (--until ${untilStep})`);
-      await checkoutMain();
+    if (untilStep === "plan") {
+      log(`Pipeline paused after "plan" (--until plan)`);
       return;
     }
+    // Steps 2-4: Implement → Simplify → Review loop
+    const maxRetries = cfg.maxReviewRetries;
+    for (let attempt = 0; attempt <= maxRetries; attempt++) {
+      // Clear previous iteration artifacts (except plan)
+      if (attempt > 0) {
+        clearArtifact(ctx, ARTIFACTS.completedSummary);
+        clearArtifact(ctx, ARTIFACTS.simplifySummary);
+        clearArtifact(ctx, ARTIFACTS.review);
+      }
+      // Implement
+      if (!(await stepImplement(ctx))) {
+        await handleFailure(ctx, "implement");
+        return;
+      }
+      if (untilStep === "implement") {
+        log(`Pipeline paused after "implement" (--until implement)`);
+        return;
+      }
+      // Simplify
+      if (!(await stepSimplify(ctx))) {
+        await handleFailure(ctx, "simplify");
+        return;
+      }
+      if (untilStep === "simplify") {
+        log(`Pipeline paused after "simplify" (--until simplify)`);
+        return;
+      }
+      // Review
+      if (!(await stepReview(ctx))) {
+        await handleFailure(ctx, "review");
+        return;
+      }
+      if (untilStep === "review") {
+        log(`Pipeline paused after "review" (--until review)`);
+        return;
+      }
+      // Check review result
+      if (isReviewPass(ctx)) {
+        const prUrl = await createPr(ctx);
+        await removeLabel(ctx.repo, ctx.number, LABELS.inProgress);
+        await setLabel(ctx.repo, ctx.number, LABELS.success);
+        await setLabel(ctx.repo, ctx.number, LABELS.review);
+        log(`Pipeline complete for ${ctx.repo}#${ctx.number} — ${prUrl}`);
+        return;
+      }
+      // Review failed
+      if (attempt < maxRetries) {
+        log(
+          `Review did not pass (attempt ${attempt + 1}/${maxRetries + 1}), retrying implement→simplify→review…`,
+        );
+      }
+    }
+    // All retries exhausted
+    await handleFailure(
+      ctx,
+      "review",
+      `auto-claude: review did not pass after ${maxRetries + 1} attempts. Labelled \`${LABELS.failed}\`.`,
+    );
+  } finally {
+    await checkoutMain();
   }
+}
+function clearArtifact(ctx: IssueContext, artifact: string): void {
+  rmSync(join(ctx.issueDir, artifact), { force: true });
+}
-  const prUrlPath = join(ctx.issueDir, ARTIFACTS.prUrl);
-  const prUrl = fileExists(prUrlPath) ? readFile(prUrlPath).trim() : "";
-  const prSuffix = prUrl ? ` — ${prUrl}` : "";
-  log(`Pipeline complete for ${ctx.repo}#${ctx.number}${prSuffix}`);
-  await checkoutMain();
+function isReviewPass(ctx: IssueContext): boolean {
+  const reviewPath = join(ctx.issueDir, ARTIFACTS.review);
+  if (!fileExists(reviewPath)) return false;
+  const content = readFile(reviewPath);
+  const firstLine = content.split("\n")[0].trim().toUpperCase();
+  return firstLine === "PASS";
+}
+async function handleFailure(ctx: IssueContext, stepName: string, comment?: string): Promise<void> {
+  await removeLabel(ctx.repo, ctx.number, LABELS.inProgress);
+  await setLabel(ctx.repo, ctx.number, LABELS.failed);
+  if (comment) {
+    await ghRaw(["issue", "comment", String(ctx.number), "--repo", ctx.repo, "--body", comment]);
+  }
+  log(`Pipeline stopped at "${stepName}" for ${ctx.repo}#${ctx.number}`);
 }
 async function checkoutMain(): Promise<void> {
-  await git(["checkout", getConfig().mainBranch]).catch(() => {});
+  await git(["checkout", getConfig().mainBranch]).catch(() => {
+    // Best-effort checkout — may fail if branch doesn't exist locally yet
+  });
+  const stashList = await execSafe("git", ["stash", "list"]);
+  if (stashList.ok) {
+    const lines = stashList.stdout.split("\n");
+    const idx = lines.findIndex((l) => l.includes("auto-claude: before switching to"));
+    if (idx >= 0) {
+      await execSafe("git", ["stash", "pop", `stash@{${idx}}`]);
+      log("Restored stashed changes");
+    }
+  }
 }

package/src/lib/auto-claude/prompt-templates/01_plan.prompt.md ADDED Viewed

@@ -0,0 +1,68 @@
+You are a planning agent. Your job is to research the issue, explore the codebase, and produce a detailed implementation plan.
+The issue is in @{{ISSUE_DIR}}/initial-ramblings.md.
+The code lives primarily at `{{SCOPE_PATH}}/`.
+The base branch is `{{MAIN_BRANCH}}`.
+## Phase 1: Research
+1. Read the issue description thoroughly — understand the problem, requirements, and constraints
+2. Explore the relevant areas of the codebase:
+   - Find files related to the feature/bug area
+   - Read existing implementations of similar patterns
+   - Identify dependencies, imports, and shared utilities
+   - Check for existing tests in the area
+3. Read the project's CLAUDE.md for coding conventions, test commands, and architecture guidance
+## Phase 2: Design
+1. Identify the approach — what needs to change and why
+2. Consider alternatives and trade-offs — pick the simplest path that meets requirements
+3. Identify risks, edge cases, and things that could go wrong
+4. Determine test strategy — what needs testing and how
+## Phase 3: Write the Plan
+Write the plan to @{{ISSUE_DIR}}/plan.md with this structure:
+```markdown
+# Plan: <concise title>
+## Summary
+1-3 sentence description of the change and why it's needed.
+## Approach
+High-level description of the solution strategy.
+## Files to Change
+- `path/to/file.ext` — what changes and why
+- `path/to/new-file.ext` — (new) purpose
+- `path/to/deleted.ext` — (delete) reason
+## Implementation Checklist
+- [ ] Task 1 — specific, actionable description
+- [ ] Task 2 — include file paths where relevant
+- [ ] Task 3 — tests: describe what to test
+- [ ] ...
+## Test Strategy
+How to verify the implementation is correct. Which behaviors need test coverage.
+## Risks / Edge Cases
+Anything the implementer should watch out for.
+```
+## Guidelines
+- The checklist is the implementer's single source of truth — make every task actionable and unambiguous
+- Include file paths in tasks so the implementer doesn't have to search
+- Order tasks logically — dependencies before dependents, types/interfaces before implementations
+- Include test tasks inline (not as a separate phase) — test each behavior near the task that creates it
+- Keep it focused — don't over-plan. If a task is straightforward, a single line is enough
+- Follow the project's coding conventions from CLAUDE.md

package/src/lib/auto-claude/prompt-templates/{05_implement.prompt.md → 02_implement.prompt.md} RENAMED Viewed

@@ -1,8 +1,9 @@
-You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan-implementation.md task by task.
+You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan.md task by task.
-The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist.
+The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist in plan.md.
 The code lives primarily at `{{SCOPE_PATH}}/`.
+{{REVIEW_FEEDBACK}}
 ## How to work