@towles/tool 0.0.62 → 0.0.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +50 -57
- package/src/commands/agentboard.ts +176 -0
- package/src/commands/{auto-claude.ts → auto-claude/index.ts} +18 -28
- package/src/commands/auto-claude/list.ts +114 -0
- package/src/commands/auto-claude/retry.test.ts +138 -0
- package/src/commands/auto-claude/retry.ts +139 -0
- package/src/commands/auto-claude/status.test.ts +147 -0
- package/src/commands/auto-claude/status.ts +123 -0
- package/src/commands/base.ts +7 -2
- package/src/commands/config.ts +5 -7
- package/src/commands/doctor.ts +111 -12
- package/src/commands/gh/branch.ts +4 -4
- package/src/commands/gh/pr.ts +1 -0
- package/src/commands/graph/index.ts +169 -0
- package/src/commands/graph.test.ts +1 -1
- package/src/commands/install.ts +40 -68
- package/src/commands/journal/daily-notes.ts +3 -3
- package/src/commands/journal/meeting.ts +3 -3
- package/src/commands/journal/note.ts +3 -3
- package/src/lib/auto-claude/claude-cli.ts +183 -0
- package/src/lib/auto-claude/config.test.ts +6 -8
- package/src/lib/auto-claude/config.ts +3 -4
- package/src/lib/auto-claude/index.ts +2 -3
- package/src/lib/auto-claude/labels.test.ts +85 -0
- package/src/lib/auto-claude/labels.ts +42 -0
- package/src/lib/auto-claude/pipeline-execution.test.ts +129 -33
- package/src/lib/auto-claude/pipeline.test.ts +2 -2
- package/src/lib/auto-claude/pipeline.ts +120 -36
- package/src/lib/auto-claude/prompt-templates/01_plan.prompt.md +68 -0
- package/src/lib/auto-claude/prompt-templates/{05_implement.prompt.md → 02_implement.prompt.md} +3 -2
- package/src/lib/auto-claude/prompt-templates/03_simplify.prompt.md +52 -0
- package/src/lib/auto-claude/prompt-templates/{06_review.prompt.md → 04_review.prompt.md} +29 -6
- package/src/lib/auto-claude/prompt-templates/index.test.ts +9 -42
- package/src/lib/auto-claude/prompt-templates/index.ts +13 -28
- package/src/lib/auto-claude/run-claude.test.ts +48 -68
- package/src/lib/auto-claude/shell.ts +6 -0
- package/src/lib/auto-claude/steps/create-pr.ts +89 -25
- package/src/lib/auto-claude/steps/fetch-issues.ts +4 -1
- package/src/lib/auto-claude/steps/implement.ts +9 -16
- package/src/lib/auto-claude/steps/simple-steps.ts +34 -0
- package/src/lib/auto-claude/steps/steps.test.ts +68 -63
- package/src/lib/auto-claude/templates.test.ts +91 -0
- package/src/lib/auto-claude/templates.ts +34 -0
- package/src/lib/auto-claude/test-helpers.ts +2 -1
- package/src/lib/auto-claude/utils-execution.test.ts +9 -57
- package/src/lib/auto-claude/utils.test.ts +5 -9
- package/src/lib/auto-claude/utils.ts +27 -253
- package/src/lib/graph/analyzer.test.ts +451 -0
- package/src/lib/graph/analyzer.ts +165 -0
- package/src/lib/graph/index.ts +24 -0
- package/src/lib/graph/labels.ts +87 -0
- package/src/lib/graph/parser.test.ts +150 -0
- package/src/lib/graph/parser.ts +65 -0
- package/src/lib/graph/render.ts +25 -0
- package/src/lib/graph/server.ts +70 -0
- package/src/lib/graph/sessions.ts +104 -0
- package/src/lib/graph/tools.ts +90 -0
- package/src/lib/graph/treemap.ts +211 -0
- package/src/lib/graph/types.ts +80 -0
- package/src/lib/install/claude-settings.ts +64 -0
- package/src/lib/journal/editor.ts +33 -0
- package/src/lib/journal/fs.ts +13 -0
- package/src/lib/journal/index.ts +11 -0
- package/src/lib/journal/paths.ts +106 -0
- package/src/lib/journal/{utils.ts → templates.ts} +3 -151
- package/src/utils/fs.ts +19 -0
- package/src/utils/git/exec.ts +18 -0
- package/src/utils/git/gh-cli-wrapper.test.ts +47 -8
- package/src/utils/git/gh-cli-wrapper.ts +31 -19
- package/src/utils/render.ts +3 -1
- package/src/commands/graph.ts +0 -970
- package/src/lib/auto-claude/prompt-templates/01_research.prompt.md +0 -21
- package/src/lib/auto-claude/prompt-templates/02_plan.prompt.md +0 -27
- package/src/lib/auto-claude/prompt-templates/03_plan-annotations.prompt.md +0 -15
- package/src/lib/auto-claude/prompt-templates/04_plan-implementation.prompt.md +0 -35
- package/src/lib/auto-claude/prompt-templates/07_refresh.prompt.md +0 -30
- package/src/lib/auto-claude/steps/plan-annotations.ts +0 -54
- package/src/lib/auto-claude/steps/plan-implementation.ts +0 -14
- package/src/lib/auto-claude/steps/plan.ts +0 -14
- package/src/lib/auto-claude/steps/refresh.ts +0 -114
- package/src/lib/auto-claude/steps/remove-label.ts +0 -22
- package/src/lib/auto-claude/steps/research.ts +0 -21
- package/src/lib/auto-claude/steps/review.ts +0 -14
|
@@ -10,11 +10,9 @@ export const AutoClaudeConfigSchema = z.object({
|
|
|
10
10
|
remote: z.string().default("origin"),
|
|
11
11
|
maxImplementIterations: z.number().default(5),
|
|
12
12
|
maxTurns: z.number().optional(),
|
|
13
|
+
model: z.string().default("opus"),
|
|
14
|
+
maxReviewRetries: z.number().default(2),
|
|
13
15
|
loopIntervalMinutes: z.number().default(30),
|
|
14
|
-
loopRetryEnabled: z.boolean().default(false),
|
|
15
|
-
maxRetries: z.number().default(5),
|
|
16
|
-
retryDelayMs: z.number().default(30_000),
|
|
17
|
-
maxRetryDelayMs: z.number().default(300_000),
|
|
18
16
|
});
|
|
19
17
|
|
|
20
18
|
export type AutoClaudeConfig = z.infer<typeof AutoClaudeConfigSchema>;
|
|
@@ -49,6 +47,7 @@ export async function initConfig(
|
|
|
49
47
|
});
|
|
50
48
|
mainBranch = result.stdout.trim().replace("refs/remotes/origin/", "");
|
|
51
49
|
} catch {
|
|
50
|
+
consola.debug("Could not detect default branch from origin/HEAD, defaulting to 'main'");
|
|
52
51
|
mainBranch = "main";
|
|
53
52
|
}
|
|
54
53
|
}
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
export { type AutoClaudeConfig, AutoClaudeConfigSchema, getConfig, initConfig } from "./config.js";
|
|
2
2
|
export { STEP_NAMES, runPipeline } from "./pipeline.js";
|
|
3
3
|
export type { StepName } from "./prompt-templates/index.js";
|
|
4
|
+
export { git } from "../../utils/git/exec.js";
|
|
5
|
+
export { sleep } from "./shell.js";
|
|
4
6
|
export { fetchIssue, fetchIssues } from "./steps/fetch-issues.js";
|
|
5
|
-
export { stepRefresh } from "./steps/refresh.js";
|
|
6
7
|
export {
|
|
7
8
|
type IssueContext,
|
|
8
9
|
buildContextFromArtifacts,
|
|
9
10
|
buildIssueContext,
|
|
10
11
|
ensureBranch,
|
|
11
|
-
git,
|
|
12
12
|
log,
|
|
13
13
|
logBanner,
|
|
14
|
-
sleep,
|
|
15
14
|
} from "./utils.js";
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
import { execSafe } from "../../utils/git/exec.js";
|
|
4
|
+
import { ensureLabelsExist, LABELS, removeLabel, setLabel } from "./labels";
|
|
5
|
+
|
|
6
|
+
vi.mock("../../utils/git/exec.js", () => ({
|
|
7
|
+
execSafe: vi.fn().mockResolvedValue({ stdout: "", ok: true }),
|
|
8
|
+
}));
|
|
9
|
+
|
|
10
|
+
const mockedExecSafe = vi.mocked(execSafe);
|
|
11
|
+
|
|
12
|
+
// Pins the exact label strings exported by LABELS and the number of entries.
describe("LABELS", () => {
  it("has expected label values", () => {
    const { inProgress, review, failed, success } = LABELS;

    expect(inProgress).toBe("auto-claude-in-progress");
    expect(review).toBe("auto-claude-review");
    expect(failed).toBe("auto-claude-failed");
    expect(success).toBe("auto-claude-success");
  });

  it("has exactly 4 labels", () => {
    const labelKeys = Object.keys(LABELS);

    expect(labelKeys).toHaveLength(4);
  });
});
|
|
24
|
+
|
|
25
|
+
// Verifies that ensureLabelsExist issues one `gh label create … --force` call per LABELS entry.
describe("ensureLabelsExist", () => {
  beforeEach(() => {
    // Reset call history so counts from other tests do not leak into this one.
    vi.clearAllMocks();
  });

  it("creates all labels with --force", async () => {
    const labels = Object.values(LABELS);

    await ensureLabelsExist("owner/repo");

    // Derive the expected call count from LABELS instead of repeating the literal 4:
    // the size of the label set is already pinned by the "LABELS" suite, so this test
    // only has to prove "one call per label".
    expect(mockedExecSafe).toHaveBeenCalledTimes(labels.length);
    for (const label of labels) {
      expect(mockedExecSafe).toHaveBeenCalledWith("gh", [
        "label",
        "create",
        label,
        "--repo",
        "owner/repo",
        "--force",
      ]);
    }
  });
});
|
|
46
|
+
|
|
47
|
+
// Verifies the exact `gh issue edit` argument list produced by setLabel.
describe("setLabel", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("calls gh issue edit with --add-label", async () => {
    // The numeric issue id is expected to reach gh as a string argument.
    const expectedArgs = [
      "issue",
      "edit",
      "42",
      "--repo",
      "owner/repo",
      "--add-label",
      "auto-claude-in-progress",
    ];

    await setLabel("owner/repo", 42, "auto-claude-in-progress");

    expect(mockedExecSafe).toHaveBeenCalledWith("gh", expectedArgs);
  });
});
|
|
66
|
+
|
|
67
|
+
// Verifies the exact `gh issue edit` argument list produced by removeLabel.
describe("removeLabel", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("calls gh issue edit with --remove-label", async () => {
    // The numeric issue id is expected to reach gh as a string argument.
    const expectedArgs = [
      "issue",
      "edit",
      "42",
      "--repo",
      "owner/repo",
      "--remove-label",
      "auto-claude-failed",
    ];

    await removeLabel("owner/repo", 42, "auto-claude-failed");

    expect(mockedExecSafe).toHaveBeenCalledWith("gh", expectedArgs);
  });
});
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { execSafe } from "../../utils/git/exec.js";
|
|
2
|
+
|
|
3
|
+
// ── Label helpers ──
|
|
4
|
+
|
|
5
|
+
/**
 * GitHub label names applied to issues by the auto-claude pipeline.
 *
 * `as const` keeps each value as a string-literal type. The accompanying test
 * suite pins both the exact strings and the number of entries, so adding or
 * renaming a label requires updating the tests as well.
 */
export const LABELS = {
  // Added when a pipeline run starts; removed again on success or failure.
  inProgress: "auto-claude-in-progress",
  // Added, together with `success`, after the review passes and a PR is created.
  review: "auto-claude-review",
  // Added when a step fails or the review retries are exhausted.
  failed: "auto-claude-failed",
  // Added after the review passes and a PR is created.
  success: "auto-claude-success",
} as const;
|
|
11
|
+
|
|
12
|
+
/**
 * Runs `gh label create <label> --repo <repo> --force` once for every value in LABELS.
 *
 * All invocations are started before any is awaited, then awaited together with
 * `Promise.all`. The results returned by `execSafe` are discarded.
 *
 * @param repo - Repository passed to `gh` via `--repo` (e.g. "owner/repo").
 */
export async function ensureLabelsExist(repo: string): Promise<void> {
  const pending: Promise<unknown>[] = [];
  for (const label of Object.values(LABELS)) {
    pending.push(execSafe("gh", ["label", "create", label, "--repo", repo, "--force"]));
  }
  await Promise.all(pending);
}
|
|
19
|
+
|
|
20
|
+
/**
 * Adds a label to an issue by running `gh issue edit <n> --repo <repo> --add-label <label>`.
 *
 * The result returned by `execSafe` is discarded.
 *
 * @param repo - Repository passed to `gh` via `--repo`.
 * @param issueNumber - Issue number; converted to a string for the command line.
 * @param label - Label name to add.
 */
export async function setLabel(repo: string, issueNumber: number, label: string): Promise<void> {
  const issueRef = String(issueNumber);
  const ghArgs = ["issue", "edit", issueRef, "--repo", repo, "--add-label", label];
  await execSafe("gh", ghArgs);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Removes a label from an issue by running
 * `gh issue edit <n> --repo <repo> --remove-label <label>`.
 *
 * The result returned by `execSafe` is discarded.
 *
 * @param repo - Repository passed to `gh` via `--repo`.
 * @param issueNumber - Issue number; converted to a string for the command line.
 * @param label - Label name to remove.
 */
export async function removeLabel(repo: string, issueNumber: number, label: string): Promise<void> {
  const issueRef = String(issueNumber);
  const ghArgs = ["issue", "edit", issueRef, "--repo", repo, "--remove-label", label];
  await execSafe("gh", ghArgs);
}
|
|
@@ -22,9 +22,8 @@ consola.level = -999;
|
|
|
22
22
|
let mockClaudeImpl: MockClaudeImpl = null;
|
|
23
23
|
vi.mock("./spawn-claude", () => createSpawnClaudeMock(() => mockClaudeImpl));
|
|
24
24
|
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
let mockGhImpl: ((args: string[]) => Promise<{ stdout: string; exitCode: number }>) | null = null;
|
|
25
|
+
// Track gh calls for label assertions
|
|
26
|
+
let ghCalls: string[][] = [];
|
|
28
27
|
|
|
29
28
|
vi.mock("tinyexec", async (importOriginal) => {
|
|
30
29
|
const original = await importOriginal<typeof import("tinyexec")>();
|
|
@@ -36,11 +35,10 @@ vi.mock("tinyexec", async (importOriginal) => {
|
|
|
36
35
|
args: string[],
|
|
37
36
|
opts?: Record<string, unknown>,
|
|
38
37
|
): Promise<{ stdout: string; exitCode: number }> => {
|
|
39
|
-
if (cmd === "gh" && mockGhImpl) {
|
|
40
|
-
return mockGhImpl(args);
|
|
41
|
-
}
|
|
42
38
|
if (cmd === "gh") {
|
|
43
|
-
|
|
39
|
+
ghCalls.push(args);
|
|
40
|
+
// Return empty success for label/issue/pr commands
|
|
41
|
+
return { stdout: "[]", exitCode: 0 };
|
|
44
42
|
}
|
|
45
43
|
return original.x(cmd, args, opts as never) as unknown as Promise<{
|
|
46
44
|
stdout: string;
|
|
@@ -63,11 +61,11 @@ describe("runPipeline", () => {
|
|
|
63
61
|
await initConfig({
|
|
64
62
|
repo: "test/repo",
|
|
65
63
|
mainBranch: "main",
|
|
66
|
-
|
|
64
|
+
maxReviewRetries: 2,
|
|
67
65
|
});
|
|
68
66
|
ctx = buildTestContext(repo.dir);
|
|
69
67
|
mockClaudeImpl = null;
|
|
70
|
-
|
|
68
|
+
ghCalls = [];
|
|
71
69
|
});
|
|
72
70
|
|
|
73
71
|
afterEach(() => {
|
|
@@ -109,16 +107,16 @@ describe("runPipeline", () => {
|
|
|
109
107
|
const { runPipeline } = await import("./pipeline");
|
|
110
108
|
|
|
111
109
|
let claudeCallCount = 0;
|
|
112
|
-
const
|
|
110
|
+
const planPath = join(ctx.issueDir, ARTIFACTS.plan);
|
|
113
111
|
|
|
114
112
|
mockClaudeImpl = () => {
|
|
115
113
|
claudeCallCount++;
|
|
116
114
|
mkdirSync(ctx.issueDir, { recursive: true });
|
|
117
|
-
writeFileSync(
|
|
115
|
+
writeFileSync(planPath, "# Plan\n\nDetailed plan.");
|
|
118
116
|
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
119
117
|
};
|
|
120
118
|
|
|
121
|
-
await runPipeline(ctx, "
|
|
119
|
+
await runPipeline(ctx, "plan");
|
|
122
120
|
|
|
123
121
|
expect(claudeCallCount).toBe(1);
|
|
124
122
|
});
|
|
@@ -137,7 +135,7 @@ describe("runPipeline", () => {
|
|
|
137
135
|
expect(currentBranch).toBe("main");
|
|
138
136
|
});
|
|
139
137
|
|
|
140
|
-
it("runs all steps in order when
|
|
138
|
+
it("runs all 4 steps in order when review passes", async () => {
|
|
141
139
|
const { runPipeline } = await import("./pipeline");
|
|
142
140
|
|
|
143
141
|
let claudeCallCount = 0;
|
|
@@ -147,45 +145,143 @@ describe("runPipeline", () => {
|
|
|
147
145
|
|
|
148
146
|
switch (claudeCallCount) {
|
|
149
147
|
case 1:
|
|
150
|
-
writeFileSync(join(ctx.issueDir, ARTIFACTS.
|
|
148
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
|
|
151
149
|
break;
|
|
152
150
|
case 2:
|
|
153
|
-
writeFileSync(join(ctx.issueDir, ARTIFACTS.
|
|
151
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
|
|
154
152
|
break;
|
|
155
153
|
case 3:
|
|
156
|
-
writeFileSync(join(ctx.issueDir, ARTIFACTS.
|
|
154
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
|
|
157
155
|
break;
|
|
158
156
|
case 4:
|
|
157
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "PASS\n\nLooks good.");
|
|
158
|
+
break;
|
|
159
|
+
}
|
|
160
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
await runPipeline(ctx);
|
|
164
|
+
|
|
165
|
+
expect(claudeCallCount).toBe(4);
|
|
166
|
+
|
|
167
|
+
// Verify auto-claude-success and auto-claude-review labels were set
|
|
168
|
+
const successLabelCall = ghCalls.find(
|
|
169
|
+
(args) => args.includes("--add-label") && args.includes("auto-claude-success"),
|
|
170
|
+
);
|
|
171
|
+
expect(successLabelCall).toBeDefined();
|
|
172
|
+
const reviewLabelCall = ghCalls.find(
|
|
173
|
+
(args) => args.includes("--add-label") && args.includes("auto-claude-review"),
|
|
174
|
+
);
|
|
175
|
+
expect(reviewLabelCall).toBeDefined();
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
it("retries implement→simplify→review on review fail then pass", async () => {
|
|
179
|
+
const { runPipeline } = await import("./pipeline");
|
|
180
|
+
|
|
181
|
+
let claudeCallCount = 0;
|
|
182
|
+
mockClaudeImpl = () => {
|
|
183
|
+
claudeCallCount++;
|
|
184
|
+
mkdirSync(ctx.issueDir, { recursive: true });
|
|
185
|
+
|
|
186
|
+
switch (claudeCallCount) {
|
|
187
|
+
case 1: // plan
|
|
188
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
|
|
189
|
+
break;
|
|
190
|
+
case 2: // implement (attempt 1)
|
|
159
191
|
writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
|
|
160
192
|
break;
|
|
161
|
-
case
|
|
162
|
-
writeFileSync(join(ctx.issueDir, ARTIFACTS.
|
|
193
|
+
case 3: // simplify (attempt 1)
|
|
194
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
|
|
195
|
+
break;
|
|
196
|
+
case 4: // review (attempt 1 - FAIL)
|
|
197
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "FAIL\n\nNeeds work.");
|
|
198
|
+
break;
|
|
199
|
+
case 5: // implement (attempt 2)
|
|
200
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done v2");
|
|
201
|
+
break;
|
|
202
|
+
case 6: // simplify (attempt 2)
|
|
203
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified v2");
|
|
204
|
+
break;
|
|
205
|
+
case 7: // review (attempt 2 - PASS)
|
|
206
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "PASS\n\nGood now.");
|
|
163
207
|
break;
|
|
164
208
|
}
|
|
165
209
|
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
166
210
|
};
|
|
167
211
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
212
|
+
await runPipeline(ctx);
|
|
213
|
+
|
|
214
|
+
// 1 plan + 3 steps * 2 attempts = 7
|
|
215
|
+
expect(claudeCallCount).toBe(7);
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
it("sets auto-claude-failed label after max retries exhausted", async () => {
|
|
219
|
+
const { runPipeline } = await import("./pipeline");
|
|
220
|
+
|
|
221
|
+
let claudeCallCount = 0;
|
|
222
|
+
mockClaudeImpl = () => {
|
|
223
|
+
claudeCallCount++;
|
|
224
|
+
mkdirSync(ctx.issueDir, { recursive: true });
|
|
225
|
+
|
|
226
|
+
// Plan
|
|
227
|
+
if (claudeCallCount === 1) {
|
|
228
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
|
|
229
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
173
230
|
}
|
|
174
|
-
|
|
175
|
-
|
|
231
|
+
|
|
232
|
+
// Each retry cycle: implement, simplify, review (always FAIL)
|
|
233
|
+
const stepInCycle = (claudeCallCount - 2) % 3;
|
|
234
|
+
switch (stepInCycle) {
|
|
235
|
+
case 0:
|
|
236
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
|
|
237
|
+
break;
|
|
238
|
+
case 1:
|
|
239
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), "# Simplified");
|
|
240
|
+
break;
|
|
241
|
+
case 2:
|
|
242
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.review), "FAIL\n\nStill bad.");
|
|
243
|
+
break;
|
|
176
244
|
}
|
|
177
|
-
return { stdout:
|
|
245
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
178
246
|
};
|
|
179
247
|
|
|
180
248
|
await runPipeline(ctx);
|
|
181
249
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
250
|
+
// 1 plan + 3 steps * 3 attempts (maxReviewRetries=2 → 3 total) = 10
|
|
251
|
+
expect(claudeCallCount).toBe(10);
|
|
252
|
+
|
|
253
|
+
// Verify auto-claude-failed label was set
|
|
254
|
+
const failedLabelCall = ghCalls.find(
|
|
255
|
+
(args) => args.includes("--add-label") && args.includes("auto-claude-failed"),
|
|
256
|
+
);
|
|
257
|
+
expect(failedLabelCall).toBeDefined();
|
|
258
|
+
|
|
259
|
+
// Verify issue comment was posted
|
|
260
|
+
const commentCall = ghCalls.find((args) => args[0] === "issue" && args[1] === "comment");
|
|
261
|
+
expect(commentCall).toBeDefined();
|
|
262
|
+
});
|
|
263
|
+
|
|
264
|
+
it("--until implement stops after implement step", async () => {
|
|
265
|
+
const { runPipeline } = await import("./pipeline");
|
|
266
|
+
|
|
267
|
+
let claudeCallCount = 0;
|
|
268
|
+
mockClaudeImpl = () => {
|
|
269
|
+
claudeCallCount++;
|
|
270
|
+
mkdirSync(ctx.issueDir, { recursive: true });
|
|
271
|
+
|
|
272
|
+
switch (claudeCallCount) {
|
|
273
|
+
case 1:
|
|
274
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
|
|
275
|
+
break;
|
|
276
|
+
case 2:
|
|
277
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
|
|
278
|
+
break;
|
|
279
|
+
}
|
|
280
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
281
|
+
};
|
|
282
|
+
|
|
283
|
+
await runPipeline(ctx, "implement");
|
|
187
284
|
|
|
188
|
-
expect(claudeCallCount).toBe(
|
|
189
|
-
expect(ghCallCount).toBeGreaterThanOrEqual(2);
|
|
285
|
+
expect(claudeCallCount).toBe(2);
|
|
190
286
|
});
|
|
191
287
|
});
|
|
@@ -8,7 +8,7 @@ describe("STEP_NAMES", () => {
|
|
|
8
8
|
expect(STEP_NAMES).toEqual(PIPELINE_STEPS.map((s) => s.name));
|
|
9
9
|
});
|
|
10
10
|
|
|
11
|
-
it("should have
|
|
12
|
-
expect(STEP_NAMES).toHaveLength(
|
|
11
|
+
it("should have 4 steps", () => {
|
|
12
|
+
expect(STEP_NAMES).toHaveLength(4);
|
|
13
13
|
});
|
|
14
14
|
});
|
|
@@ -1,33 +1,23 @@
|
|
|
1
|
+
import { rmSync } from "node:fs";
|
|
1
2
|
import { join } from "node:path";
|
|
2
3
|
|
|
3
4
|
import { getConfig } from "./config.js";
|
|
4
|
-
import { ARTIFACTS
|
|
5
|
+
import { ARTIFACTS } from "./prompt-templates/index.js";
|
|
5
6
|
import type { StepName } from "./prompt-templates/index.js";
|
|
6
|
-
import {
|
|
7
|
+
import { createPr } from "./steps/create-pr.js";
|
|
7
8
|
import { stepImplement } from "./steps/implement.js";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import { ensureDir, fileExists, git, log, readFile, writeFile } from "./utils.js";
|
|
9
|
+
import { stepPlan, stepReview, stepSimplify } from "./steps/simple-steps.js";
|
|
10
|
+
import { LABELS, ensureLabelsExist, removeLabel, setLabel } from "./labels.js";
|
|
11
|
+
import { ensureDir, fileExists, readFile, writeFile } from "../../utils/fs.js";
|
|
12
|
+
import { execSafe, git } from "../../utils/git/exec.js";
|
|
13
|
+
import { ghRaw } from "../../utils/git/gh-cli-wrapper.js";
|
|
14
|
+
import { log } from "./utils.js";
|
|
15
15
|
import type { IssueContext } from "./utils.js";
|
|
16
16
|
|
|
17
|
-
const STEP_RUNNERS: Record<StepName, (ctx: IssueContext) => Promise<boolean>> = {
|
|
18
|
-
research: stepResearch,
|
|
19
|
-
plan: stepPlan,
|
|
20
|
-
"plan-annotations": stepPlanAnnotations,
|
|
21
|
-
"plan-implementation": stepPlanImplementation,
|
|
22
|
-
implement: stepImplement,
|
|
23
|
-
review: stepReview,
|
|
24
|
-
"create-pr": stepCreatePR,
|
|
25
|
-
"remove-label": stepRemoveLabel,
|
|
26
|
-
};
|
|
27
|
-
|
|
28
17
|
export { type StepName, STEP_NAMES } from "./prompt-templates/index.js";
|
|
29
18
|
|
|
30
19
|
export async function runPipeline(ctx: IssueContext, untilStep?: StepName): Promise<void> {
|
|
20
|
+
const cfg = getConfig();
|
|
31
21
|
log(`Pipeline starting for ${ctx.repo}#${ctx.number}: ${ctx.title}`);
|
|
32
22
|
|
|
33
23
|
ensureDir(ctx.issueDir);
|
|
@@ -38,30 +28,124 @@ export async function runPipeline(ctx: IssueContext, untilStep?: StepName): Prom
|
|
|
38
28
|
log("Saved initial-ramblings.md");
|
|
39
29
|
}
|
|
40
30
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
31
|
+
// Label management
|
|
32
|
+
await ensureLabelsExist(ctx.repo);
|
|
33
|
+
await removeLabel(ctx.repo, ctx.number, cfg.triggerLabel);
|
|
34
|
+
await setLabel(ctx.repo, ctx.number, LABELS.inProgress);
|
|
44
35
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
36
|
+
try {
|
|
37
|
+
// Step 1: Plan (runs once)
|
|
38
|
+
if (!(await stepPlan(ctx))) {
|
|
39
|
+
await handleFailure(ctx, "plan");
|
|
48
40
|
return;
|
|
49
41
|
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
log(`Pipeline paused after "${step.name}" (--until ${untilStep})`);
|
|
53
|
-
await checkoutMain();
|
|
42
|
+
if (untilStep === "plan") {
|
|
43
|
+
log(`Pipeline paused after "plan" (--until plan)`);
|
|
54
44
|
return;
|
|
55
45
|
}
|
|
46
|
+
|
|
47
|
+
// Steps 2-4: Implement → Simplify → Review loop
|
|
48
|
+
const maxRetries = cfg.maxReviewRetries;
|
|
49
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
50
|
+
// Clear previous iteration artifacts (except plan)
|
|
51
|
+
if (attempt > 0) {
|
|
52
|
+
clearArtifact(ctx, ARTIFACTS.completedSummary);
|
|
53
|
+
clearArtifact(ctx, ARTIFACTS.simplifySummary);
|
|
54
|
+
clearArtifact(ctx, ARTIFACTS.review);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Implement
|
|
58
|
+
if (!(await stepImplement(ctx))) {
|
|
59
|
+
await handleFailure(ctx, "implement");
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
if (untilStep === "implement") {
|
|
63
|
+
log(`Pipeline paused after "implement" (--until implement)`);
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Simplify
|
|
68
|
+
if (!(await stepSimplify(ctx))) {
|
|
69
|
+
await handleFailure(ctx, "simplify");
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
72
|
+
if (untilStep === "simplify") {
|
|
73
|
+
log(`Pipeline paused after "simplify" (--until simplify)`);
|
|
74
|
+
return;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Review
|
|
78
|
+
if (!(await stepReview(ctx))) {
|
|
79
|
+
await handleFailure(ctx, "review");
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
if (untilStep === "review") {
|
|
83
|
+
log(`Pipeline paused after "review" (--until review)`);
|
|
84
|
+
return;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Check review result
|
|
88
|
+
if (isReviewPass(ctx)) {
|
|
89
|
+
const prUrl = await createPr(ctx);
|
|
90
|
+
await removeLabel(ctx.repo, ctx.number, LABELS.inProgress);
|
|
91
|
+
await setLabel(ctx.repo, ctx.number, LABELS.success);
|
|
92
|
+
await setLabel(ctx.repo, ctx.number, LABELS.review);
|
|
93
|
+
log(`Pipeline complete for ${ctx.repo}#${ctx.number} — ${prUrl}`);
|
|
94
|
+
return;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Review failed
|
|
98
|
+
if (attempt < maxRetries) {
|
|
99
|
+
log(
|
|
100
|
+
`Review did not pass (attempt ${attempt + 1}/${maxRetries + 1}), retrying implement→simplify→review…`,
|
|
101
|
+
);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// All retries exhausted
|
|
106
|
+
await handleFailure(
|
|
107
|
+
ctx,
|
|
108
|
+
"review",
|
|
109
|
+
`auto-claude: review did not pass after ${maxRetries + 1} attempts. Labelled \`${LABELS.failed}\`.`,
|
|
110
|
+
);
|
|
111
|
+
} finally {
|
|
112
|
+
await checkoutMain();
|
|
56
113
|
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/**
 * Deletes one artifact file from the issue's working directory.
 *
 * @param ctx - Issue context; only `issueDir` is read.
 * @param artifact - File name of the artifact, relative to `ctx.issueDir`.
 */
function clearArtifact(ctx: IssueContext, artifact: string): void {
  const artifactPath = join(ctx.issueDir, artifact);
  // `force: true` makes a missing file a no-op instead of an exception.
  rmSync(artifactPath, { force: true });
}
|
|
57
119
|
|
|
58
|
-
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
120
|
+
/**
 * Reports whether the review artifact marks the review as passed.
 *
 * The review passes only when the file exists and its first line, after
 * trimming whitespace and ignoring case, is exactly "PASS".
 *
 * @param ctx - Issue context; only `issueDir` is read.
 * @returns `true` when the first line is "PASS", otherwise `false` (including a missing file).
 */
function isReviewPass(ctx: IssueContext): boolean {
  const reviewPath = join(ctx.issueDir, ARTIFACTS.review);
  if (fileExists(reviewPath)) {
    const content = readFile(reviewPath);
    // Take everything before the first newline (or the whole text if there is none).
    const newlineAt = content.indexOf("\n");
    const firstLine = newlineAt === -1 ? content : content.slice(0, newlineAt);
    return firstLine.trim().toUpperCase() === "PASS";
  }
  return false;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Marks an issue as failed and records where the pipeline stopped.
 *
 * Swaps the in-progress label for the failed label, optionally posts a comment
 * on the issue, and writes a log line naming the step.
 *
 * @param ctx - Issue context; `repo` and `number` are read.
 * @param stepName - Name of the step at which the pipeline stopped (log output only).
 * @param comment - Optional issue comment body; nothing is posted when omitted or empty.
 */
async function handleFailure(ctx: IssueContext, stepName: string, comment?: string): Promise<void> {
  const { repo, number } = ctx;

  await removeLabel(repo, number, LABELS.inProgress);
  await setLabel(repo, number, LABELS.failed);

  if (comment) {
    const commentArgs = ["issue", "comment", String(number), "--repo", repo, "--body", comment];
    await ghRaw(commentArgs);
  }

  log(`Pipeline stopped at "${stepName}" for ${repo}#${number}`);
}
|
|
64
136
|
|
|
65
137
|
/**
 * Returns the working tree to the configured main branch and restores the
 * stash entry that auto-claude created before switching branches, if any.
 *
 * Both steps are best-effort: a failed checkout is ignored, and a failed
 * `git stash list` or `git stash pop` never throws from here.
 */
async function checkoutMain(): Promise<void> {
  await git(["checkout", getConfig().mainBranch]).catch(() => {
    // Best-effort checkout — may fail if branch doesn't exist locally yet
  });

  const stashList = await execSafe("git", ["stash", "list"]);
  if (!stashList.ok) return;

  // `git stash list` prints entries newest-first starting at stash@{0}, so the
  // index of the matching output line is also the stash index.
  const lines = stashList.stdout.split("\n");
  const idx = lines.findIndex((l) => l.includes("auto-claude: before switching to"));
  if (idx < 0) return;

  // Only report success when the pop actually succeeded; a failed pop (for
  // example due to conflicts) leaves the stash entry in place for manual recovery.
  const popped = await execSafe("git", ["stash", "pop", `stash@{${idx}}`]);
  if (popped.ok) {
    log("Restored stashed changes");
  } else {
    log(`Could not restore stashed changes from stash@{${idx}}; the stash entry was left in place`);
  }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
You are a planning agent. Your job is to research the issue, explore the codebase, and produce a detailed implementation plan.
|
|
2
|
+
|
|
3
|
+
The issue is in @{{ISSUE_DIR}}/initial-ramblings.md.
|
|
4
|
+
The code lives primarily at `{{SCOPE_PATH}}/`.
|
|
5
|
+
The base branch is `{{MAIN_BRANCH}}`.
|
|
6
|
+
|
|
7
|
+
## Phase 1: Research
|
|
8
|
+
|
|
9
|
+
1. Read the issue description thoroughly — understand the problem, requirements, and constraints
|
|
10
|
+
2. Explore the relevant areas of the codebase:
|
|
11
|
+
- Find files related to the feature/bug area
|
|
12
|
+
- Read existing implementations of similar patterns
|
|
13
|
+
- Identify dependencies, imports, and shared utilities
|
|
14
|
+
- Check for existing tests in the area
|
|
15
|
+
3. Read the project's CLAUDE.md for coding conventions, test commands, and architecture guidance
|
|
16
|
+
|
|
17
|
+
## Phase 2: Design
|
|
18
|
+
|
|
19
|
+
1. Identify the approach — what needs to change and why
|
|
20
|
+
2. Consider alternatives and trade-offs — pick the simplest path that meets requirements
|
|
21
|
+
3. Identify risks, edge cases, and things that could go wrong
|
|
22
|
+
4. Determine test strategy — what needs testing and how
|
|
23
|
+
|
|
24
|
+
## Phase 3: Write the Plan
|
|
25
|
+
|
|
26
|
+
Write the plan to @{{ISSUE_DIR}}/plan.md with this structure:
|
|
27
|
+
|
|
28
|
+
```markdown
|
|
29
|
+
# Plan: <concise title>
|
|
30
|
+
|
|
31
|
+
## Summary
|
|
32
|
+
|
|
33
|
+
1-3 sentence description of the change and why it's needed.
|
|
34
|
+
|
|
35
|
+
## Approach
|
|
36
|
+
|
|
37
|
+
High-level description of the solution strategy.
|
|
38
|
+
|
|
39
|
+
## Files to Change
|
|
40
|
+
|
|
41
|
+
- `path/to/file.ext` — what changes and why
|
|
42
|
+
- `path/to/new-file.ext` — (new) purpose
|
|
43
|
+
- `path/to/deleted.ext` — (delete) reason
|
|
44
|
+
|
|
45
|
+
## Implementation Checklist
|
|
46
|
+
|
|
47
|
+
- [ ] Task 1 — specific, actionable description
|
|
48
|
+
- [ ] Task 2 — include file paths where relevant
|
|
49
|
+
- [ ] Task 3 — tests: describe what to test
|
|
50
|
+
- [ ] ...
|
|
51
|
+
|
|
52
|
+
## Test Strategy
|
|
53
|
+
|
|
54
|
+
How to verify the implementation is correct. Which behaviors need test coverage.
|
|
55
|
+
|
|
56
|
+
## Risks / Edge Cases
|
|
57
|
+
|
|
58
|
+
Anything the implementer should watch out for.
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Guidelines
|
|
62
|
+
|
|
63
|
+
- The checklist is the implementer's single source of truth — make every task actionable and unambiguous
|
|
64
|
+
- Include file paths in tasks so the implementer doesn't have to search
|
|
65
|
+
- Order tasks logically — dependencies before dependents, types/interfaces before implementations
|
|
66
|
+
- Include test tasks inline (not as a separate phase) — test each behavior near the task that creates it
|
|
67
|
+
- Keep it focused — don't over-plan. If a task is straightforward, a single line is enough
|
|
68
|
+
- Follow the project's coding conventions from CLAUDE.md
|
package/src/lib/auto-claude/prompt-templates/{05_implement.prompt.md → 02_implement.prompt.md}
RENAMED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan
|
|
1
|
+
You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan.md task by task.
|
|
2
2
|
|
|
3
|
-
The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist.
|
|
3
|
+
The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist in plan.md.
|
|
4
4
|
|
|
5
5
|
The code lives primarily at `{{SCOPE_PATH}}/`.
|
|
6
|
+
{{REVIEW_FEEDBACK}}
|
|
6
7
|
|
|
7
8
|
## How to work
|
|
8
9
|
|