@towles/tool 0.0.59 → 0.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/gh/branch.test.ts +107 -108
- package/src/commands/gh/branch.ts +39 -36
- package/src/lib/auto-claude/pipeline-execution.test.ts +195 -0
- package/src/lib/auto-claude/prompt-templates/01_research.prompt.md +21 -0
- package/src/lib/auto-claude/prompt-templates/02_plan.prompt.md +27 -0
- package/src/lib/auto-claude/prompt-templates/03_plan-annotations.prompt.md +15 -0
- package/src/lib/auto-claude/prompt-templates/04_plan-implementation.prompt.md +35 -0
- package/src/lib/auto-claude/prompt-templates/05_implement.prompt.md +36 -0
- package/src/lib/auto-claude/prompt-templates/06_review.prompt.md +32 -0
- package/src/lib/auto-claude/prompt-templates/07_refresh.prompt.md +30 -0
- package/src/lib/auto-claude/prompt-templates/CLAUDE.md +12 -0
- package/src/lib/auto-claude/prompt-templates/index.test.ts +2 -2
- package/src/lib/auto-claude/prompt-templates/index.ts +7 -7
- package/src/lib/auto-claude/run-claude.test.ts +160 -0
- package/src/lib/auto-claude/steps/steps.test.ts +330 -0
- package/src/lib/auto-claude/test-helpers.ts +86 -0
- package/src/lib/auto-claude/utils-execution.test.ts +152 -0
- package/src/lib/auto-claude/utils.test.ts +7 -7
- package/src/lib/auto-claude/utils.ts +2 -1
- package/src/utils/git/branch-name.test.ts +83 -0
- package/src/utils/git/branch-name.ts +10 -0
- package/src/lib/auto-claude/prompt-templates/01-prompt-research.md +0 -28
- package/src/lib/auto-claude/prompt-templates/02-prompt-plan.md +0 -28
- package/src/lib/auto-claude/prompt-templates/03-prompt-plan-annotations.md +0 -21
- package/src/lib/auto-claude/prompt-templates/04-prompt-plan-implementation.md +0 -33
- package/src/lib/auto-claude/prompt-templates/05-prompt-implement.md +0 -31
- package/src/lib/auto-claude/prompt-templates/06-prompt-review.md +0 -30
- package/src/lib/auto-claude/prompt-templates/07-prompt-refresh.md +0 -39
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
You are an implementation agent. Follow the checklist in @{{ISSUE_DIR}}/plan-implementation.md task by task.
|
|
2
|
+
|
|
3
|
+
The issue is in @{{ISSUE_DIR}}/initial-ramblings.md — background context only. Your ONLY source of truth is the checklist.
|
|
4
|
+
|
|
5
|
+
The code lives primarily at `{{SCOPE_PATH}}/`.
|
|
6
|
+
|
|
7
|
+
## How to work
|
|
8
|
+
|
|
9
|
+
1. Find the next unchecked (`- [ ]`) task in order, top to bottom
|
|
10
|
+
2. If the task includes tests, use **red/green TDD**:
|
|
11
|
+
- Write the test → run tests → confirm it **fails** (red)
|
|
12
|
+
- Implement the change → run tests → confirm it **passes** (green)
|
|
13
|
+
3. If no tests, execute the task directly
|
|
14
|
+
4. Update the checklist: `- [ ]` → `- [x]`
|
|
15
|
+
5. Commit: `feat(scope): description` or `fix(scope): description`
|
|
16
|
+
6. Repeat until all tasks are done
|
|
17
|
+
|
|
18
|
+
Do NOT push to remote. Do NOT stop until all tasks are completed.
|
|
19
|
+
|
|
20
|
+
## Mandatory verification
|
|
21
|
+
|
|
22
|
+
Before writing completed-summary.md, run the project's type-check, test, and lint commands. Fix any errors and re-run until all pass.
|
|
23
|
+
|
|
24
|
+
## When ALL checkboxes are `- [x]`
|
|
25
|
+
|
|
26
|
+
Write @{{ISSUE_DIR}}/completed-summary.md — brief summary of everything implemented. Do NOT create it if ANY tasks remain unchecked.
|
|
27
|
+
|
|
28
|
+
## Code quality
|
|
29
|
+
|
|
30
|
+
- Follow the project's coding conventions from CLAUDE.md.
|
|
31
|
+
- No unnecessary comments or jsdocs. Proper typing — no `any`.
|
|
32
|
+
- Prefer real implementations over mocks — only mock at external boundaries (network, filesystem, third-party APIs).
|
|
33
|
+
- Do not write tests for things the type system or compiler already enforces.
|
|
34
|
+
- Follow the checklist literally. Do NOT skip tasks.
|
|
35
|
+
- If something unexpected happens, document it in a note on that task in the checklist and proceed.
|
|
36
|
+
- Fix bugs you encounter in the area you're working on. Reuse existing abstractions.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
You are reviewing code changes for the issue in @{{ISSUE_DIR}}/initial-ramblings.md. The plan is in @{{ISSUE_DIR}}/plan.md and checklist in @{{ISSUE_DIR}}/plan-implementation.md.
|
|
2
|
+
|
|
3
|
+
## Automated checks
|
|
4
|
+
|
|
5
|
+
Run the project's type-check, test, and lint commands first. Fix any failures before manual review.
|
|
6
|
+
|
|
7
|
+
## Manual review
|
|
8
|
+
|
|
9
|
+
Run `git diff {{MAIN_BRANCH}}...HEAD` and check:
|
|
10
|
+
|
|
11
|
+
1. **Correctness** — does it implement what the plan describes?
|
|
12
|
+
2. **Imports** — all present and correct?
|
|
13
|
+
3. **Type errors** — any obvious issues?
|
|
14
|
+
4. **Unused code** — variables, imports, or functions added but never used?
|
|
15
|
+
5. **Pattern consistency** — follows existing codebase conventions?
|
|
16
|
+
6. **Security** — any injection vulnerabilities or unsafe operations?
|
|
17
|
+
7. **Edge cases** — anything that could break under unusual input?
|
|
18
|
+
8. **Incomplete work** — TODOs, placeholders, unfinished implementations?
|
|
19
|
+
9. **Test coverage** — new behaviors covered by tests? Existing tests updated?
|
|
20
|
+
|
|
21
|
+
After review, run the code-simplify skill on changed files. Apply simplifications that improve clarity without changing behavior — commit separately.
|
|
22
|
+
|
|
23
|
+
Fix issues directly, commit as `fix(scope): review fixes for issue #N`.
|
|
24
|
+
|
|
25
|
+
## Write @{{ISSUE_DIR}}/review.md
|
|
26
|
+
|
|
27
|
+
- **Status**: PASS, PASS WITH FIXES, or FAIL
|
|
28
|
+
- FAIL = fundamentally broken (wrong approach, missing core functionality, unfixable regressions). Explain what's wrong.
|
|
29
|
+
- **Issues found** — what was wrong and what you fixed
|
|
30
|
+
- **Confidence level** — high/medium/low
|
|
31
|
+
- **Notes** — anything the PR reviewer should check
|
|
32
|
+
- **Recommended follow-ups** — only if genuinely valuable. Omit if nothing worth flagging.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
You are updating an existing auto-claude branch to be compatible with the latest {{MAIN_BRANCH}}.
|
|
2
|
+
|
|
3
|
+
## Context
|
|
4
|
+
|
|
5
|
+
- Original issue: @{{ISSUE_DIR}}/initial-ramblings.md
|
|
6
|
+
- Checklist: @{{ISSUE_DIR}}/plan-implementation.md
|
|
7
|
+
- What was implemented: @{{ISSUE_DIR}}/completed-summary.md
|
|
8
|
+
|
|
9
|
+
The code lives primarily at `{{SCOPE_PATH}}/`.
|
|
10
|
+
|
|
11
|
+
## Your task
|
|
12
|
+
|
|
13
|
+
1. `git diff {{MAIN_BRANCH}}...HEAD` — what this PR changes
|
|
14
|
+
2. `git log {{MAIN_BRANCH}}..HEAD --oneline` — PR's commit history
|
|
15
|
+
3. `git diff HEAD...{{MAIN_BRANCH}}` — what {{MAIN_BRANCH}} changed since divergence
|
|
16
|
+
4. Check: do imports resolve? Do types/APIs still match? Conflicts with {{MAIN_BRANCH}}?
|
|
17
|
+
5. Fix issues directly. For merge conflicts, preserve PR intent while adopting {{MAIN_BRANCH}}'s patterns. Commit: `fix(scope): adapt to {{MAIN_BRANCH}} changes`
|
|
18
|
+
6. Run the project's type-check, test, and lint commands. Fix any errors.
|
|
19
|
+
|
|
20
|
+
**CRITICAL:**
|
|
21
|
+
|
|
22
|
+
- Do NOT re-implement. Only fix what broke due to {{MAIN_BRANCH}} changes.
|
|
23
|
+
- If {{MAIN_BRANCH}} changed so fundamentally that the PR's approach is no longer viable, report `NEEDS-ATTENTION`. Do NOT force a fix.
|
|
24
|
+
- Do NOT push to remote. Do NOT modify plan or research files.
|
|
25
|
+
|
|
26
|
+
## Write @{{ISSUE_DIR}}/refresh-summary.md
|
|
27
|
+
|
|
28
|
+
- **Status**: UP-TO-DATE, ADAPTED, or NEEDS-ATTENTION
|
|
29
|
+
- **Changes made** — what broke and how you fixed it
|
|
30
|
+
- **Risk areas** — anything a reviewer should double-check
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Prompt Templates
|
|
2
|
+
|
|
3
|
+
These templates are **language- and toolchain-agnostic**. They must not reference specific tools (pnpm, vitest, cargo, pytest, etc.), languages (TypeScript, Rust, Python), or framework conventions (oclif, Zod, consola).
|
|
4
|
+
|
|
5
|
+
Project-specific details (test commands, lint commands, type-check commands, coding conventions) come from the **target repo's CLAUDE.md**, which Claude Code loads automatically at runtime.
|
|
6
|
+
|
|
7
|
+
When editing templates:
|
|
8
|
+
|
|
9
|
+
- Say "run the project's test/lint/type-check commands" — not `pnpm test` or `cargo test`
|
|
10
|
+
- Say "test files" — not `*.test.ts` or `*_test.py`
|
|
11
|
+
- Say "schema validation" — not "Zod schemas"
|
|
12
|
+
- Say "follow the project's coding conventions from CLAUDE.md" — not "use consola" or "use import type"
|
|
@@ -28,9 +28,9 @@ describe("TEMPLATES", () => {
|
|
|
28
28
|
}
|
|
29
29
|
});
|
|
30
30
|
|
|
31
|
-
it("filenames should follow
|
|
31
|
+
it("filenames should follow NN_step.prompt.md pattern", () => {
|
|
32
32
|
for (const filename of Object.values(TEMPLATES)) {
|
|
33
|
-
expect(filename).toMatch(/^\d{2}
|
|
33
|
+
expect(filename).toMatch(/^\d{2}_.+\.prompt\.md$/);
|
|
34
34
|
}
|
|
35
35
|
});
|
|
36
36
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
export const TEMPLATES = {
|
|
2
|
-
research: "01-prompt-research.md",
|
|
3
|
-
plan: "02-prompt-plan.md",
|
|
4
|
-
planAnnotations: "03-prompt-plan-annotations.md",
|
|
5
|
-
planImplementation: "04-prompt-plan-implementation.md",
|
|
6
|
-
implement: "05-prompt-implement.md",
|
|
7
|
-
review: "06-prompt-review.md",
|
|
8
|
-
refresh: "07-prompt-refresh.md",
|
|
2
|
+
research: "01_research.prompt.md",
|
|
3
|
+
plan: "02_plan.prompt.md",
|
|
4
|
+
planAnnotations: "03_plan-annotations.prompt.md",
|
|
5
|
+
planImplementation: "04_plan-implementation.prompt.md",
|
|
6
|
+
implement: "05_implement.prompt.md",
|
|
7
|
+
review: "06_review.prompt.md",
|
|
8
|
+
refresh: "07_refresh.prompt.md",
|
|
9
9
|
} as const;
|
|
10
10
|
|
|
11
11
|
export const PIPELINE_STEPS = [
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import consola from "consola";
|
|
2
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
|
3
|
+
|
|
4
|
+
import { initConfig } from "./config";
|
|
5
|
+
import { createTestRepo } from "./test-helpers";
|
|
6
|
+
import type { TestRepo } from "./test-helpers";
|
|
7
|
+
|
|
8
|
+
consola.level = -999;
|
|
9
|
+
|
|
10
|
+
// ── File-level tinyexec mock -- intercepts all x() calls ──
|
|
11
|
+
|
|
12
|
+
let mockXImpl: ((...args: unknown[]) => unknown) | null = null;
|
|
13
|
+
|
|
14
|
+
vi.mock("tinyexec", () => ({
|
|
15
|
+
x: vi.fn((...args: unknown[]) => {
|
|
16
|
+
if (mockXImpl) return mockXImpl(...args);
|
|
17
|
+
throw new Error("mockXImpl not set");
|
|
18
|
+
}),
|
|
19
|
+
}));
|
|
20
|
+
|
|
21
|
+
describe("runClaude (mocked tinyexec)", () => {
|
|
22
|
+
let originalCwd: string;
|
|
23
|
+
let repo: TestRepo;
|
|
24
|
+
|
|
25
|
+
beforeAll(async () => {
|
|
26
|
+
originalCwd = process.cwd();
|
|
27
|
+
repo = createTestRepo();
|
|
28
|
+
process.chdir(repo.dir);
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
afterAll(() => {
|
|
32
|
+
process.chdir(originalCwd);
|
|
33
|
+
repo.cleanup();
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
beforeEach(() => {
|
|
37
|
+
mockXImpl = null;
|
|
38
|
+
vi.clearAllMocks();
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it("constructs correct args and parses JSON result", async () => {
|
|
42
|
+
mockXImpl = vi.fn().mockResolvedValue({
|
|
43
|
+
stdout: JSON.stringify({
|
|
44
|
+
result: "All done",
|
|
45
|
+
is_error: false,
|
|
46
|
+
total_cost_usd: 0.05,
|
|
47
|
+
num_turns: 3,
|
|
48
|
+
}),
|
|
49
|
+
exitCode: 0,
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
await initConfig({ repo: "test/repo", mainBranch: "main" });
|
|
53
|
+
|
|
54
|
+
const { runClaude } = await import("./utils");
|
|
55
|
+
|
|
56
|
+
const result = await runClaude({
|
|
57
|
+
promptFile: "test-prompt.md",
|
|
58
|
+
permissionMode: "plan",
|
|
59
|
+
maxTurns: 10,
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
expect(result.result).toBe("All done");
|
|
63
|
+
expect(result.is_error).toBe(false);
|
|
64
|
+
expect(result.num_turns).toBe(3);
|
|
65
|
+
|
|
66
|
+
expect(mockXImpl).toHaveBeenCalledWith(
|
|
67
|
+
"claude",
|
|
68
|
+
expect.arrayContaining([
|
|
69
|
+
"-p",
|
|
70
|
+
"--output-format",
|
|
71
|
+
"json",
|
|
72
|
+
"--permission-mode",
|
|
73
|
+
"plan",
|
|
74
|
+
"--max-turns",
|
|
75
|
+
"10",
|
|
76
|
+
"@test-prompt.md",
|
|
77
|
+
]),
|
|
78
|
+
expect.any(Object),
|
|
79
|
+
);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
it("returns fallback when JSON parsing fails", async () => {
|
|
83
|
+
mockXImpl = vi.fn().mockResolvedValue({ stdout: "not json output", exitCode: 0 });
|
|
84
|
+
|
|
85
|
+
await initConfig({ repo: "test/repo", mainBranch: "main" });
|
|
86
|
+
|
|
87
|
+
const { runClaude } = await import("./utils");
|
|
88
|
+
|
|
89
|
+
const result = await runClaude({
|
|
90
|
+
promptFile: "test.md",
|
|
91
|
+
permissionMode: "acceptEdits",
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
expect(result.result).toBe("not json output");
|
|
95
|
+
expect(result.is_error).toBe(false);
|
|
96
|
+
expect(result.total_cost_usd).toBe(0);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
it("retries on failure when retry is enabled", async () => {
|
|
100
|
+
let callCount = 0;
|
|
101
|
+
mockXImpl = vi.fn().mockImplementation(() => {
|
|
102
|
+
callCount++;
|
|
103
|
+
if (callCount < 3) {
|
|
104
|
+
throw new Error("Claude process failed");
|
|
105
|
+
}
|
|
106
|
+
return Promise.resolve({
|
|
107
|
+
stdout: JSON.stringify({
|
|
108
|
+
result: "ok",
|
|
109
|
+
is_error: false,
|
|
110
|
+
total_cost_usd: 0,
|
|
111
|
+
num_turns: 1,
|
|
112
|
+
}),
|
|
113
|
+
exitCode: 0,
|
|
114
|
+
});
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
await initConfig({
|
|
118
|
+
repo: "test/repo",
|
|
119
|
+
mainBranch: "main",
|
|
120
|
+
loopRetryEnabled: true,
|
|
121
|
+
maxRetries: 5,
|
|
122
|
+
retryDelayMs: 1,
|
|
123
|
+
maxRetryDelayMs: 5,
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
const { runClaude } = await import("./utils");
|
|
127
|
+
|
|
128
|
+
const result = await runClaude({
|
|
129
|
+
promptFile: "test.md",
|
|
130
|
+
permissionMode: "plan",
|
|
131
|
+
retry: true,
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
expect(result.result).toBe("ok");
|
|
135
|
+
expect(callCount).toBe(3);
|
|
136
|
+
}, 10_000);
|
|
137
|
+
|
|
138
|
+
it("throws after max retries exhausted", async () => {
|
|
139
|
+
mockXImpl = vi.fn().mockRejectedValue(new Error("Claude crash"));
|
|
140
|
+
|
|
141
|
+
await initConfig({
|
|
142
|
+
repo: "test/repo",
|
|
143
|
+
mainBranch: "main",
|
|
144
|
+
loopRetryEnabled: true,
|
|
145
|
+
maxRetries: 2,
|
|
146
|
+
retryDelayMs: 1,
|
|
147
|
+
maxRetryDelayMs: 5,
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
const { runClaude } = await import("./utils");
|
|
151
|
+
|
|
152
|
+
await expect(
|
|
153
|
+
runClaude({
|
|
154
|
+
promptFile: "test.md",
|
|
155
|
+
permissionMode: "plan",
|
|
156
|
+
retry: true,
|
|
157
|
+
}),
|
|
158
|
+
).rejects.toThrow("Claude failed after 2 retries");
|
|
159
|
+
}, 10_000);
|
|
160
|
+
});
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
|
|
5
|
+
import consola from "consola";
|
|
6
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
7
|
+
|
|
8
|
+
import { initConfig } from "../config";
|
|
9
|
+
import { ARTIFACTS } from "../prompt-templates/index";
|
|
10
|
+
import {
|
|
11
|
+
buildTestContext,
|
|
12
|
+
createTestRepoWithRemote,
|
|
13
|
+
errorClaudeJson,
|
|
14
|
+
successClaudeJson,
|
|
15
|
+
} from "../test-helpers";
|
|
16
|
+
import type { TestRepo } from "../test-helpers";
|
|
17
|
+
import type { IssueContext } from "../utils";
|
|
18
|
+
|
|
19
|
+
consola.level = -999;
|
|
20
|
+
|
|
21
|
+
// ── Mock tinyexec: intercept "claude" calls, pass through everything else ──
|
|
22
|
+
|
|
23
|
+
let mockClaudeImpl: ((args: string[]) => Promise<{ stdout: string; exitCode: number }>) | null =
|
|
24
|
+
null;
|
|
25
|
+
|
|
26
|
+
vi.mock("tinyexec", async (importOriginal) => {
|
|
27
|
+
const original = await importOriginal<typeof import("tinyexec")>();
|
|
28
|
+
return {
|
|
29
|
+
...original,
|
|
30
|
+
x: vi.fn(
|
|
31
|
+
async (
|
|
32
|
+
cmd: string,
|
|
33
|
+
args: string[],
|
|
34
|
+
opts?: Record<string, unknown>,
|
|
35
|
+
): Promise<{ stdout: string; exitCode: number }> => {
|
|
36
|
+
if (cmd === "claude" && mockClaudeImpl) {
|
|
37
|
+
return mockClaudeImpl(args);
|
|
38
|
+
}
|
|
39
|
+
if (cmd === "claude") {
|
|
40
|
+
throw new Error("Unexpected claude call -- set mockClaudeImpl before running");
|
|
41
|
+
}
|
|
42
|
+
return original.x(cmd, args, opts as never) as unknown as Promise<{
|
|
43
|
+
stdout: string;
|
|
44
|
+
exitCode: number;
|
|
45
|
+
}>;
|
|
46
|
+
},
|
|
47
|
+
),
|
|
48
|
+
};
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
// ── Shared setup/teardown for all step tests ──
|
|
52
|
+
|
|
53
|
+
function setupStepTest(): { originalCwd: string; repo: TestRepo; ctx: IssueContext } {
|
|
54
|
+
const originalCwd = process.cwd();
|
|
55
|
+
const repo = createTestRepoWithRemote();
|
|
56
|
+
process.chdir(repo.dir);
|
|
57
|
+
const ctx = buildTestContext(repo.dir);
|
|
58
|
+
mkdirSync(ctx.issueDir, { recursive: true });
|
|
59
|
+
mockClaudeImpl = null;
|
|
60
|
+
return { originalCwd, repo, ctx };
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function teardownStepTest(originalCwd: string, repo: TestRepo): void {
|
|
64
|
+
process.chdir(originalCwd);
|
|
65
|
+
repo.cleanup();
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// ── runStepWithArtifact ──
|
|
69
|
+
|
|
70
|
+
describe("runStepWithArtifact", () => {
|
|
71
|
+
let originalCwd: string;
|
|
72
|
+
let repo: TestRepo;
|
|
73
|
+
let ctx: IssueContext;
|
|
74
|
+
|
|
75
|
+
beforeEach(async () => {
|
|
76
|
+
({ originalCwd, repo, ctx } = setupStepTest());
|
|
77
|
+
await initConfig({ repo: "test/repo", mainBranch: "main" });
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
afterEach(() => teardownStepTest(originalCwd, repo));
|
|
81
|
+
|
|
82
|
+
it("skips when artifact already exists", async () => {
|
|
83
|
+
const { runStepWithArtifact } = await import("../utils");
|
|
84
|
+
const artifactPath = join(ctx.issueDir, "test-artifact.md");
|
|
85
|
+
writeFileSync(artifactPath, "# Existing artifact content");
|
|
86
|
+
|
|
87
|
+
const result = await runStepWithArtifact({
|
|
88
|
+
stepName: "Test Step",
|
|
89
|
+
ctx,
|
|
90
|
+
artifactPath,
|
|
91
|
+
templateName: "01_research.prompt.md",
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
expect(result).toBe(true);
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it("returns false when Claude returns is_error", async () => {
|
|
98
|
+
mockClaudeImpl = async () => ({ stdout: errorClaudeJson(), exitCode: 0 });
|
|
99
|
+
|
|
100
|
+
const { runStepWithArtifact } = await import("../utils");
|
|
101
|
+
const artifactPath = join(ctx.issueDir, "missing-artifact.md");
|
|
102
|
+
|
|
103
|
+
const result = await runStepWithArtifact({
|
|
104
|
+
stepName: "Test Step",
|
|
105
|
+
ctx,
|
|
106
|
+
artifactPath,
|
|
107
|
+
templateName: "01_research.prompt.md",
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
expect(result).toBe(false);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it("returns false when artifact not produced after Claude run", async () => {
|
|
114
|
+
mockClaudeImpl = async () => ({ stdout: successClaudeJson(), exitCode: 0 });
|
|
115
|
+
|
|
116
|
+
const { runStepWithArtifact } = await import("../utils");
|
|
117
|
+
const artifactPath = join(ctx.issueDir, "never-created.md");
|
|
118
|
+
|
|
119
|
+
const result = await runStepWithArtifact({
|
|
120
|
+
stepName: "Test Step",
|
|
121
|
+
ctx,
|
|
122
|
+
artifactPath,
|
|
123
|
+
templateName: "01_research.prompt.md",
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
expect(result).toBe(false);
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
it("succeeds and commits when Claude produces artifact", async () => {
|
|
130
|
+
const { runStepWithArtifact } = await import("../utils");
|
|
131
|
+
const artifactPath = join(ctx.issueDir, "plan.md");
|
|
132
|
+
|
|
133
|
+
mockClaudeImpl = async () => {
|
|
134
|
+
writeFileSync(artifactPath, "# Plan\n\nDetailed plan content here.");
|
|
135
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
const result = await runStepWithArtifact({
|
|
139
|
+
stepName: "Test Step",
|
|
140
|
+
ctx,
|
|
141
|
+
artifactPath,
|
|
142
|
+
templateName: "01_research.prompt.md",
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
expect(result).toBe(true);
|
|
146
|
+
|
|
147
|
+
const log = execSync("git log --oneline", { cwd: repo.dir, encoding: "utf-8" });
|
|
148
|
+
expect(log).toContain("chore(auto-claude)");
|
|
149
|
+
});
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
// ── stepResearch ──
|
|
153
|
+
|
|
154
|
+
describe("stepResearch", () => {
|
|
155
|
+
let originalCwd: string;
|
|
156
|
+
let repo: TestRepo;
|
|
157
|
+
let ctx: IssueContext;
|
|
158
|
+
|
|
159
|
+
beforeEach(async () => {
|
|
160
|
+
({ originalCwd, repo, ctx } = setupStepTest());
|
|
161
|
+
await initConfig({ repo: "test/repo", mainBranch: "main" });
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
afterEach(() => teardownStepTest(originalCwd, repo));
|
|
165
|
+
|
|
166
|
+
it("skips when research.md exists and is > 200 chars", async () => {
|
|
167
|
+
const { stepResearch } = await import("./research");
|
|
168
|
+
|
|
169
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.research), "x".repeat(250));
|
|
170
|
+
|
|
171
|
+
const result = await stepResearch(ctx);
|
|
172
|
+
expect(result).toBe(true);
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
it("does NOT skip when research.md exists but is < 200 chars", async () => {
|
|
176
|
+
const { stepResearch } = await import("./research");
|
|
177
|
+
|
|
178
|
+
const researchPath = join(ctx.issueDir, ARTIFACTS.research);
|
|
179
|
+
writeFileSync(researchPath, "short");
|
|
180
|
+
|
|
181
|
+
let claudeCalled = false;
|
|
182
|
+
mockClaudeImpl = async () => {
|
|
183
|
+
claudeCalled = true;
|
|
184
|
+
writeFileSync(researchPath, "x".repeat(250));
|
|
185
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
const result = await stepResearch(ctx);
|
|
189
|
+
expect(claudeCalled).toBe(true);
|
|
190
|
+
expect(result).toBe(true);
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
it("calls ensureBranch (real git branch creation)", async () => {
|
|
194
|
+
const { stepResearch } = await import("./research");
|
|
195
|
+
|
|
196
|
+
const researchPath = join(ctx.issueDir, ARTIFACTS.research);
|
|
197
|
+
mockClaudeImpl = async () => {
|
|
198
|
+
writeFileSync(researchPath, "x".repeat(250));
|
|
199
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
await stepResearch(ctx);
|
|
203
|
+
|
|
204
|
+
const branches = execSync("git branch", { cwd: repo.dir, encoding: "utf-8" });
|
|
205
|
+
expect(branches).toContain(ctx.branch.split("/").pop());
|
|
206
|
+
});
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
// ── stepPlanAnnotations ──
|
|
210
|
+
|
|
211
|
+
describe("stepPlanAnnotations", () => {
|
|
212
|
+
let originalCwd: string;
|
|
213
|
+
let repo: TestRepo;
|
|
214
|
+
let ctx: IssueContext;
|
|
215
|
+
|
|
216
|
+
beforeEach(async () => {
|
|
217
|
+
({ originalCwd, repo, ctx } = setupStepTest());
|
|
218
|
+
await initConfig({ repo: "test/repo", mainBranch: "main" });
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
afterEach(() => teardownStepTest(originalCwd, repo));
|
|
222
|
+
|
|
223
|
+
it("returns true when no plan-annotations.md exists", async () => {
|
|
224
|
+
const { stepPlanAnnotations } = await import("./plan-annotations");
|
|
225
|
+
|
|
226
|
+
const result = await stepPlanAnnotations(ctx);
|
|
227
|
+
expect(result).toBe(true);
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
it("skips when plan-annotations-addressed.md already exists", async () => {
|
|
231
|
+
const { stepPlanAnnotations } = await import("./plan-annotations");
|
|
232
|
+
|
|
233
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.planAnnotations), "# Annotations");
|
|
234
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.planAnnotationsAddressed), "# Addressed");
|
|
235
|
+
|
|
236
|
+
const result = await stepPlanAnnotations(ctx);
|
|
237
|
+
expect(result).toBe(true);
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
it("renames file after Claude runs successfully", async () => {
|
|
241
|
+
const { stepPlanAnnotations } = await import("./plan-annotations");
|
|
242
|
+
|
|
243
|
+
const annotationsPath = join(ctx.issueDir, ARTIFACTS.planAnnotations);
|
|
244
|
+
const addressedPath = join(ctx.issueDir, ARTIFACTS.planAnnotationsAddressed);
|
|
245
|
+
writeFileSync(annotationsPath, "# Annotations\n\nSome feedback here.");
|
|
246
|
+
|
|
247
|
+
mockClaudeImpl = async () => ({ stdout: successClaudeJson(), exitCode: 0 });
|
|
248
|
+
|
|
249
|
+
const result = await stepPlanAnnotations(ctx);
|
|
250
|
+
|
|
251
|
+
expect(result).toBe(true);
|
|
252
|
+
expect(existsSync(addressedPath)).toBe(true);
|
|
253
|
+
expect(existsSync(annotationsPath)).toBe(false);
|
|
254
|
+
});
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
// ── stepImplement ──
|
|
258
|
+
|
|
259
|
+
describe("stepImplement", () => {
|
|
260
|
+
let originalCwd: string;
|
|
261
|
+
let repo: TestRepo;
|
|
262
|
+
let ctx: IssueContext;
|
|
263
|
+
|
|
264
|
+
beforeEach(async () => {
|
|
265
|
+
({ originalCwd, repo, ctx } = setupStepTest());
|
|
266
|
+
await initConfig({
|
|
267
|
+
repo: "test/repo",
|
|
268
|
+
mainBranch: "main",
|
|
269
|
+
maxImplementIterations: 3,
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
// stepImplement does `git checkout ctx.branch`, so create the branch
|
|
273
|
+
execSync(`git checkout -b ${ctx.branch}`, { cwd: repo.dir, stdio: "ignore" });
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
afterEach(() => teardownStepTest(originalCwd, repo));
|
|
277
|
+
|
|
278
|
+
it("skips when completed-summary.md exists", async () => {
|
|
279
|
+
const { stepImplement } = await import("./implement");
|
|
280
|
+
|
|
281
|
+
writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), "# Done");
|
|
282
|
+
|
|
283
|
+
const result = await stepImplement(ctx);
|
|
284
|
+
expect(result).toBe(true);
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
it("returns false after maxImplementIterations exhausted", async () => {
|
|
288
|
+
const { stepImplement } = await import("./implement");
|
|
289
|
+
|
|
290
|
+
let callCount = 0;
|
|
291
|
+
mockClaudeImpl = async () => {
|
|
292
|
+
callCount++;
|
|
293
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
294
|
+
};
|
|
295
|
+
|
|
296
|
+
const result = await stepImplement(ctx);
|
|
297
|
+
expect(result).toBe(false);
|
|
298
|
+
expect(callCount).toBe(3);
|
|
299
|
+
});
|
|
300
|
+
|
|
301
|
+
it("stops looping when completed-summary.md appears", async () => {
|
|
302
|
+
const { stepImplement } = await import("./implement");
|
|
303
|
+
|
|
304
|
+
let callCount = 0;
|
|
305
|
+
const completedPath = join(ctx.issueDir, ARTIFACTS.completedSummary);
|
|
306
|
+
|
|
307
|
+
mockClaudeImpl = async () => {
|
|
308
|
+
callCount++;
|
|
309
|
+
if (callCount === 2) {
|
|
310
|
+
writeFileSync(completedPath, "# Implementation Complete\n\nAll tasks done.");
|
|
311
|
+
}
|
|
312
|
+
return { stdout: successClaudeJson(), exitCode: 0 };
|
|
313
|
+
};
|
|
314
|
+
|
|
315
|
+
const result = await stepImplement(ctx);
|
|
316
|
+
expect(result).toBe(true);
|
|
317
|
+
expect(callCount).toBe(2);
|
|
318
|
+
});
|
|
319
|
+
});
|
|
320
|
+
|
|
321
|
+
// ── stepCreatePR / stepRemoveLabel -- skip in CI (needs gh) ──
|
|
322
|
+
|
|
323
|
+
describe.skipIf(!!process.env.CI)("stepCreatePR (requires gh)", () => {
|
|
324
|
+
it.todo("skips when open PR already exists");
|
|
325
|
+
it.todo("creates PR and writes pr-url.txt");
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
describe.skipIf(!!process.env.CI)("stepRemoveLabel (requires gh)", () => {
|
|
329
|
+
it.todo("calls gh with correct args");
|
|
330
|
+
});
|