@towles/tool 0.0.69 → 0.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@towles/tool",
3
- "version": "0.0.69",
3
+ "version": "0.0.70",
4
4
  "description": "One off quality of life scripts that I use on a daily basis.",
5
5
  "homepage": "https://github.com/ChrisTowles/towles-tool#readme",
6
6
  "bugs": {
@@ -0,0 +1,885 @@
1
+ /**
2
+ * End-to-end tests for auto-claude pipeline.
3
+ *
4
+ * These tests verify the complete pipeline flow including:
5
+ * - Full lifecycle with all 4 steps
6
+ * - Branch creation and git operations
7
+ * - Artifact creation and persistence
8
+ * - Label state transitions
9
+ * - Retry loop behavior
10
+ * - Edge cases and failure scenarios
11
+ */
12
+ import { execSync } from "node:child_process";
13
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
14
+ import { join } from "node:path";
15
+
16
+ import consola from "consola";
17
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
18
+
19
+ import { initConfig } from "./config";
20
+ import { LABELS } from "./labels";
21
+ import type { ExecSafeFn } from "./labels";
22
+ import { runPipeline } from "./pipeline";
23
+ import { ARTIFACTS } from "./prompt-templates/index";
24
+ import type { SpawnClaudeFn } from "./spawn-claude";
25
+ import {
26
+ buildTestContext,
27
+ createMockClaudeProcess,
28
+ createTestRepoWithRemote,
29
+ errorClaudeJson,
30
+ successClaudeJson,
31
+ } from "./test-helpers";
32
+ import type { MockClaudeImpl, TestRepo } from "./test-helpers";
33
+ import type { IssueContext } from "./utils";
34
+
35
+ consola.level = -999; // NOTE(review): presumably a level this low mutes consola output so pipeline logging does not clutter test runs — confirm against consola's level semantics
36
+
37
/**
 * End-to-end checks for a complete pipeline run: branch and artifact creation,
 * the contents of the initial-ramblings artifact, and the `gh pr create` call.
 *
 * Claude is replaced by `mockClaudeImpl`, `gh` invocations are recorded in
 * `ghCalls` and answered with canned output, and every other command is
 * forwarded to the real `execSafe`.
 */
describe("auto-claude e2e: full pipeline lifecycle", () => {
  let originalCwd: string;
  let repo: TestRepo;
  let ctx: IssueContext;
  let mockClaudeImpl: MockClaudeImpl;
  let ghCalls: string[][];
  let mockSpawnFn: SpawnClaudeFn;
  let mockExec: ExecSafeFn;

  /**
   * Builds a Claude stand-in that, on its Nth invocation, writes the Nth
   * `[artifact, content]` pair into the issue directory and reports success.
   * Invocations past the end of the script write nothing but still succeed.
   */
  const scriptedClaude = (script: ReadonlyArray<readonly [string, string]>): MockClaudeImpl => {
    let invocation = 0;
    return () => {
      invocation += 1;
      mkdirSync(ctx.issueDir, { recursive: true });
      const entry = script[invocation - 1];
      if (entry !== undefined) {
        const [artifact, content] = entry;
        writeFileSync(join(ctx.issueDir, artifact), content);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };
  };

  /** Runs the pipeline without an `until` step, using the mocked spawn and exec functions. */
  const runFullPipeline = () =>
    runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

  beforeEach(async () => {
    originalCwd = process.cwd();
    repo = createTestRepoWithRemote();
    process.chdir(repo.dir);
    await initConfig({
      repo: "test/repo",
      mainBranch: "main",
      maxReviewRetries: 2,
    });
    ctx = buildTestContext(repo.dir);
    mockClaudeImpl = null;
    ghCalls = [];

    mockSpawnFn = vi.fn((args: string[]) => {
      // Each test must install its own Claude behaviour before the pipeline runs.
      if (!mockClaudeImpl) {
        throw new Error("Unexpected spawnClaude call -- set mockClaudeImpl");
      }
      const { stdout, exitCode } = mockClaudeImpl(args);
      return createMockClaudeProcess(stdout, exitCode);
    }) as SpawnClaudeFn;

    mockExec = vi.fn(async (cmd: string, args: string[]) => {
      if (cmd !== "gh") {
        // Pass through non-gh commands to real exec
        const { execSafe } = await import("../../utils/git/exec");
        return execSafe(cmd, args);
      }

      ghCalls.push(args);
      if (args.includes("pr")) {
        // Return empty array for PR list checks
        if (args.includes("list")) {
          return { stdout: "[]", ok: true };
        }
        // Return mock PR URL for PR create
        if (args.includes("create")) {
          return { stdout: "https://github.com/test/repo/pull/1", ok: true };
        }
      }
      return { stdout: "", ok: true };
    }) as ExecSafeFn;
  });

  afterEach(() => {
    process.chdir(originalCwd);
    repo.cleanup();
  });

  it("creates feature branch and commits artifacts", async () => {
    mockClaudeImpl = scriptedClaude([
      [ARTIFACTS.plan, "# Plan\n\nStep-by-step plan."],
      [ARTIFACTS.completedSummary, "# Done\n\nImpl complete."],
      [ARTIFACTS.simplifySummary, "# Simplified\n\nCode simplified."],
      [ARTIFACTS.review, "PASS\n\nAll checks pass."],
    ]);

    await runFullPipeline();

    // Verify branch was created and pushed
    const branches = execSync("git branch -a", { cwd: repo.dir, encoding: "utf-8" });
    expect(branches).toContain(ctx.branch);

    // Verify all artifacts exist
    const expectedArtifacts = [
      ARTIFACTS.initialRamblings,
      ARTIFACTS.plan,
      ARTIFACTS.completedSummary,
      ARTIFACTS.simplifySummary,
      ARTIFACTS.review,
    ];
    for (const artifact of expectedArtifacts) {
      expect(existsSync(join(ctx.issueDir, artifact))).toBe(true);
    }
  });

  it("preserves issue context in initial-ramblings.md", async () => {
    mockClaudeImpl = () => ({ stdout: errorClaudeJson(), exitCode: 0 });

    await runFullPipeline();

    const content = readFileSync(join(ctx.issueDir, ARTIFACTS.initialRamblings), "utf-8");

    expect(content).toContain("# Test Issue");
    expect(content).toContain("test/repo#1");
    expect(content).toContain("Test body");
  });

  it("creates PR with correct body after successful review", async () => {
    mockClaudeImpl = scriptedClaude([
      [ARTIFACTS.plan, "# Plan"],
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      [ARTIFACTS.review, "PASS\n\nAll good."],
    ]);

    await runFullPipeline();

    // Verify PR create was called
    const prCreateCall = ghCalls.find((args) => args[0] === "pr" && args[1] === "create");
    expect(prCreateCall).toBeDefined();

    // Assert the flag is present before reading the value after it: indexOf
    // returns -1 when "--body" is missing, which would otherwise make the
    // test silently inspect args[0].
    const bodyIdx = prCreateCall?.indexOf("--body") ?? -1;
    expect(bodyIdx).toBeGreaterThanOrEqual(0);

    // Verify PR body contains required elements
    const body = prCreateCall?.[bodyIdx + 1];
    expect(body).toContain("Closes #1");
    expect(body).toContain("Test Issue");
    expect(body).toContain(".auto-claude/issue-1/plan.md");
  });
});
181
+
182
/**
 * Checks which `gh` label operations the pipeline issues at start-up, on
 * success, and on failure. Every `gh` argument list is captured in `ghCalls`;
 * the assertions only look for the expected flag/label pairs in that log.
 */
describe("auto-claude e2e: label state transitions", () => {
  let originalCwd: string;
  let repo: TestRepo;
  let ctx: IssueContext;
  let mockClaudeImpl: MockClaudeImpl;
  let ghCalls: string[][];
  let mockSpawnFn: SpawnClaudeFn;
  let mockExec: ExecSafeFn;

  /** First recorded `gh` call that carries both the given flag and the given label. */
  const findLabelCall = (flag: "--add-label" | "--remove-label", label: string) =>
    ghCalls.find((args) => args.includes(flag) && args.includes(label));

  /** Installs a Claude stand-in whose every invocation returns the error JSON payload. */
  const useFailingClaude = () => {
    mockClaudeImpl = () => ({ stdout: errorClaudeJson(), exitCode: 0 });
  };

  /** Runs the pipeline without an `until` step, using the mocked spawn and exec functions. */
  const runFullPipeline = () =>
    runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

  beforeEach(async () => {
    originalCwd = process.cwd();
    repo = createTestRepoWithRemote();
    process.chdir(repo.dir);
    await initConfig({
      repo: "test/repo",
      mainBranch: "main",
      triggerLabel: "auto-claude",
      maxReviewRetries: 1,
    });
    ctx = buildTestContext(repo.dir);
    mockClaudeImpl = null;
    ghCalls = [];

    mockSpawnFn = vi.fn((args: string[]) => {
      if (!mockClaudeImpl) {
        throw new Error("Unexpected spawnClaude call");
      }
      const { stdout, exitCode } = mockClaudeImpl(args);
      return createMockClaudeProcess(stdout, exitCode);
    }) as SpawnClaudeFn;

    mockExec = vi.fn(async (cmd: string, args: string[]) => {
      if (cmd !== "gh") {
        // Anything that is not `gh` is forwarded to the real exec helper.
        const { execSafe } = await import("../../utils/git/exec");
        return execSafe(cmd, args);
      }

      ghCalls.push(args);
      if (args.includes("pr")) {
        if (args.includes("list")) {
          return { stdout: "[]", ok: true };
        }
        if (args.includes("create")) {
          return { stdout: "https://github.com/test/repo/pull/1", ok: true };
        }
      }
      return { stdout: "", ok: true };
    }) as ExecSafeFn;
  });

  afterEach(() => {
    process.chdir(originalCwd);
    repo.cleanup();
  });

  it("removes trigger label and adds in-progress at start", async () => {
    useFailingClaude();

    await runFullPipeline();

    // Trigger label must be removed ...
    expect(findLabelCall("--remove-label", "auto-claude")).toBeDefined();
    // ... and the in-progress label added.
    expect(findLabelCall("--add-label", LABELS.inProgress)).toBeDefined();
  });

  it("sets success + review labels on successful completion", async () => {
    // One artifact per Claude invocation, in call order.
    const script: ReadonlyArray<readonly [string, string]> = [
      [ARTIFACTS.plan, "# Plan"],
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      [ARTIFACTS.review, "PASS"],
    ];
    let invocation = 0;
    mockClaudeImpl = () => {
      invocation += 1;
      mkdirSync(ctx.issueDir, { recursive: true });
      const entry = script[invocation - 1];
      if (entry !== undefined) {
        writeFileSync(join(ctx.issueDir, entry[0]), entry[1]);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runFullPipeline();

    // Success and review labels added, in-progress label removed.
    expect(findLabelCall("--add-label", LABELS.success)).toBeDefined();
    expect(findLabelCall("--add-label", LABELS.review)).toBeDefined();
    expect(findLabelCall("--remove-label", LABELS.inProgress)).toBeDefined();
  });

  it("sets failed label on step failure", async () => {
    useFailingClaude();

    await runFullPipeline();

    // Failed label added, in-progress label removed.
    expect(findLabelCall("--add-label", LABELS.failed)).toBeDefined();
    expect(findLabelCall("--remove-label", LABELS.inProgress)).toBeDefined();
  });

  it("ensures all labels exist before pipeline starts", async () => {
    useFailingClaude();

    await runFullPipeline();

    // Exactly four `gh label create` calls are expected, one per pipeline label.
    const labelCreateCalls = ghCalls.filter((args) => args[0] === "label" && args[1] === "create");
    expect(labelCreateCalls).toHaveLength(4);

    // The label name is the third argument of each `gh label create` call.
    const createdLabels = labelCreateCalls.map((args) => args[2]);
    for (const label of [LABELS.inProgress, LABELS.review, LABELS.failed, LABELS.success]) {
      expect(createdLabels).toContain(label);
    }
  });
});
330
+
331
/**
 * Exercises the implement → simplify → review retry cycle with
 * `maxReviewRetries: 2`, using a Claude stand-in whose review artifact
 * starts with FAIL or PASS depending on the attempt number.
 */
describe("auto-claude e2e: retry loop behavior", () => {
  let originalCwd: string;
  let repo: TestRepo;
  let ctx: IssueContext;
  let mockClaudeImpl: MockClaudeImpl;
  let ghCalls: string[][];
  let mockSpawnFn: SpawnClaudeFn;
  let mockExec: ExecSafeFn;

  /** Runs the pipeline without an `until` step, using the mocked spawn and exec functions. */
  const runFullPipeline = () =>
    runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

  beforeEach(async () => {
    originalCwd = process.cwd();
    repo = createTestRepoWithRemote();
    process.chdir(repo.dir);
    await initConfig({
      repo: "test/repo",
      mainBranch: "main",
      maxReviewRetries: 2,
    });
    ctx = buildTestContext(repo.dir);
    mockClaudeImpl = null;
    ghCalls = [];

    mockSpawnFn = vi.fn((args: string[]) => {
      if (!mockClaudeImpl) {
        throw new Error("Unexpected spawnClaude call");
      }
      const { stdout, exitCode } = mockClaudeImpl(args);
      return createMockClaudeProcess(stdout, exitCode);
    }) as SpawnClaudeFn;

    mockExec = vi.fn(async (cmd: string, args: string[]) => {
      if (cmd !== "gh") {
        // Anything that is not `gh` is forwarded to the real exec helper.
        const { execSafe } = await import("../../utils/git/exec");
        return execSafe(cmd, args);
      }

      ghCalls.push(args);
      if (args.includes("pr")) {
        if (args.includes("list")) {
          return { stdout: "[]", ok: true };
        }
        if (args.includes("create")) {
          return { stdout: "https://github.com/test/repo/pull/1", ok: true };
        }
      }
      return { stdout: "", ok: true };
    }) as ExecSafeFn;
  });

  afterEach(() => {
    process.chdir(originalCwd);
    repo.cleanup();
  });

  it("clears artifacts between retry attempts", async () => {
    let claudeCallCount = 0;
    // Everything the mock wrote, per artifact, in call order.
    const artifactContents: {
      completedSummary: string[];
      simplifySummary: string[];
      review: string[];
    } = {
      completedSummary: [],
      simplifySummary: [],
      review: [],
    };

    mockClaudeImpl = () => {
      claudeCallCount += 1;
      mkdirSync(ctx.issueDir, { recursive: true });

      // Plan (once): the mock treats the very first call as the plan step.
      if (claudeCallCount === 1) {
        writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
        return { stdout: successClaudeJson(), exitCode: 0 };
      }

      // Each retry cycle is three calls: calls 2-4 are attempt 1, 5-7 attempt 2, ...
      const attempt = Math.ceil((claudeCallCount - 1) / 3);
      const stepInCycle = (claudeCallCount - 2) % 3;

      if (stepInCycle === 0) {
        const content = `# Done attempt ${attempt}`;
        writeFileSync(join(ctx.issueDir, ARTIFACTS.completedSummary), content);
        artifactContents.completedSummary.push(content);
      } else if (stepInCycle === 1) {
        const content = `# Simplified attempt ${attempt}`;
        writeFileSync(join(ctx.issueDir, ARTIFACTS.simplifySummary), content);
        artifactContents.simplifySummary.push(content);
      } else {
        // First two reviews FAIL, third PASS
        const content = attempt < 3 ? "FAIL\n\nNeeds more work." : "PASS\n\nGood now.";
        writeFileSync(join(ctx.issueDir, ARTIFACTS.review), content);
        artifactContents.review.push(content);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runFullPipeline();

    // Verify 3 attempts (2 retries + 1 initial)
    expect(artifactContents.completedSummary).toHaveLength(3);
    expect(artifactContents.simplifySummary).toHaveLength(3);
    expect(artifactContents.review).toHaveLength(3);

    // Final artifacts should be from attempt 3
    const finalCompletedSummary = readFileSync(
      join(ctx.issueDir, ARTIFACTS.completedSummary),
      "utf-8",
    );
    expect(finalCompletedSummary).toContain("attempt 3");
  });

  it("posts comment with retry count on max retries exhausted", async () => {
    // Artifacts written within one implement/simplify/review cycle; the review always fails.
    const cycle: ReadonlyArray<readonly [string, string]> = [
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      [ARTIFACTS.review, "FAIL\n\nStill failing."],
    ];
    let claudeCallCount = 0;
    mockClaudeImpl = () => {
      claudeCallCount += 1;
      mkdirSync(ctx.issueDir, { recursive: true });

      if (claudeCallCount === 1) {
        writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
        return { stdout: successClaudeJson(), exitCode: 0 };
      }

      const entry = cycle[(claudeCallCount - 2) % 3];
      if (entry !== undefined) {
        writeFileSync(join(ctx.issueDir, entry[0]), entry[1]);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runFullPipeline();

    // Verify comment was posted
    const commentCall = ghCalls.find((args) => args[0] === "issue" && args[1] === "comment");
    expect(commentCall).toBeDefined();

    // Assert the flag is present before reading the value after it: indexOf
    // returns -1 when "--body" is missing, which would otherwise make the
    // test silently inspect args[0].
    const bodyIdx = commentCall?.indexOf("--body") ?? -1;
    expect(bodyIdx).toBeGreaterThanOrEqual(0);

    // Verify comment mentions retry count
    const body = commentCall?.[bodyIdx + 1];
    expect(body).toContain("3 attempts");
  });
});
480
+
481
/**
 * Checks that the `until` argument of `runPipeline` stops the run after the
 * named step, and that an artifact already on disk causes its step to be
 * skipped. `claudeCallCount` is shared across the suite and reset before
 * every test so each test can assert how many Claude invocations happened.
 */
describe("auto-claude e2e: --until flag behavior", () => {
  let originalCwd: string;
  let repo: TestRepo;
  let ctx: IssueContext;
  let mockClaudeImpl: MockClaudeImpl;
  let mockSpawnFn: SpawnClaudeFn;
  let mockExec: ExecSafeFn;
  let claudeCallCount: number;

  /**
   * Claude stand-in that counts its invocations in `claudeCallCount`, makes
   * sure the issue directory exists, and writes the Nth `[artifact, content]`
   * pair on the Nth call. Calls past the end of the script write nothing.
   */
  const scriptedClaude = (script: ReadonlyArray<readonly [string, string]>): MockClaudeImpl => {
    return () => {
      claudeCallCount += 1;
      mkdirSync(ctx.issueDir, { recursive: true });
      const entry = script[claudeCallCount - 1];
      if (entry !== undefined) {
        writeFileSync(join(ctx.issueDir, entry[0]), entry[1]);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };
  };

  /** True when the named artifact file is present in the issue directory. */
  const artifactExists = (artifact: string) => existsSync(join(ctx.issueDir, artifact));

  beforeEach(async () => {
    originalCwd = process.cwd();
    repo = createTestRepoWithRemote();
    process.chdir(repo.dir);
    await initConfig({
      repo: "test/repo",
      mainBranch: "main",
    });
    ctx = buildTestContext(repo.dir);
    mockClaudeImpl = null;
    claudeCallCount = 0;

    mockSpawnFn = vi.fn((args: string[]) => {
      if (!mockClaudeImpl) {
        throw new Error("Unexpected spawnClaude call");
      }
      const { stdout, exitCode } = mockClaudeImpl(args);
      return createMockClaudeProcess(stdout, exitCode);
    }) as SpawnClaudeFn;

    mockExec = vi.fn(async (cmd: string, args: string[]) => {
      if (cmd !== "gh") {
        // Anything that is not `gh` is forwarded to the real exec helper.
        const { execSafe } = await import("../../utils/git/exec");
        return execSafe(cmd, args);
      }
      // Every `gh` call in this suite gets the same empty-list answer.
      return { stdout: "[]", ok: true };
    }) as ExecSafeFn;
  });

  afterEach(() => {
    process.chdir(originalCwd);
    repo.cleanup();
  });

  it("--until plan stops after plan step", async () => {
    // Writes the plan artifact on every call; only one call is expected.
    mockClaudeImpl = () => {
      claudeCallCount += 1;
      mkdirSync(ctx.issueDir, { recursive: true });
      writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runPipeline(ctx, "plan", { spawnFn: mockSpawnFn, exec: mockExec });

    expect(claudeCallCount).toBe(1);
    expect(artifactExists(ARTIFACTS.plan)).toBe(true);
    expect(artifactExists(ARTIFACTS.completedSummary)).toBe(false);
  });

  it("--until simplify stops after simplify step", async () => {
    mockClaudeImpl = scriptedClaude([
      [ARTIFACTS.plan, "# Plan"],
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
    ]);

    await runPipeline(ctx, "simplify", { spawnFn: mockSpawnFn, exec: mockExec });

    expect(claudeCallCount).toBe(3);
    expect(artifactExists(ARTIFACTS.simplifySummary)).toBe(true);
    expect(artifactExists(ARTIFACTS.review)).toBe(false);
  });

  it("--until review stops before PR creation", async () => {
    mockClaudeImpl = scriptedClaude([
      [ARTIFACTS.plan, "# Plan"],
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      [ARTIFACTS.review, "PASS\n\nLooks good."],
    ]);

    await runPipeline(ctx, "review", { spawnFn: mockSpawnFn, exec: mockExec });

    expect(claudeCallCount).toBe(4);
    expect(artifactExists(ARTIFACTS.review)).toBe(true);
    // PR should not be created with --until review
    expect(artifactExists(ARTIFACTS.prUrl)).toBe(false);
  });

  it("resumes from existing artifacts", async () => {
    // Pre-create plan artifact
    mkdirSync(ctx.issueDir, { recursive: true });
    writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Pre-existing plan");

    // The script starts at the implement step because the plan is already on disk.
    const remainingSteps: ReadonlyArray<readonly [string, string]> = [
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      [ARTIFACTS.review, "PASS"],
    ];
    mockClaudeImpl = () => {
      claudeCallCount += 1;
      const entry = remainingSteps[claudeCallCount - 1];
      if (entry !== undefined) {
        writeFileSync(join(ctx.issueDir, entry[0]), entry[1]);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runPipeline(ctx, "review", { spawnFn: mockSpawnFn, exec: mockExec });

    // Plan step should be skipped since artifact exists
    expect(claudeCallCount).toBe(3);

    // Original plan should be preserved
    const planContent = readFileSync(join(ctx.issueDir, ARTIFACTS.plan), "utf-8");
    expect(planContent).toBe("# Pre-existing plan");
  });
});
626
+
627
/**
 * Inspects the git state of the test repository after a pipeline run:
 * which branch is checked out, how the feature branch is named, and
 * whether that branch is listed among the remote branches.
 */
describe("auto-claude e2e: git operations", () => {
  let originalCwd: string;
  let repo: TestRepo;
  let ctx: IssueContext;
  let mockClaudeImpl: MockClaudeImpl;
  let mockSpawnFn: SpawnClaudeFn;
  let mockExec: ExecSafeFn;

  /**
   * Claude stand-in for a fully successful run: calls one to four write the
   * plan, completed-summary, simplify-summary and review artifacts in that
   * order; any further call writes nothing. Every call reports success.
   */
  const passingClaude = (): MockClaudeImpl => {
    const script: ReadonlyArray<readonly [string, string]> = [
      [ARTIFACTS.plan, "# Plan"],
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      [ARTIFACTS.review, "PASS"],
    ];
    let invocation = 0;
    return () => {
      invocation += 1;
      mkdirSync(ctx.issueDir, { recursive: true });
      const entry = script[invocation - 1];
      if (entry !== undefined) {
        writeFileSync(join(ctx.issueDir, entry[0]), entry[1]);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };
  };

  /** Runs a git command line in the test repository and returns its stdout. */
  const git = (commandLine: string) =>
    execSync(commandLine, {
      cwd: repo.dir,
      encoding: "utf-8",
    });

  beforeEach(async () => {
    originalCwd = process.cwd();
    repo = createTestRepoWithRemote();
    process.chdir(repo.dir);
    await initConfig({
      repo: "test/repo",
      mainBranch: "main",
    });
    ctx = buildTestContext(repo.dir);
    mockClaudeImpl = null;

    mockSpawnFn = vi.fn((args: string[]) => {
      if (!mockClaudeImpl) {
        throw new Error("Unexpected spawnClaude call");
      }
      const { stdout, exitCode } = mockClaudeImpl(args);
      return createMockClaudeProcess(stdout, exitCode);
    }) as SpawnClaudeFn;

    mockExec = vi.fn(async (cmd: string, args: string[]) => {
      if (cmd !== "gh") {
        // Anything that is not `gh` is forwarded to the real exec helper.
        const { execSafe } = await import("../../utils/git/exec");
        return execSafe(cmd, args);
      }

      if (args.includes("pr")) {
        if (args.includes("list")) {
          return { stdout: "[]", ok: true };
        }
        if (args.includes("create")) {
          return { stdout: "https://github.com/test/repo/pull/1", ok: true };
        }
      }
      return { stdout: "", ok: true };
    }) as ExecSafeFn;
  });

  afterEach(() => {
    process.chdir(originalCwd);
    repo.cleanup();
  });

  it("checks out main branch after pipeline completion", async () => {
    mockClaudeImpl = passingClaude();

    await runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

    const currentBranch = git("git branch --show-current").trim();
    expect(currentBranch).toBe("main");
  });

  it("checks out main branch after failure", async () => {
    mockClaudeImpl = () => ({ stdout: errorClaudeJson(), exitCode: 0 });

    await runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

    const currentBranch = git("git branch --show-current").trim();
    expect(currentBranch).toBe("main");
  });

  it("creates branch with correct naming convention", async () => {
    // Writes the plan artifact on every call; the run is limited to the plan step.
    mockClaudeImpl = () => {
      mkdirSync(ctx.issueDir, { recursive: true });
      writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runPipeline(ctx, "plan", { spawnFn: mockSpawnFn, exec: mockExec });

    const branches = git("git branch");
    expect(branches).toContain("feature/1-test-issue");
  });

  it("pushes branch to remote", async () => {
    mockClaudeImpl = passingClaude();

    await runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

    // Verify branch exists on remote
    const remoteBranches = git("git branch -r");
    expect(remoteBranches).toContain(`origin/${ctx.branch}`);
  });
});
761
+
762
/**
 * Edge cases: an implement step that never produces its artifact (with
 * `maxImplementIterations: 2`), an empty issue body, an issue body with
 * special characters, and a review artifact that starts with lowercase "pass".
 */
describe("auto-claude e2e: edge cases", () => {
  let originalCwd: string;
  let repo: TestRepo;
  let ctx: IssueContext;
  let mockClaudeImpl: MockClaudeImpl;
  let mockSpawnFn: SpawnClaudeFn;
  let mockExec: ExecSafeFn;

  /** Installs a Claude stand-in whose every invocation returns the error JSON payload. */
  const useFailingClaude = () => {
    mockClaudeImpl = () => ({ stdout: errorClaudeJson(), exitCode: 0 });
  };

  /** Reads the initial-ramblings artifact from the current issue directory. */
  const readRamblings = () =>
    readFileSync(join(ctx.issueDir, ARTIFACTS.initialRamblings), "utf-8");

  beforeEach(async () => {
    originalCwd = process.cwd();
    repo = createTestRepoWithRemote();
    process.chdir(repo.dir);
    await initConfig({
      repo: "test/repo",
      mainBranch: "main",
      maxImplementIterations: 2,
    });
    ctx = buildTestContext(repo.dir);
    mockClaudeImpl = null;

    mockSpawnFn = vi.fn((args: string[]) => {
      if (!mockClaudeImpl) {
        throw new Error("Unexpected spawnClaude call");
      }
      const { stdout, exitCode } = mockClaudeImpl(args);
      return createMockClaudeProcess(stdout, exitCode);
    }) as SpawnClaudeFn;

    mockExec = vi.fn(async (cmd: string, args: string[]) => {
      if (cmd !== "gh") {
        // Anything that is not `gh` is forwarded to the real exec helper.
        const { execSafe } = await import("../../utils/git/exec");
        return execSafe(cmd, args);
      }
      // Every `gh` call in this suite gets the same empty-list answer.
      return { stdout: "[]", ok: true };
    }) as ExecSafeFn;
  });

  afterEach(() => {
    process.chdir(originalCwd);
    repo.cleanup();
  });

  it("handles implement step exhausting max iterations", async () => {
    let claudeCallCount = 0;
    mockClaudeImpl = () => {
      claudeCallCount += 1;
      mkdirSync(ctx.issueDir, { recursive: true });

      // Only the first call (plan) writes an artifact.
      // Implement never produces artifact (max iterations exhausted)
      const isPlanCall = claudeCallCount === 1;
      if (isPlanCall) {
        writeFileSync(join(ctx.issueDir, ARTIFACTS.plan), "# Plan");
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

    // 1 plan + 2 implement iterations = 3
    expect(claudeCallCount).toBe(3);
    expect(existsSync(join(ctx.issueDir, ARTIFACTS.completedSummary))).toBe(false);
  });

  it("handles empty issue body", async () => {
    ctx = { ...ctx, body: "" };
    useFailingClaude();

    await runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

    const content = readRamblings();
    expect(content).toContain("# Test Issue");
    expect(content).toContain("test/repo#1");
  });

  it("handles issue body with special characters", async () => {
    ctx = {
      ...ctx,
      body: 'Body with "quotes", <tags>, and $variables',
    };
    useFailingClaude();

    await runPipeline(ctx, undefined, { spawnFn: mockSpawnFn, exec: mockExec });

    const content = readRamblings();
    for (const fragment of ['"quotes"', "<tags>", "$variables"]) {
      expect(content).toContain(fragment);
    }
  });

  it("handles review with lowercase pass", async () => {
    // One artifact per Claude invocation, in call order.
    const script: ReadonlyArray<readonly [string, string]> = [
      [ARTIFACTS.plan, "# Plan"],
      [ARTIFACTS.completedSummary, "# Done"],
      [ARTIFACTS.simplifySummary, "# Simplified"],
      // Lowercase "pass" should still be recognized
      [ARTIFACTS.review, "pass\n\nLooks good."],
    ];
    let claudeCallCount = 0;
    mockClaudeImpl = () => {
      claudeCallCount += 1;
      mkdirSync(ctx.issueDir, { recursive: true });
      const entry = script[claudeCallCount - 1];
      if (entry !== undefined) {
        writeFileSync(join(ctx.issueDir, entry[0]), entry[1]);
      }
      return { stdout: successClaudeJson(), exitCode: 0 };
    };

    await runPipeline(ctx, "review", { spawnFn: mockSpawnFn, exec: mockExec });

    // Should complete successfully with lowercase pass
    expect(claudeCallCount).toBe(4);
  });
});