@claudetree/cli 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  import { Command } from 'commander';
2
2
  import { join } from 'node:path';
3
3
  import { randomUUID } from 'node:crypto';
4
- import { access, readFile } from 'node:fs/promises';
5
- import { execSync } from 'node:child_process';
6
- import { GitWorktreeAdapter, ClaudeSessionAdapter, FileSessionRepository, FileEventRepository, FileToolApprovalRepository, GitHubAdapter, TemplateLoader, DEFAULT_TEMPLATES, SlackNotifier, } from '@claudetree/core';
4
+ import { access, readFile, writeFile, mkdir } from 'node:fs/promises';
5
+ import { GitWorktreeAdapter, ClaudeSessionAdapter, FileSessionRepository, FileEventRepository, FileToolApprovalRepository, GitHubAdapter, TemplateLoader, DEFAULT_TEMPLATES, SlackNotifier, ValidationGateRunner, } from '@claudetree/core';
7
6
  const CONFIG_DIR = '.claudetree';
8
7
  async function loadConfig(cwd) {
9
8
  try {
@@ -16,18 +15,53 @@ async function loadConfig(cwd) {
16
15
  return null;
17
16
  }
18
17
  }
18
/**
 * Translate a comma-separated list of gate names into validation gate
 * descriptors.
 *
 * Names are matched case-insensitively after trimming surrounding whitespace.
 * Recognized names are `test`, `type`, `lint` and `build`; `test` and `type`
 * are marked required, `lint` and `build` optional. Output order follows the
 * first occurrence of each name in the input.
 *
 * Empty segments (e.g. from a trailing comma) and repeated names are skipped
 * so the same gate is never queued twice. Unrecognized names are still
 * ignored, but a warning is written to stderr so a typo does not silently
 * disable a gate.
 *
 * @param {string} gatesStr - Comma-separated gate names, e.g. "test,type".
 * @param {string} [testCommand] - Command for the `test` gate. Falls back to
 *   "pnpm test" when nullish or blank.
 * @returns {Array<{name: string, command: string, required: boolean}>} One
 *   descriptor per distinct recognized gate.
 */
function parseGates(gatesStr, testCommand) {
    // A blank --test-command would otherwise produce a gate with an empty command.
    const resolvedTestCommand = testCommand == null || String(testCommand).trim() === ''
        ? 'pnpm test'
        : testCommand;
    // Map (not a plain object) so names like "constructor" never hit the prototype.
    const gateFactories = new Map([
        ['test', () => ({ name: 'test', command: resolvedTestCommand, required: true })],
        ['type', () => ({ name: 'type', command: 'pnpm tsc --noEmit', required: true })],
        ['lint', () => ({ name: 'lint', command: 'pnpm lint', required: false })],
        ['build', () => ({ name: 'build', command: 'pnpm build', required: false })],
    ]);
    const seen = new Set();
    const gates = [];
    for (const rawName of String(gatesStr ?? '').split(',')) {
        const name = rawName.trim().toLowerCase();
        if (name === '' || seen.has(name)) {
            continue;
        }
        // Record before the lookup so an unknown name is only reported once.
        seen.add(name);
        const createGate = gateFactories.get(name);
        if (!createGate) {
            console.warn(`Warning: unknown validation gate '${name}' ignored (expected one of: ${[...gateFactories.keys()].join(', ')}).`);
            continue;
        }
        gates.push(createGate());
    }
    return gates;
}
39
/**
 * Render a millisecond duration as a short human-readable string.
 *
 * Only the two most significant units are shown:
 *   - one hour or more:   "<h>h <m>m"  (seconds are dropped)
 *   - one minute or more: "<m>m <s>s"
 *   - otherwise:          "<s>s"
 * Fractions of a second are truncated.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The formatted duration. Returns "unknown" when `ms` is
 *   not a finite number (for example NaN from an unparseable CLI option);
 *   negative durations are clamped to "0s".
 */
function formatDuration(ms) {
    // Without this guard a NaN input would be printed as "NaNs".
    if (!Number.isFinite(ms)) {
        return 'unknown';
    }
    const totalSeconds = Math.max(0, Math.floor(ms / 1000));
    const totalMinutes = Math.floor(totalSeconds / 60);
    const hours = Math.floor(totalMinutes / 60);
    if (hours > 0) {
        return `${hours}h ${totalMinutes % 60}m`;
    }
    if (totalMinutes > 0) {
        return `${totalMinutes}m ${totalSeconds % 60}s`;
    }
    return `${totalSeconds}s`;
}
19
49
  export const startCommand = new Command('start')
20
- .description('Create worktree from issue and start Claude session')
50
+ .description('Create worktree from issue and start Claude session (TDD mode by default)')
21
51
  .argument('<issue>', 'Issue number, GitHub URL, or task name')
22
52
  .option('-p, --prompt <prompt>', 'Initial prompt for Claude')
23
53
  .option('--no-session', 'Create worktree without starting Claude')
24
- .option('-s, --skill <skill>', 'Skill to activate (tdd, review)')
54
+ .option('--no-tdd', 'Disable TDD mode (just implement without test-first)')
55
+ .option('-s, --skill <skill>', 'Skill to activate (review)')
25
56
  .option('-T, --template <template>', 'Session template (bugfix, feature, refactor, review)')
26
57
  .option('-b, --branch <branch>', 'Custom branch name')
27
58
  .option('-t, --token <token>', 'GitHub token (or use GITHUB_TOKEN env)')
28
59
  .option('--max-cost <cost>', 'Maximum cost in USD (stops session if exceeded)', parseFloat)
29
- .option('--lint <command>', 'Lint command to run after Claude completes (e.g., "npm run lint")')
30
- .option('--gate', 'Fail session if lint fails', false)
60
+ .option('--timeout <minutes>', 'Total session timeout in minutes (default: 120)')
61
+ .option('--idle-timeout <minutes>', 'Idle timeout in minutes (default: 10)')
62
+ .option('--max-retries <n>', 'Max retries per validation gate (default: 3)')
63
+ .option('--gates <gates>', 'Validation gates: test,type,lint,build (default: test,type)')
64
+ .option('--test-command <cmd>', 'Custom test command (default: pnpm test)')
31
65
  .action(async (issue, options) => {
32
66
  const cwd = process.cwd();
33
67
  const config = await loadConfig(cwd);
@@ -35,15 +69,41 @@ export const startCommand = new Command('start')
35
69
  console.error('Error: claudetree not initialized. Run "claudetree init" first.');
36
70
  process.exit(1);
37
71
  }
72
+ // Build TDD config if TDD mode enabled
73
+ const tddEnabled = options.tdd !== false;
74
+ let tddConfig = null;
75
+ if (tddEnabled) {
76
+ tddConfig = {
77
+ timeout: parseInt(options.timeout ?? '120', 10) * 60 * 1000,
78
+ idleTimeout: parseInt(options.idleTimeout ?? '10', 10) * 60 * 1000,
79
+ maxIterations: 10,
80
+ maxRetries: parseInt(options.maxRetries ?? '3', 10),
81
+ gates: parseGates(options.gates ?? 'test,type', options.testCommand),
82
+ };
83
+ }
84
+ // Header
85
+ if (tddEnabled) {
86
+ console.log('\n\x1b[36m╔══════════════════════════════════════════╗\x1b[0m');
87
+ console.log('\x1b[36m║ TDD Mode Session (Default) ║\x1b[0m');
88
+ console.log('\x1b[36m╚══════════════════════════════════════════╝\x1b[0m');
89
+ console.log('\n\x1b[90mUse --no-tdd to disable TDD mode\x1b[0m\n');
90
+ console.log('\x1b[33m⏱️ Time Limits:\x1b[0m');
91
+ console.log(` Session: ${formatDuration(tddConfig.timeout)}`);
92
+ console.log(` Idle: ${formatDuration(tddConfig.idleTimeout)}`);
93
+ console.log(` Max retries: ${tddConfig.maxRetries}`);
94
+ console.log('\n\x1b[33m✅ Validation Gates:\x1b[0m');
95
+ for (const gate of tddConfig.gates) {
96
+ const status = gate.required ? '\x1b[31m(required)\x1b[0m' : '\x1b[90m(optional)\x1b[0m';
97
+ console.log(` • ${gate.name}: ${gate.command} ${status}`);
98
+ }
99
+ }
38
100
  let issueNumber = null;
39
101
  let issueData = null;
40
102
  let branchName;
41
- // Check if it's a GitHub URL
42
103
  const ghToken = options.token ?? process.env.GITHUB_TOKEN ?? config.github?.token;
43
104
  if (issue.includes('github.com')) {
44
- // Parse GitHub URL
45
105
  if (!ghToken) {
46
- console.error('Error: GitHub token required for URL. Set GITHUB_TOKEN or use --token.');
106
+ console.error('\nError: GitHub token required for URL. Set GITHUB_TOKEN or use --token.');
47
107
  process.exit(1);
48
108
  }
49
109
  const ghAdapter = new GitHubAdapter(ghToken);
@@ -52,7 +112,7 @@ export const startCommand = new Command('start')
52
112
  console.error('Error: Invalid GitHub URL format.');
53
113
  process.exit(1);
54
114
  }
55
- console.log(`Fetching issue #${parsed.number} from ${parsed.owner}/${parsed.repo}...`);
115
+ console.log(`\nFetching issue #${parsed.number} from ${parsed.owner}/${parsed.repo}...`);
56
116
  try {
57
117
  issueData = await ghAdapter.getIssue(parsed.owner, parsed.repo, parsed.number);
58
118
  issueNumber = issueData.number;
@@ -66,21 +126,18 @@ export const startCommand = new Command('start')
66
126
  }
67
127
  }
68
128
  else {
69
- // Parse as issue number or task name
70
129
  const parsed = parseInt(issue, 10);
71
130
  const isNumber = !isNaN(parsed);
72
131
  if (isNumber && ghToken && config.github?.owner && config.github?.repo) {
73
- // Try to fetch issue from configured repo
74
132
  const ghAdapter = new GitHubAdapter(ghToken);
75
133
  try {
76
- console.log(`Fetching issue #${parsed}...`);
134
+ console.log(`\nFetching issue #${parsed}...`);
77
135
  issueData = await ghAdapter.getIssue(config.github.owner, config.github.repo, parsed);
78
136
  issueNumber = issueData.number;
79
137
  branchName = options.branch ?? ghAdapter.generateBranchName(issueNumber, issueData.title);
80
138
  console.log(` Title: ${issueData.title}`);
81
139
  }
82
140
  catch {
83
- // Fall back to simple issue number
84
141
  issueNumber = parsed;
85
142
  branchName = options.branch ?? `issue-${issueNumber}`;
86
143
  }
@@ -94,7 +151,6 @@ export const startCommand = new Command('start')
94
151
  }
95
152
  }
96
153
  const worktreePath = join(cwd, config.worktreeDir, branchName);
97
- // Check if worktree already exists
98
154
  const gitAdapter = new GitWorktreeAdapter(cwd);
99
155
  const existingWorktrees = await gitAdapter.list();
100
156
  const existingWorktree = existingWorktrees.find((wt) => wt.branch === branchName || wt.path.endsWith(branchName));
@@ -132,7 +188,6 @@ export const startCommand = new Command('start')
132
188
  console.log('\nWorktree created. Use "cd" to navigate and start working.');
133
189
  return;
134
190
  }
135
- // Check Claude availability
136
191
  const claudeAdapter = new ClaudeSessionAdapter();
137
192
  const available = await claudeAdapter.isClaudeAvailable();
138
193
  if (!available) {
@@ -140,7 +195,6 @@ export const startCommand = new Command('start')
140
195
  console.log('Worktree created but Claude session not started.');
141
196
  return;
142
197
  }
143
- // Create session record
144
198
  const sessionRepo = new FileSessionRepository(join(cwd, CONFIG_DIR));
145
199
  const session = {
146
200
  id: randomUUID(),
@@ -151,15 +205,12 @@ export const startCommand = new Command('start')
151
205
  prompt: options.prompt ?? null,
152
206
  createdAt: new Date(),
153
207
  updatedAt: new Date(),
154
- // Recovery fields
155
208
  processId: null,
156
209
  osProcessId: null,
157
210
  lastHeartbeat: null,
158
211
  errorCount: 0,
159
212
  worktreePath: worktree.path,
160
- // Token usage
161
213
  usage: null,
162
- // Progress tracking
163
214
  progress: {
164
215
  currentStep: 'analyzing',
165
216
  completedSteps: [],
@@ -172,7 +223,6 @@ export const startCommand = new Command('start')
172
223
  if (options.template) {
173
224
  const templateLoader = new TemplateLoader(join(cwd, CONFIG_DIR));
174
225
  template = await templateLoader.load(options.template);
175
- // Fall back to default templates
176
226
  if (!template && options.template in DEFAULT_TEMPLATES) {
177
227
  template = DEFAULT_TEMPLATES[options.template] ?? null;
178
228
  }
@@ -195,69 +245,118 @@ Issue Description:
195
245
  ${issueData.body || 'No description provided.'}
196
246
 
197
247
  IMPORTANT: Do NOT just analyze or suggest. Actually IMPLEMENT the solution.
198
-
199
- Workflow:
200
- 1. Read the relevant code files
201
- 2. Write the code to solve this issue
202
- 3. Run tests to verify your implementation
203
- 4. When done, commit your changes with a clear message
204
- 5. Create a PR to the develop branch
205
-
206
- Start implementing now.`;
248
+ ${tddEnabled ? '\nStart with TDD - write a failing test first!' : ''}`;
207
249
  }
208
250
  else if (issueNumber) {
209
- prompt = `Working on issue #${issueNumber}. Implement the solution - do not just analyze.`;
251
+ prompt = `Working on issue #${issueNumber}. ${tddEnabled ? 'Start with TDD - write a failing test first!' : 'Implement the solution.'}`;
210
252
  }
211
253
  else {
212
- prompt = `Working on ${branchName}. Implement any required changes.`;
254
+ prompt = `Working on ${branchName}. ${tddEnabled ? 'Start with TDD - write a failing test first!' : 'Implement any required changes.'}`;
213
255
  }
214
- // Apply template to prompt
215
256
  if (template) {
216
257
  const prefix = template.promptPrefix ? `${template.promptPrefix}\n\n` : '';
217
258
  const suffix = template.promptSuffix ? `\n\n${template.promptSuffix}` : '';
218
259
  prompt = `${prefix}${prompt}${suffix}`;
219
260
  }
220
- // Add skill if specified (template skill takes precedence)
261
+ // Build system prompt
221
262
  let systemPrompt;
222
263
  const effectiveSkill = template?.skill || options.skill;
223
- if (effectiveSkill === 'tdd') {
224
- systemPrompt = `You MUST follow strict TDD (Test-Driven Development):
264
+ if (tddEnabled) {
265
+ // TDD system prompt (default)
266
+ systemPrompt = `You are in TDD (Test-Driven Development) mode. Follow this STRICT workflow:
267
+
268
+ ## TDD Cycle (Repeat until done)
225
269
 
226
- 1. RED: Write a failing test FIRST - never write implementation before tests
227
- 2. GREEN: Write MINIMUM code to pass the test - no extra features
228
- 3. REFACTOR: Clean up while keeping tests green
270
+ ### 1. RED Phase - Write Failing Test
271
+ - Write ONE failing test that describes the expected behavior
272
+ - Run the test to confirm it fails
273
+ - Commit: "test: add test for <feature>"
229
274
 
230
- Rules:
231
- - One test at a time
232
- - Commit after each phase: "test: ...", "feat: ...", "refactor: ..."
233
- - Run tests after every change
234
- - Create PR only when all tests pass`;
275
+ ### 2. GREEN Phase - Minimal Implementation
276
+ - Write the MINIMUM code to make the test pass
277
+ - Run tests to confirm they pass
278
+ - Commit: "feat: implement <feature>"
279
+
280
+ ### 3. REFACTOR Phase (Optional)
281
+ - Clean up code while keeping tests green
282
+ - Commit: "refactor: improve <description>"
283
+
284
+ ## Rules
285
+ - NEVER write implementation before tests
286
+ - ONE test at a time
287
+ - Run tests after EVERY change
288
+ - Stop when all requirements are met
289
+
290
+ ## Validation Gates (Must Pass Before PR)
291
+ ${tddConfig.gates.map(g => `- ${g.name}: \`${g.command}\` ${g.required ? '(REQUIRED)' : '(optional)'}`).join('\n')}
292
+
293
+ ## Time Limits
294
+ - Total: ${formatDuration(tddConfig.timeout)}
295
+ - Idle: ${formatDuration(tddConfig.idleTimeout)}
296
+
297
+ When done, create a PR to the develop branch.`;
235
298
  }
236
299
  else if (effectiveSkill === 'review') {
237
300
  systemPrompt = 'Review code thoroughly for security, quality, and best practices.';
238
301
  }
239
- // Template system prompt overrides
240
302
  if (template?.systemPrompt) {
241
303
  systemPrompt = template.systemPrompt;
242
304
  }
243
- console.log('\nStarting Claude session...');
244
- if (effectiveSkill) {
245
- console.log(` Skill: ${effectiveSkill}`);
305
+ console.log('\n\x1b[36m🚀 Starting Claude session...\x1b[0m');
306
+ if (tddEnabled) {
307
+ console.log(' Mode: \x1b[32mTDD\x1b[0m (Test-Driven Development)');
246
308
  }
247
309
  if (options.maxCost) {
248
- console.log(` \x1b[33mBudget limit: $${options.maxCost.toFixed(2)}\x1b[0m`);
310
+ console.log(` Budget: \x1b[33m$${options.maxCost.toFixed(2)}\x1b[0m`);
249
311
  }
250
- // Initialize event repositories
251
312
  const eventRepo = new FileEventRepository(join(cwd, CONFIG_DIR));
252
313
  const approvalRepo = new FileToolApprovalRepository(join(cwd, CONFIG_DIR));
253
- // Setup event listener for recording
314
+ // Save TDD state if enabled
315
+ let tddState = null;
316
+ let tddStatePath = null;
317
+ if (tddEnabled) {
318
+ tddState = {
319
+ phase: 'writing_test',
320
+ currentIteration: 1,
321
+ gateResults: [],
322
+ failureCount: 0,
323
+ lastActivity: new Date(),
324
+ config: tddConfig,
325
+ };
326
+ tddStatePath = join(cwd, CONFIG_DIR, 'tdd-state', `${session.id}.json`);
327
+ await mkdir(join(cwd, CONFIG_DIR, 'tdd-state'), { recursive: true });
328
+ await writeFile(tddStatePath, JSON.stringify(tddState, null, 2));
329
+ }
330
+ // Track timeouts
331
+ const sessionStartTime = Date.now();
332
+ let lastOutputTime = Date.now();
333
+ let sessionTimedOut = false;
334
+ let idleTimedOut = false;
335
+ let timeoutChecker = null;
336
+ if (tddEnabled && tddConfig) {
337
+ timeoutChecker = setInterval(() => {
338
+ const elapsed = Date.now() - sessionStartTime;
339
+ const idleTime = Date.now() - lastOutputTime;
340
+ if (elapsed >= tddConfig.timeout) {
341
+ sessionTimedOut = true;
342
+ console.log(`\n\x1b[31m[Timeout]\x1b[0m Session timeout (${formatDuration(tddConfig.timeout)}) exceeded.`);
343
+ if (timeoutChecker)
344
+ clearInterval(timeoutChecker);
345
+ }
346
+ else if (idleTime >= tddConfig.idleTimeout) {
347
+ idleTimedOut = true;
348
+ console.log(`\n\x1b[31m[Timeout]\x1b[0m Idle timeout (${formatDuration(tddConfig.idleTimeout)}) exceeded.`);
349
+ if (timeoutChecker)
350
+ clearInterval(timeoutChecker);
351
+ }
352
+ }, 5000);
353
+ }
254
354
  claudeAdapter.on('output', async (event) => {
255
355
  const { output } = event;
256
- // Map Claude output type to event type
356
+ lastOutputTime = Date.now();
257
357
  let eventType = 'output';
258
358
  if (output.type === 'tool_use') {
259
359
  eventType = 'tool_call';
260
- // Record tool approval request and update progress
261
360
  try {
262
361
  const parsed = parseToolCall(output.content);
263
362
  if (parsed) {
@@ -266,12 +365,11 @@ Rules:
266
365
  sessionId: session.id,
267
366
  toolName: parsed.toolName,
268
367
  parameters: parsed.parameters,
269
- status: 'approved', // Auto-approved for now
368
+ status: 'approved',
270
369
  approvedBy: 'auto',
271
370
  requestedAt: output.timestamp,
272
371
  resolvedAt: output.timestamp,
273
372
  });
274
- // Update progress based on tool usage
275
373
  if (session.progress) {
276
374
  const detectedStep = detectProgressStep(parsed.toolName, parsed.parameters);
277
375
  if (detectedStep) {
@@ -282,13 +380,12 @@ Rules:
282
380
  }
283
381
  }
284
382
  catch {
285
- // Ignore parse errors
383
+ // Ignore
286
384
  }
287
385
  }
288
386
  else if (output.type === 'error') {
289
387
  eventType = 'error';
290
388
  }
291
- // Record event
292
389
  try {
293
390
  await eventRepo.append({
294
391
  id: randomUUID(),
@@ -299,33 +396,34 @@ Rules:
299
396
  });
300
397
  }
301
398
  catch {
302
- // Ignore file write errors
399
+ // Ignore
303
400
  }
304
401
  });
305
- // Start Claude session
306
402
  const result = await claudeAdapter.start({
307
403
  workingDir: worktree.path,
308
404
  prompt,
309
405
  systemPrompt,
310
406
  allowedTools: ['Read', 'Write', 'Edit', 'Bash', 'Glob', 'Grep'],
311
407
  });
312
- // Update session with process info
313
408
  session.processId = result.processId;
314
409
  session.osProcessId = result.osProcessId;
315
410
  session.lastHeartbeat = new Date();
316
411
  session.status = 'running';
317
412
  session.updatedAt = new Date();
318
413
  await sessionRepo.save(session);
319
- // Setup graceful shutdown
320
414
  const handleShutdown = async () => {
321
415
  console.log('\n[Info] Pausing session...');
416
+ if (timeoutChecker)
417
+ clearInterval(timeoutChecker);
322
418
  session.status = 'paused';
323
419
  session.updatedAt = new Date();
324
420
  await sessionRepo.save(session);
325
- console.log(`Session paused: ${session.id.slice(0, 8)}`);
326
- if (session.claudeSessionId) {
327
- console.log(`Resume with: claudetree resume ${session.id.slice(0, 8)}`);
421
+ if (tddState && tddStatePath) {
422
+ tddState.phase = 'failed';
423
+ await writeFile(tddStatePath, JSON.stringify(tddState, null, 2));
328
424
  }
425
+ console.log(`Session paused: ${session.id.slice(0, 8)}`);
426
+ console.log(`Resume with: claudetree resume ${session.id.slice(0, 8)}`);
329
427
  process.exit(0);
330
428
  };
331
429
  process.on('SIGINT', handleShutdown);
@@ -333,20 +431,26 @@ Rules:
333
431
  console.log(`\nSession started: ${session.id.slice(0, 8)}`);
334
432
  console.log(`Working directory: ${worktree.path}`);
335
433
  console.log('Claude is now working on the issue...\n');
336
- // Wait for Claude to complete and show output
337
434
  let outputCount = 0;
338
435
  let currentCost = 0;
339
436
  let budgetExceeded = false;
340
437
  for await (const output of claudeAdapter.getOutput(result.processId)) {
341
438
  outputCount++;
342
439
  session.lastHeartbeat = new Date();
343
- // Track cumulative cost from system events
440
+ lastOutputTime = Date.now();
441
+ // Check timeouts
442
+ if (sessionTimedOut || idleTimedOut) {
443
+ await claudeAdapter.stop(result.processId);
444
+ session.status = 'failed';
445
+ if (tddState)
446
+ tddState.phase = 'failed';
447
+ break;
448
+ }
344
449
  if (output.cumulativeCost !== undefined) {
345
450
  currentCost = output.cumulativeCost;
346
- // Budget check
347
451
  if (options.maxCost && currentCost >= options.maxCost && !budgetExceeded) {
348
452
  budgetExceeded = true;
349
- console.log(`\x1b[31m[Budget]\x1b[0m Cost $${currentCost.toFixed(4)} exceeded limit $${options.maxCost.toFixed(4)}. Stopping session...`);
453
+ console.log(`\x1b[31m[Budget]\x1b[0m Cost $${currentCost.toFixed(4)} exceeded limit $${options.maxCost.toFixed(4)}. Stopping...`);
350
454
  await claudeAdapter.stop(result.processId);
351
455
  session.status = 'failed';
352
456
  session.updatedAt = new Date();
@@ -365,66 +469,102 @@ Rules:
365
469
  }
366
470
  else if (output.type === 'done') {
367
471
  console.log(`\x1b[32m[Done]\x1b[0m Session ID: ${output.content}`);
368
- // Capture Claude session ID for resume
369
472
  if (output.content) {
370
473
  session.claudeSessionId = output.content;
371
474
  }
372
- // Capture token usage
373
475
  if (output.usage) {
374
476
  session.usage = output.usage;
375
477
  console.log(`\x1b[32m[Usage]\x1b[0m Tokens: ${output.usage.inputTokens} in / ${output.usage.outputTokens} out | Cost: $${output.usage.totalCostUsd.toFixed(4)}`);
376
478
  }
377
479
  }
378
- // Update heartbeat periodically
379
480
  if (outputCount % 10 === 0) {
380
481
  session.updatedAt = new Date();
381
482
  await sessionRepo.save(session);
382
483
  }
383
484
  }
384
- // Skip to end if budget was exceeded
385
- if (budgetExceeded) {
386
- console.log('\nSession stopped due to budget limit.');
387
- }
388
- else {
389
- // Session completed
390
- session.status = 'completed';
391
- session.updatedAt = new Date();
392
- await sessionRepo.save(session);
393
- console.log('\nSession completed.');
394
- // Run lint gate
395
- if (options.lint) {
396
- console.log('\n\x1b[36m[Gate]\x1b[0m Running lint check...\n');
397
- console.log(` \x1b[33mLint:\x1b[0m ${options.lint}`);
398
- try {
399
- execSync(options.lint, { cwd: worktree.path, stdio: 'inherit' });
400
- console.log(' \x1b[32m✓ Lint passed\x1b[0m\n');
401
- }
402
- catch {
403
- console.log(' \x1b[31m✗ Lint failed\x1b[0m\n');
404
- if (options.gate) {
405
- console.log('\x1b[31m[Gate]\x1b[0m Session failed lint check.');
406
- session.status = 'failed';
407
- session.updatedAt = new Date();
408
- await sessionRepo.save(session);
409
- }
485
+ if (timeoutChecker)
486
+ clearInterval(timeoutChecker);
487
+ // Run validation gates if TDD mode and session didn't fail
488
+ if (tddEnabled && tddConfig && session.status !== 'failed' && !budgetExceeded) {
489
+ console.log('\n\x1b[36m╔══════════════════════════════════════════╗\x1b[0m');
490
+ console.log('\x1b[36m║ Running Validation Gates ║\x1b[0m');
491
+ console.log('\x1b[36m╚══════════════════════════════════════════╝\x1b[0m\n');
492
+ if (tddState) {
493
+ tddState.phase = 'validating';
494
+ if (tddStatePath)
495
+ await writeFile(tddStatePath, JSON.stringify(tddState, null, 2));
496
+ }
497
+ const gateRunner = new ValidationGateRunner();
498
+ const gateResults = await gateRunner.runWithAutoRetry(tddConfig.gates, {
499
+ cwd: worktree.path,
500
+ maxRetries: tddConfig.maxRetries,
501
+ onRetry: (attempt, failedGate) => {
502
+ console.log(`\x1b[33m[Retry]\x1b[0m Gate '${failedGate}' failed, attempt ${attempt + 1}/${tddConfig.maxRetries}`);
503
+ },
504
+ });
505
+ console.log('\n\x1b[33m📊 Gate Results:\x1b[0m');
506
+ for (const res of gateResults.results) {
507
+ const icon = res.passed ? '\x1b[32m✓\x1b[0m' : '\x1b[31m✗\x1b[0m';
508
+ const attempts = res.attempts > 1 ? ` (${res.attempts} attempts)` : '';
509
+ console.log(` ${icon} ${res.gateName}${attempts}`);
510
+ }
511
+ console.log(`\n Total time: ${formatDuration(gateResults.totalTime)}`);
512
+ if (tddState) {
513
+ tddState.gateResults = gateResults.results;
514
+ }
515
+ if (gateResults.allPassed) {
516
+ console.log('\n\x1b[32m✅ All validation gates passed!\x1b[0m');
517
+ session.status = 'completed';
518
+ if (tddState)
519
+ tddState.phase = 'completed';
520
+ }
521
+ else {
522
+ console.log('\n\x1b[31m❌ Validation gates failed.\x1b[0m');
523
+ session.status = 'failed';
524
+ if (tddState)
525
+ tddState.phase = 'failed';
526
+ const failedGate = gateResults.results.find(r => !r.passed);
527
+ if (failedGate?.output) {
528
+ console.log(`\n\x1b[33mFailed gate output (${failedGate.gateName}):\x1b[0m`);
529
+ console.log(failedGate.output);
410
530
  }
411
531
  }
412
532
  }
413
- // Send Slack notification
533
+ else if (!tddEnabled && session.status !== 'failed' && !budgetExceeded) {
534
+ session.status = 'completed';
535
+ }
536
+ // Final summary
537
+ const totalDuration = Date.now() - sessionStartTime;
538
+ console.log('\n\x1b[36m╔══════════════════════════════════════════╗\x1b[0m');
539
+ console.log('\x1b[36m║ Session Summary ║\x1b[0m');
540
+ console.log('\x1b[36m╚══════════════════════════════════════════╝\x1b[0m\n');
541
+ console.log(` Status: ${session.status === 'completed' ? '\x1b[32mcompleted\x1b[0m' : '\x1b[31mfailed\x1b[0m'}`);
542
+ console.log(` Mode: ${tddEnabled ? 'TDD' : 'Standard'}`);
543
+ console.log(` Duration: ${formatDuration(totalDuration)}`);
544
+ if (session.usage) {
545
+ console.log(` Cost: $${session.usage.totalCostUsd.toFixed(4)}`);
546
+ }
547
+ session.updatedAt = new Date();
548
+ await sessionRepo.save(session);
549
+ if (tddState && tddStatePath) {
550
+ await writeFile(tddStatePath, JSON.stringify(tddState, null, 2));
551
+ }
414
552
  if (config.slack?.webhookUrl) {
415
553
  const slack = new SlackNotifier(config.slack.webhookUrl);
416
554
  await slack.notifySession({
417
555
  sessionId: session.id,
418
- status: 'completed',
556
+ status: session.status === 'completed' ? 'completed' : 'failed',
419
557
  issueNumber,
420
558
  branch: branchName,
421
559
  worktreePath: worktree.path,
422
- duration: Date.now() - session.createdAt.getTime(),
560
+ duration: totalDuration,
423
561
  });
424
562
  }
563
+ if (session.status === 'failed') {
564
+ process.exit(1);
565
+ }
425
566
  }
426
567
  catch (error) {
427
- // Send failure notification
428
568
  if (config.slack?.webhookUrl) {
429
569
  const slack = new SlackNotifier(config.slack.webhookUrl);
430
570
  await slack.notifySession({
@@ -442,7 +582,6 @@ Rules:
442
582
  }
443
583
  });
444
584
  function parseToolCall(content) {
445
- // Format: "ToolName: {json}"
446
585
  const match = content.match(/^(\w+):\s*(.+)$/);
447
586
  if (!match)
448
587
  return null;
@@ -458,7 +597,6 @@ function parseToolCall(content) {
458
597
  }
459
598
  function detectProgressStep(toolName, params) {
460
599
  const command = String(params.command ?? '');
461
- // Detect test running
462
600
  if (toolName === 'Bash') {
463
601
  if (command.includes('test') || command.includes('jest') || command.includes('vitest') || command.includes('pytest')) {
464
602
  return 'testing';
@@ -470,11 +608,9 @@ function detectProgressStep(toolName, params) {
470
608
  return 'creating_pr';
471
609
  }
472
610
  }
473
- // Detect code writing
474
611
  if (toolName === 'Edit' || toolName === 'Write') {
475
612
  return 'implementing';
476
613
  }
477
- // Detect code reading/analysis
478
614
  if (toolName === 'Read' || toolName === 'Glob' || toolName === 'Grep') {
479
615
  return 'analyzing';
480
616
  }
@@ -484,9 +620,7 @@ function updateProgress(progress, newStep) {
484
620
  const stepOrder = ['analyzing', 'implementing', 'testing', 'committing', 'creating_pr'];
485
621
  const currentIdx = stepOrder.indexOf(progress.currentStep);
486
622
  const newIdx = stepOrder.indexOf(newStep);
487
- // Only advance forward, don't go backwards
488
623
  if (newIdx > currentIdx) {
489
- // Mark all steps between current and new as completed
490
624
  const completed = new Set(progress.completedSteps);
491
625
  for (let i = 0; i <= currentIdx; i++) {
492
626
  completed.add(stepOrder[i]);