@bradtaylorsf/alpha-loop 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +60 -19
  2. package/dist/cli.js +83 -1
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/auth.js +1 -1
  5. package/dist/commands/auth.js.map +1 -1
  6. package/dist/commands/eval.d.ts +53 -0
  7. package/dist/commands/eval.js +538 -0
  8. package/dist/commands/eval.js.map +1 -0
  9. package/dist/commands/evolve.d.ts +25 -0
  10. package/dist/commands/evolve.js +270 -0
  11. package/dist/commands/evolve.js.map +1 -0
  12. package/dist/commands/history.d.ts +1 -1
  13. package/dist/commands/history.js +4 -4
  14. package/dist/commands/history.js.map +1 -1
  15. package/dist/commands/init.d.ts +14 -0
  16. package/dist/commands/init.js +199 -30
  17. package/dist/commands/init.js.map +1 -1
  18. package/dist/commands/resume.js +1 -0
  19. package/dist/commands/resume.js.map +1 -1
  20. package/dist/commands/run.js +170 -12
  21. package/dist/commands/run.js.map +1 -1
  22. package/dist/commands/scan.d.ts +1 -1
  23. package/dist/commands/scan.js +12 -9
  24. package/dist/commands/scan.js.map +1 -1
  25. package/dist/commands/sync.d.ts +5 -0
  26. package/dist/commands/sync.js +24 -5
  27. package/dist/commands/sync.js.map +1 -1
  28. package/dist/commands/vision.js +5 -3
  29. package/dist/commands/vision.js.map +1 -1
  30. package/dist/engine/agents.d.ts +6 -1
  31. package/dist/engine/agents.js +14 -12
  32. package/dist/engine/agents.js.map +1 -1
  33. package/dist/engine/prerequisites.d.ts +4 -7
  34. package/dist/engine/prerequisites.js +12 -36
  35. package/dist/engine/prerequisites.js.map +1 -1
  36. package/dist/lib/agent.d.ts +18 -0
  37. package/dist/lib/agent.js +211 -30
  38. package/dist/lib/agent.js.map +1 -1
  39. package/dist/lib/config.d.ts +25 -2
  40. package/dist/lib/config.js +80 -7
  41. package/dist/lib/config.js.map +1 -1
  42. package/dist/lib/eval-checks.d.ts +91 -0
  43. package/dist/lib/eval-checks.js +254 -0
  44. package/dist/lib/eval-checks.js.map +1 -0
  45. package/dist/lib/eval-runner.d.ts +29 -0
  46. package/dist/lib/eval-runner.js +439 -0
  47. package/dist/lib/eval-runner.js.map +1 -0
  48. package/dist/lib/eval.d.ts +170 -0
  49. package/dist/lib/eval.js +507 -0
  50. package/dist/lib/eval.js.map +1 -0
  51. package/dist/lib/learning.js +2 -2
  52. package/dist/lib/learning.js.map +1 -1
  53. package/dist/lib/pipeline.d.ts +44 -0
  54. package/dist/lib/pipeline.js +607 -138
  55. package/dist/lib/pipeline.js.map +1 -1
  56. package/dist/lib/prompts.d.ts +19 -0
  57. package/dist/lib/prompts.js +56 -5
  58. package/dist/lib/prompts.js.map +1 -1
  59. package/dist/lib/score.d.ts +80 -0
  60. package/dist/lib/score.js +172 -0
  61. package/dist/lib/score.js.map +1 -0
  62. package/dist/lib/session.d.ts +2 -1
  63. package/dist/lib/session.js +70 -19
  64. package/dist/lib/session.js.map +1 -1
  65. package/dist/lib/traces.d.ts +173 -0
  66. package/dist/lib/traces.js +272 -0
  67. package/dist/lib/traces.js.map +1 -0
  68. package/dist/lib/verify.d.ts +7 -1
  69. package/dist/lib/verify.js +109 -157
  70. package/dist/lib/verify.js.map +1 -1
  71. package/dist/lib/worktree.d.ts +1 -0
  72. package/dist/lib/worktree.js +9 -1
  73. package/dist/lib/worktree.js.map +1 -1
  74. package/package.json +1 -1
  75. package/templates/agents/implementer.md +1 -1
  76. package/templates/agents/reviewer.md +1 -1
  77. package/dist/engine/config.d.ts +0 -71
  78. package/dist/engine/config.js +0 -73
  79. package/dist/engine/config.js.map +0 -1
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Process Issue Pipeline — the 12-step orchestration for a single issue.
3
3
  */
4
- import { mkdirSync, readFileSync, existsSync } from 'node:fs';
4
+ import { mkdirSync, readFileSync, writeFileSync, unlinkSync, existsSync } from 'node:fs';
5
5
  import { join } from 'node:path';
6
6
  import { log } from './logger.js';
7
7
  import { exec } from './shell.js';
@@ -13,8 +13,188 @@ import { runTests } from './testing.js';
13
13
  import { runVerify } from './verify.js';
14
14
  import { extractLearnings, getLearningContext } from './learning.js';
15
15
  import { saveResult, getPreviousResult } from './session.js';
16
+ import { writeTrace, writeTraceMetadata, writeTraceToSubdir, writeConfigSnapshot, writeScores, writeCosts, computeScores, computeCosts, } from './traces.js';
17
+ import { estimateCost } from './config.js';
16
18
  /** Max diff size to include in learning analysis. */
17
19
  const MAX_DIFF_CHARS = 10_000;
20
/**
 * Build a StepCost entry from an AgentResult.
 *
 * Prefers the exact cost/token figures parsed from the agent run; when any of
 * them is missing, falls back to a rough estimate derived from output length
 * (≈4 chars per token, with input assumed to be 1.3× the output).
 *
 * @param {string} step - Pipeline step name (e.g. 'plan', 'implement').
 * @param {number} issueNum - Issue number the step ran for.
 * @param {object} agentResult - Result object returned by the agent spawn.
 * @param {object} config - Loop config supplying the default model and pricing.
 * @returns {object} StepCost record with token counts and cost in USD.
 */
function buildStepCost(step, issueNum, agentResult, config) {
    const { costUsd, inputTokens, outputTokens, output } = agentResult;
    const model = agentResult.model || config.model;
    const haveExact = costUsd != null && inputTokens != null && outputTokens != null;
    if (haveExact) {
        return {
            step,
            issueNum,
            model,
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            cost_usd: costUsd,
        };
    }
    // Fallback: estimate tokens from output length (chars / 4 ≈ tokens)
    const output_tokens = Math.round(output.length / 4);
    const input_tokens = Math.round(output_tokens * 1.3);
    const cost_usd = estimateCost(model, input_tokens, output_tokens, config.pricing);
    return { step, issueNum, model, input_tokens, output_tokens, cost_usd };
}
49
/**
 * Record a prompt trace to the prompts/ subdirectory.
 * Best-effort: failures are swallowed so tracing never breaks the pipeline.
 * @param {string} session - Session name the trace belongs to.
 * @param {number} issueNum - Issue number being processed.
 * @param {string} step - Pipeline step the prompt was built for.
 * @param {string} prompt - Full prompt text sent to the agent.
 */
function tracePrompt(session, issueNum, step, prompt) {
    const filename = `issue-${issueNum}-${step}.md`;
    try {
        writeTraceToSubdir(session, 'prompts', filename, prompt);
    } catch {
        // non-fatal — tracing is diagnostics only
    }
}
56
/**
 * Record an agent output trace to the outputs/ subdirectory.
 * Best-effort: failures are swallowed so tracing never breaks the pipeline.
 * @param {string} session - Session name the trace belongs to.
 * @param {number} issueNum - Issue number being processed.
 * @param {string} step - Pipeline step that produced the output.
 * @param {string} output - Raw agent output to persist.
 */
function traceOutput(session, issueNum, step, output) {
    const filename = `issue-${issueNum}-${step}.log`;
    try {
        writeTraceToSubdir(session, 'outputs', filename, output);
    } catch {
        // non-fatal — tracing is diagnostics only
    }
}
63
/**
 * Record a diff trace to the diffs/ subdirectory.
 * Best-effort: failures are swallowed so tracing never breaks the pipeline.
 * @param {string} session - Session name the trace belongs to.
 * @param {number} issueNum - Issue number being processed.
 * @param {string} step - Pipeline step the diff was captured after.
 * @param {string} diff - Patch text (git diff output) to persist.
 */
function traceDiff(session, issueNum, step, diff) {
    const filename = `issue-${issueNum}-${step}.patch`;
    try {
        writeTraceToSubdir(session, 'diffs', filename, diff);
    } catch {
        // non-fatal — tracing is diagnostics only
    }
}
70
/**
 * Record a test output trace to the tests/ subdirectory.
 * Best-effort: failures are swallowed so tracing never breaks the pipeline.
 * @param {string} session - Session name the trace belongs to.
 * @param {number} issueNum - Issue number being processed.
 * @param {number} attempt - Test attempt number (1-based).
 * @param {string} output - Raw test-runner output to persist.
 */
function traceTest(session, issueNum, attempt, output) {
    const filename = `issue-${issueNum}-test-${attempt}.txt`;
    try {
        writeTraceToSubdir(session, 'tests', filename, output);
    } catch {
        // non-fatal — tracing is diagnostics only
    }
}
77
/**
 * Record a verify output trace to the verify/ subdirectory.
 * Best-effort: failures are swallowed so tracing never breaks the pipeline.
 * @param {string} session - Session name the trace belongs to.
 * @param {number} issueNum - Issue number being processed.
 * @param {number} attempt - Verification attempt number (1-based).
 * @param {string} output - Raw verification output to persist.
 */
function traceVerify(session, issueNum, attempt, output) {
    const filename = `issue-${issueNum}-verify-${attempt}.txt`;
    try {
        writeTraceToSubdir(session, 'verify', filename, output);
    } catch {
        // non-fatal — tracing is diagnostics only
    }
}
84
/** Patterns that indicate a transient agent error (re-queue, don't mark as failed). */
const TRANSIENT_ERROR_PATTERNS = [
    /usage limit/i,
    /rate limit/i,
    /too many requests/i,
    /quota exceeded/i,
    /capacity/i,
    /try again/i,
];
/**
 * Check if agent output indicates a transient error (usage limits, rate limits).
 * These issues should be re-queued, not marked as permanently failed.
 * @param {string} output - Raw agent output to scan.
 * @returns {boolean} true when any transient-error pattern matches.
 */
function isTransientError(output) {
    for (const pattern of TRANSIENT_ERROR_PATTERNS) {
        if (pattern.test(output)) {
            return true;
        }
    }
    return false;
}
100
/**
 * Default gate result when agent doesn't write one (assume pass).
 *
 * NOTE: this exact object also serves as a sentinel — callers compare a gate
 * result against DEFAULT_GATE by identity (`!== DEFAULT_GATE`) to detect the
 * fallback case, so readGateResult must return this object itself, not a copy.
 */
const DEFAULT_GATE = {
    passed: true, // fail-open: a missing gate file does not block the pipeline
    summary: 'Gate agent did not write a result file — assuming pass',
    findings: [],
};
106
/** Default plan when planning fails or is skipped. */
const DEFAULT_PLAN = {
    summary: '',
    files: [],
    implementation: '',
    testing: { needed: true, reason: 'Default: run project test command' },
    verification: { needed: false, reason: 'Default: skip verification unless plan requests it' },
};
/**
 * Read and validate a plan JSON file written by the planning agent.
 * Falls back to DEFAULT_PLAN if the file doesn't exist or is invalid.
 * @param {string} planFile - Path to the plan JSON file.
 * @returns {object} Normalized plan with summary, files, implementation,
 *   testing and verification sections (defaults applied to missing fields).
 */
function readPlan(planFile) {
    try {
        if (!existsSync(planFile)) {
            return DEFAULT_PLAN;
        }
        const parsed = JSON.parse(readFileSync(planFile, 'utf-8'));
        // Testing defaults ON unless the plan explicitly says `needed: false`.
        const testing = {
            needed: parsed.testing?.needed !== false,
            reason: String(parsed.testing?.reason ?? 'No reason given'),
        };
        // Verification defaults OFF unless the plan explicitly says `needed: true`.
        const verification = {
            needed: parsed.verification?.needed === true,
            instructions: parsed.verification?.instructions || undefined,
            reason: String(parsed.verification?.reason ?? 'No reason given'),
        };
        return {
            summary: String(parsed.summary ?? ''),
            files: Array.isArray(parsed.files) ? parsed.files.map(String) : [],
            implementation: String(parsed.implementation ?? ''),
            testing,
            verification,
        };
    } catch {
        // Unreadable file or malformed JSON — treat as "no plan".
        return DEFAULT_PLAN;
    }
}
143
/**
 * Read and validate a gate result JSON file written by review/verify agents.
 * Falls back to DEFAULT_GATE if the file doesn't exist or is invalid.
 * @param {string} gateFile - Path to the gate-result JSON file.
 * @returns {object} Gate result with passed flag, summary, and normalized
 *   findings; unknown severities are coerced to 'info'.
 */
export function readGateResult(gateFile) {
    // Normalize one raw finding entry into the expected shape.
    const toFinding = (raw) => {
        const severity = ['critical', 'warning', 'info'].includes(String(raw.severity))
            ? raw.severity
            : 'info';
        return {
            severity,
            description: String(raw.description ?? ''),
            fixed: raw.fixed === true,
            file: raw.file ? String(raw.file) : undefined,
        };
    };
    try {
        if (!existsSync(gateFile)) {
            return DEFAULT_GATE;
        }
        const parsed = JSON.parse(readFileSync(gateFile, 'utf-8'));
        const findings = Array.isArray(parsed.findings)
            ? parsed.findings.map(toFinding)
            : [];
        return {
            passed: parsed.passed === true,
            summary: String(parsed.summary ?? ''),
            findings,
        };
    } catch {
        // Unreadable file or malformed JSON — assume pass via the shared default.
        return DEFAULT_GATE;
    }
}
170
/**
 * Move a JSON file from worktree to session logs dir (for inspection).
 * Deletes the source file from the worktree. Non-fatal on failure.
 * Implemented as copy-then-unlink; silently returns when the source is absent.
 * @param {string} src - Source file inside the worktree.
 * @param {string} dest - Destination path inside the session logs directory.
 */
function moveToSessionLogs(src, dest) {
    try {
        if (!existsSync(src)) {
            return;
        }
        writeFileSync(dest, readFileSync(src, 'utf-8'));
        unlinkSync(src);
    } catch {
        // Best-effort: losing the log copy must not fail the pipeline.
    }
}
184
/**
 * Format gate findings into a prompt section for the implementer.
 * Only unfixed findings are listed; returns '' when there is nothing to fix.
 * @param {object} gate - Gate result with summary and findings array.
 * @param {string} gateType - Label for the section header (e.g. 'Code Review').
 * @returns {string} Markdown section, or the empty string.
 */
export function formatGateFindings(gate, gateType) {
    const open = gate.findings.filter((finding) => !finding.fixed);
    if (open.length === 0) {
        return '';
    }
    const bullets = open.map((finding) => {
        const location = finding.file ? ` (${finding.file})` : '';
        return `- [${finding.severity.toUpperCase()}]${location} ${finding.description}`;
    });
    return [`## ${gateType} Findings (MUST FIX)`, '', gate.summary, '', ...bullets].join('\n');
}
18
198
  /**
19
199
  * Process a single issue through the full pipeline.
20
200
  * Steps: status → worktree → plan → implement → test+retry → verify+retry →
@@ -23,6 +203,8 @@ const MAX_DIFF_CHARS = 10_000;
23
203
  export async function processIssue(issueNum, title, body, config, session) {
24
204
  const startTime = Date.now();
25
205
  const projectDir = process.cwd();
206
+ const stepCosts = [];
207
+ const stepsCompleted = [];
26
208
  // Setup logging
27
209
  mkdirSync(session.logsDir, { recursive: true });
28
210
  const logFile = join(session.logsDir, `issue-${issueNum}.log`);
@@ -49,6 +231,7 @@ export async function processIssue(issueNum, title, body, config, session) {
49
231
  sessionBranch: session.branch,
50
232
  autoMerge: config.autoMerge,
51
233
  skipInstall: config.skipInstall,
234
+ setupCommand: config.setupCommand,
52
235
  dryRun: config.dryRun,
53
236
  });
54
237
  worktreePath = wt.path;
@@ -62,25 +245,78 @@ export async function processIssue(issueNum, title, body, config, session) {
62
245
  }
63
246
  return failureResult(issueNum, title, startTime);
64
247
  }
65
- // --- Step 3: Plan (optional, non-fatal) ---
248
+ // --- Step 3: Plan (structured JSON — controls test/verify steps) ---
66
249
  log.step('Step 3: Planning');
67
- let implBody = body;
250
+ let plan = DEFAULT_PLAN;
251
+ // Write plan inside the worktree (agents sandbox to their CWD), then move to sessions dir
252
+ const planFileInWorktree = join(worktreePath, `plan-issue-${issueNum}.json`);
253
+ const planFileInSession = join(session.logsDir, `plan-issue-${issueNum}.json`);
68
254
  if (!config.dryRun) {
69
255
  try {
256
+ const planPrompt = `Analyze this GitHub issue and produce a structured implementation plan.
257
+
258
+ Issue #${issueNum}: ${title}
259
+
260
+ ${body}
261
+
262
+ Write a JSON file to: plan-issue-${issueNum}.json
263
+
264
+ The file must contain ONLY valid JSON with this exact schema:
265
+
266
+ {
267
+ "summary": "One-line description of what needs to be done",
268
+ "files": ["src/path/to/file.ts", "..."],
269
+ "implementation": "Concise step-by-step plan. What to create, modify, wire up. No issue restatement.",
270
+ "testing": {
271
+ "needed": true,
272
+ "reason": "Why tests are or aren't needed for this change"
273
+ },
274
+ "verification": {
275
+ "needed": false,
276
+ "instructions": "If needed: specific playwright-cli steps to verify the feature. If not needed: omit this field.",
277
+ "reason": "Why verification is or isn't needed (e.g. no UI changes, API-only, config change)"
278
+ }
279
+ }
280
+
281
+ Rules:
282
+ - testing.needed: true if ANY code changes could affect behavior. false only for docs, config, or comments.
283
+ - verification.needed: true ONLY if the issue changes user-visible UI that can be tested in a browser.
284
+ - verification.instructions: if needed, list the exact playwright-cli commands to verify (open URL, click elements, check content).
285
+ - implementation: be concise and actionable. List files to modify and what to change in each.
286
+ - Write ONLY the JSON file. Do not create any other files or make any code changes.`;
287
+ // Trace the plan prompt
288
+ tracePrompt(session.name, issueNum, 'plan', planPrompt);
70
289
  const planResult = await spawnAgent({
71
- agent: 'claude',
290
+ agent: config.agent,
72
291
  model: config.model,
73
- prompt: `Analyze this GitHub issue and enrich it with implementation details.\n\nIssue #${issueNum}: ${title}\n\n${body}\n\nOutput the enriched issue body with acceptance criteria, implementation notes, and any edge cases to handle.`,
292
+ prompt: planPrompt,
74
293
  cwd: worktreePath,
75
294
  logFile: join(session.logsDir, `issue-${issueNum}-plan.log`),
76
295
  verbose: config.verbose,
77
296
  });
78
- if (planResult.exitCode === 0 && planResult.output.trim()) {
79
- implBody = body + '\n\n## Agent Planning Notes\n\n' + planResult.output.trim();
297
+ // Trace the plan output and costs
298
+ traceOutput(session.name, issueNum, 'plan', planResult.output);
299
+ stepCosts.push(buildStepCost('plan', issueNum, planResult, config));
300
+ // Detect transient errors (usage limits) during planning
301
+ if (planResult.exitCode !== 0 && isTransientError(planResult.output)) {
302
+ log.warn(`Agent hit a transient error during planning for #${issueNum} — re-queuing`);
303
+ requeueIssue(config, issueNum);
304
+ await cleanupWorktree({ issueNum, projectDir, autoCleanup: config.autoCleanup });
305
+ return failureResult(issueNum, title, startTime, 'transient');
306
+ }
307
+ plan = readPlan(planFileInWorktree);
308
+ stepsCompleted.push('plan');
309
+ if (plan.summary) {
310
+ // Move plan from worktree to sessions dir for inspection, clean up worktree
311
+ moveToSessionLogs(planFileInWorktree, planFileInSession);
312
+ log.success(`Plan: ${plan.summary} | Tests: ${plan.testing.needed ? 'yes' : 'skip'} | Verify: ${plan.verification.needed ? 'yes' : 'skip'}`);
313
+ }
314
+ else {
315
+ log.warn('Planning agent did not write plan file, using defaults (run all tests, skip verify)');
80
316
  }
81
317
  }
82
318
  catch {
83
- log.warn('Planning stage failed, proceeding with original issue description');
319
+ log.warn('Planning stage failed, using defaults');
84
320
  }
85
321
  }
86
322
  else {
@@ -97,26 +333,38 @@ export async function processIssue(issueNum, title, body, config, session) {
97
333
  const implementPrompt = buildImplementPrompt({
98
334
  issueNum,
99
335
  title,
100
- body: implBody,
336
+ body,
337
+ planContent: plan.implementation || undefined,
101
338
  visionContext: visionContext ?? undefined,
102
339
  projectContext: projectContext ?? undefined,
103
340
  previousResult: previousResult ?? undefined,
104
341
  learningContext: learningContext || undefined,
105
342
  });
343
+ // Trace the implement prompt
344
+ tracePrompt(session.name, issueNum, 'implement', implementPrompt);
106
345
  const implResult = await spawnAgent({
107
- agent: 'claude',
346
+ agent: config.agent,
108
347
  model: config.model,
109
348
  prompt: implementPrompt,
110
349
  cwd: worktreePath,
111
350
  logFile: join(session.logsDir, `issue-${issueNum}-implement.log`),
112
351
  verbose: config.verbose,
113
352
  });
353
+ // Trace the implement output and costs
354
+ traceOutput(session.name, issueNum, 'implement', implResult.output);
355
+ stepCosts.push(buildStepCost('implement', issueNum, implResult, config));
114
356
  if (implResult.exitCode !== 0) {
357
+ if (isTransientError(implResult.output)) {
358
+ log.warn(`Agent hit a transient error during implementation for #${issueNum} — re-queuing`);
359
+ requeueIssue(config, issueNum);
360
+ await cleanupWorktree({ issueNum, projectDir, autoCleanup: config.autoCleanup });
361
+ return failureResult(issueNum, title, startTime, 'transient');
362
+ }
115
363
  log.error(`Implementation failed for issue #${issueNum}`);
116
364
  labelIssue(config.repo, issueNum, 'failed', 'in-progress');
117
365
  commentIssue(config.repo, issueNum, 'Agent loop failed during implementation. See logs for details.');
118
366
  await cleanupWorktree({ issueNum, projectDir, autoCleanup: config.autoCleanup });
119
- return failureResult(issueNum, title, startTime);
367
+ return failureResult(issueNum, title, startTime, 'permanent');
120
368
  }
121
369
  // Auto-commit if agent didn't
122
370
  const statusResult = exec('git status --porcelain', { cwd: worktreePath });
@@ -124,6 +372,14 @@ export async function processIssue(issueNum, title, body, config, session) {
124
372
  exec('git add -A', { cwd: worktreePath });
125
373
  exec(`git commit -m "feat: implement issue #${issueNum} - ${title}"`, { cwd: worktreePath });
126
374
  }
375
+ stepsCompleted.push('implement');
376
+ // Capture implement diff
377
+ try {
378
+ const implDiff = exec(`git diff "origin/${config.baseBranch}...HEAD"`, { cwd: worktreePath });
379
+ if (implDiff.stdout)
380
+ traceDiff(session.name, issueNum, 'implement', implDiff.stdout);
381
+ }
382
+ catch { /* non-fatal */ }
127
383
  }
128
384
  else {
129
385
  log.dry('Would run implementation agent');
@@ -132,33 +388,57 @@ export async function processIssue(issueNum, title, body, config, session) {
132
388
  log.step('Step 5: Running tests');
133
389
  let testOutput = '';
134
390
  let testsPassing = false;
135
- for (let attempt = 1; attempt <= config.maxTestRetries; attempt++) {
391
+ let testRetries = 0;
392
+ if (!plan.testing.needed) {
393
+ log.info(`Tests skipped by plan: ${plan.testing.reason}`);
394
+ testsPassing = true;
395
+ testOutput = `Tests skipped by plan: ${plan.testing.reason}`;
396
+ }
397
+ for (let attempt = 1; testsPassing ? false : attempt <= config.maxTestRetries; attempt++) {
136
398
  log.info(`Test attempt ${attempt} of ${config.maxTestRetries}`);
137
399
  const testResult = runTests(worktreePath, config, logFile);
138
400
  testOutput = testResult.output;
401
+ // Trace test output
402
+ traceTest(session.name, issueNum, attempt, testOutput);
139
403
  if (testResult.passed) {
140
404
  testsPassing = true;
405
+ stepsCompleted.push('test');
141
406
  log.success(`All tests passed on attempt ${attempt}`);
142
407
  break;
143
408
  }
144
409
  if (attempt < config.maxTestRetries) {
410
+ testRetries++;
145
411
  log.warn(`Tests failed on attempt ${attempt}, invoking agent to fix...`);
146
412
  if (!config.dryRun) {
147
- const fixPrompt = `Tests are failing for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}). Fix the failing tests.\n\nTest output:\n${testOutput}\n\nInstructions:\n1. Read the failing test output carefully and identify the ROOT CAUSE\n2. Fix the implementation code or the tests\n3. Run the tests again to verify\n4. Commit your fixes with a DESCRIPTIVE message that explains WHAT you fixed and WHY it failed.\n Format: fix(#${issueNum}): <what you changed> — <why it was failing>\n Example: fix(#${issueNum}): use port 5435 for postgres — default 5432 conflicts with host service\n DO NOT use generic messages like "fix: resolve test failures"`;
148
- await spawnAgent({
149
- agent: 'claude',
413
+ const fixPrompt = `Tests are failing for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}). Fix the failing tests.\n\nTest output:\n${testOutput}\n\nInstructions:\n1. Read the failing test output carefully and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum} — do NOT modify test infrastructure, build scripts, or unrelated files\n3. If tests fail due to environment issues (missing venv, wrong port, missing deps), fix only YOUR code — do NOT rewrite the test runner or package.json scripts\n4. Run the tests again to verify\n5. Commit your fixes with a DESCRIPTIVE message that explains WHAT you fixed and WHY it failed.\n Format: fix(#${issueNum}): <what you changed> — <why it was failing>\n Example: fix(#${issueNum}): use port 5435 for postgres — default 5432 conflicts with host service\n DO NOT use generic messages like "fix: resolve test failures"`;
414
+ // Trace fix prompt
415
+ tracePrompt(session.name, issueNum, `fix-${attempt}`, fixPrompt);
416
+ const fixResult = await spawnAgent({
417
+ agent: config.agent,
150
418
  model: config.model,
151
419
  prompt: fixPrompt,
152
420
  cwd: worktreePath,
421
+ resume: true,
153
422
  logFile: join(session.logsDir, `issue-${issueNum}-fix-${attempt}.log`),
154
423
  verbose: config.verbose,
155
424
  });
425
+ // Trace fix output and costs
426
+ traceOutput(session.name, issueNum, `fix-${attempt}`, fixResult.output);
427
+ stepCosts.push(buildStepCost('test_fix', issueNum, fixResult, config));
428
+ stepsCompleted.push(`fix-${attempt}`);
156
429
  // Auto-commit fixes
157
430
  const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
158
431
  if (fixStatus.stdout.trim()) {
159
432
  exec('git add -A', { cwd: worktreePath });
160
433
  exec(`git commit -m "fix(#${issueNum}): resolve test failures (attempt ${attempt})"`, { cwd: worktreePath });
161
434
  }
435
+ // Capture fix diff
436
+ try {
437
+ const fixDiffResult = exec(`git diff "origin/${config.baseBranch}...HEAD"`, { cwd: worktreePath });
438
+ if (fixDiffResult.stdout)
439
+ traceDiff(session.name, issueNum, `fix-${attempt}`, fixDiffResult.stdout);
440
+ }
441
+ catch { /* non-fatal */ }
162
442
  }
163
443
  }
164
444
  else {
@@ -166,95 +446,198 @@ export async function processIssue(issueNum, title, body, config, session) {
166
446
  testOutput = `TESTS FAILED after ${config.maxTestRetries} fix attempts. Latest output:\n${testOutput}`;
167
447
  }
168
448
  }
169
- // --- Step 6: Live verification with playwright-cli ---
170
- log.step('Step 6: Live verification');
171
- let verifyOutput = '';
172
- let verifyPassing = false;
173
- for (let attempt = 1; attempt <= config.maxTestRetries; attempt++) {
174
- log.info(`Verification attempt ${attempt} of ${config.maxTestRetries}`);
175
- const verifyResult = await runVerify({
176
- worktree: worktreePath,
177
- logFile,
178
- issueNum,
179
- title,
180
- body,
181
- config,
182
- sessionDir: session.resultsDir,
183
- });
184
- verifyOutput = verifyResult.output;
185
- if (verifyResult.passed) {
186
- verifyPassing = true;
187
- log.success(`Verification passed on attempt ${attempt}`);
188
- break;
189
- }
190
- if (attempt < config.maxTestRetries) {
191
- // If the agent timed out, retrying with a fix agent won't help — just retry verification
192
- const timedOut = verifyOutput.includes('[TIMEOUT]');
193
- if (timedOut) {
194
- log.warn(`Verification timed out on attempt ${attempt}, retrying without fix agent...`);
449
+ // --- Step 6: Review gate (JSON-based) ---
450
+ log.step('Step 6: Code review');
451
+ let reviewOutput = '';
452
+ let reviewGate = DEFAULT_GATE;
453
+ if (config.skipReview) {
454
+ log.info('Code review skipped');
455
+ }
456
+ else if (config.dryRun) {
457
+ log.dry('Would run code review');
458
+ }
459
+ else {
460
+ const reviewFileInWorktree = join(worktreePath, `review-issue-${issueNum}.json`);
461
+ const reviewFileInSession = join(session.logsDir, `review-issue-${issueNum}.json`);
462
+ for (let attempt = 1; attempt <= config.maxTestRetries; attempt++) {
463
+ log.info(`Review attempt ${attempt} of ${config.maxTestRetries}`);
464
+ try {
465
+ const reviewPrompt = buildReviewPrompt({
466
+ issueNum,
467
+ title,
468
+ body,
469
+ baseBranch: config.baseBranch,
470
+ visionContext: loadFileIfExists(join(projectDir, '.alpha-loop', 'vision.md')) ?? undefined,
471
+ });
472
+ // Trace review prompt
473
+ tracePrompt(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewPrompt);
474
+ const reviewResult = await spawnAgent({
475
+ agent: config.agent,
476
+ model: config.reviewModel,
477
+ prompt: reviewPrompt,
478
+ cwd: worktreePath,
479
+ logFile: join(session.logsDir, `issue-${issueNum}-review${attempt > 1 ? `-${attempt}` : ''}.log`),
480
+ verbose: config.verbose,
481
+ });
482
+ // Trace review output and costs
483
+ traceOutput(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewResult.output);
484
+ stepCosts.push(buildStepCost('review', issueNum, reviewResult, config));
485
+ reviewOutput = reviewResult.output;
195
486
  }
196
- else {
197
- log.warn(`Verification failed on attempt ${attempt}, invoking agent to fix...`);
198
- const verifyFixPrompt = `Build verification failed after implementing issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}).\nThe app was started and tested with playwright-cli, but verification failed.\n\nVerification output:\n${verifyOutput}\n\nInstructions:\n1. Read the verification output above and identify the ROOT CAUSE of each failure\n2. Fix the implementation code so the feature works correctly\n3. Run the test command to make sure unit tests still pass\n4. Commit your fixes with a DESCRIPTIVE message that explains WHAT you fixed and WHY it failed.\n Format: fix(#${issueNum}): <what you changed> — <why verification failed>\n Example: fix(#${issueNum}): add ENCRYPTION_KEY to langfuse config — service requires 32+ char secret\n DO NOT use generic messages like "fix: resolve verification failures"`;
199
- await spawnAgent({
200
- agent: 'claude',
487
+ catch {
488
+ log.warn('Code review failed, continuing without review');
489
+ reviewOutput = 'Code review could not be completed';
490
+ break;
491
+ }
492
+ // Read the gate JSON
493
+ reviewGate = readGateResult(reviewFileInWorktree);
494
+ moveToSessionLogs(reviewFileInWorktree, reviewFileInSession);
495
+ if (reviewGate.passed) {
496
+ stepsCompleted.push('review');
497
+ log.success(`Review passed: ${reviewGate.summary || 'no issues found'}`);
498
+ break;
499
+ }
500
+ // Review found unfixed issues — loop back to implementer
501
+ const unfixedCount = reviewGate.findings.filter((f) => !f.fixed).length;
502
+ log.warn(`Review found ${unfixedCount} unfixed issue(s), sending back to implementer...`);
503
+ if (attempt < config.maxTestRetries) {
504
+ const findings = formatGateFindings(reviewGate, 'Code Review');
505
+ const fixPrompt = `The code review for issue #${issueNum} found problems that need to be fixed.\n\n${findings}\n\nInstructions:\n1. Address each finding listed above\n2. Run tests to make sure nothing is broken\n3. Commit your fixes with: git commit -m "fix(#${issueNum}): address review findings"`;
506
+ // Trace review-fix prompt
507
+ tracePrompt(session.name, issueNum, `review-fix-${attempt}`, fixPrompt);
508
+ const reviewFixResult = await spawnAgent({
509
+ agent: config.agent,
201
510
  model: config.model,
202
- prompt: verifyFixPrompt,
511
+ prompt: fixPrompt,
203
512
  cwd: worktreePath,
204
- logFile: join(session.logsDir, `issue-${issueNum}-verify-fix-${attempt}.log`),
513
+ resume: true,
514
+ logFile: join(session.logsDir, `issue-${issueNum}-review-fix-${attempt}.log`),
205
515
  verbose: config.verbose,
206
516
  });
517
+ // Trace review-fix output and costs
518
+ traceOutput(session.name, issueNum, `review-fix-${attempt}`, reviewFixResult.output);
519
+ stepCosts.push(buildStepCost('review', issueNum, reviewFixResult, config));
520
+ // Auto-commit if agent didn't
521
+ const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
522
+ if (fixStatus.stdout.trim()) {
523
+ exec('git add -A', { cwd: worktreePath });
524
+ exec(`git commit -m "fix(#${issueNum}): address review findings (attempt ${attempt})"`, { cwd: worktreePath });
525
+ }
526
+ // Re-run tests before next review attempt
527
+ const retest = runTests(worktreePath, config, logFile);
528
+ if (!retest.passed) {
529
+ log.warn('Tests failed after review fixes — will be caught in final status');
530
+ testOutput = retest.output;
531
+ testsPassing = false;
532
+ }
207
533
  }
208
- // Auto-commit fixes
209
- const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
210
- if (fixStatus.stdout.trim()) {
211
- exec('git add -A', { cwd: worktreePath });
212
- exec(`git commit -m "fix(#${issueNum}): resolve verification failures (attempt ${attempt})"`, { cwd: worktreePath });
534
+ else {
535
+ log.warn(`Review still failing after ${config.maxTestRetries} attempts`);
213
536
  }
214
537
  }
215
- else {
216
- log.warn(`Verification still failing after ${config.maxTestRetries} attempts`);
217
- }
218
538
  }
219
- // --- Step 7: Review ---
220
- log.step('Step 7: Code review');
221
- let reviewOutput = '';
222
- if (config.skipReview) {
223
- log.info('Code review skipped');
224
- }
225
- else if (config.dryRun) {
226
- log.dry('Would run code review');
539
+ // --- Step 7: Verify gate (JSON-based) ---
540
+ log.step('Step 7: Live verification');
541
+ let verifyOutput = '';
542
+ let verifyPassing = false;
543
+ let verifySkipped = false;
544
+ if (!plan.verification.needed) {
545
+ log.info(`Verification skipped by plan: ${plan.verification.reason}`);
546
+ verifyPassing = true;
547
+ verifySkipped = true;
548
+ verifyOutput = `Verification skipped by plan: ${plan.verification.reason}`;
227
549
  }
228
- else {
229
- try {
230
- const reviewPrompt = buildReviewPrompt({
550
+ if (!verifySkipped && !config.dryRun) {
551
+ const verifyFileInWorktree = join(worktreePath, `verify-issue-${issueNum}.json`);
552
+ const verifyFileInSession = join(session.logsDir, `verify-issue-${issueNum}.json`);
553
+ for (let attempt = 1; attempt <= config.maxTestRetries; attempt++) {
554
+ log.info(`Verification attempt ${attempt} of ${config.maxTestRetries}`);
555
+ const verifyResult = await runVerify({
556
+ worktree: worktreePath,
557
+ logFile,
231
558
  issueNum,
232
559
  title,
233
560
  body,
234
- baseBranch: config.baseBranch,
235
- visionContext: loadFileIfExists(join(projectDir, '.alpha-loop', 'vision.md')) ?? undefined,
561
+ config,
562
+ sessionDir: session.resultsDir,
563
+ verifyInstructions: plan.verification.instructions,
236
564
  });
237
- const reviewResult = await spawnAgent({
238
- agent: 'claude',
239
- model: config.reviewModel,
240
- prompt: reviewPrompt,
241
- cwd: worktreePath,
242
- logFile: join(session.logsDir, `issue-${issueNum}-review.log`),
243
- verbose: config.verbose,
244
- });
245
- reviewOutput = reviewResult.output;
246
- }
247
- catch {
248
- log.warn('Code review failed, continuing without review');
249
- reviewOutput = 'Code review could not be completed';
565
+ verifyOutput = verifyResult.output;
566
+ // Trace verify output
567
+ traceVerify(session.name, issueNum, attempt, verifyOutput);
568
+ if (verifyResult.skipped) {
569
+ verifyPassing = true;
570
+ verifySkipped = true;
571
+ break;
572
+ }
573
+ // Read verify gate JSON (if the verify agent wrote one)
574
+ const verifyGate = readGateResult(verifyFileInWorktree);
575
+ moveToSessionLogs(verifyFileInWorktree, verifyFileInSession);
576
+ // Use gate JSON if available, otherwise fall back to runVerify's pass/fail
577
+ const passed = verifyGate !== DEFAULT_GATE ? verifyGate.passed : verifyResult.passed;
578
+ if (passed) {
579
+ verifyPassing = true;
580
+ stepsCompleted.push('verify');
581
+ log.success(`Verification passed on attempt ${attempt}`);
582
+ break;
583
+ }
584
+ if (attempt < config.maxTestRetries) {
585
+ const timedOut = verifyOutput.includes('[TIMEOUT]');
586
+ if (timedOut) {
587
+ log.warn(`Verification timed out on attempt ${attempt}, retrying...`);
588
+ }
589
+ else {
590
+ log.warn(`Verification failed on attempt ${attempt}, sending back to implementer...`);
591
+ // Use gate findings if available, otherwise use raw verify output
592
+ const findings = verifyGate !== DEFAULT_GATE
593
+ ? formatGateFindings(verifyGate, 'Verification')
594
+ : `## Verification Findings (MUST FIX)\n\n${verifyOutput}`;
595
+ const fixPrompt = `Live verification failed for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}).\n\n${findings}\n\nInstructions:\n1. Read the verification findings and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum}\n3. Run tests to make sure nothing is broken\n4. Commit your fixes with: git commit -m "fix(#${issueNum}): address verification findings"`;
596
+ // Trace verify-fix prompt
597
+ tracePrompt(session.name, issueNum, `verify-fix-${attempt}`, fixPrompt);
598
+ const verifyFixResult = await spawnAgent({
599
+ agent: config.agent,
600
+ model: config.model,
601
+ prompt: fixPrompt,
602
+ cwd: worktreePath,
603
+ resume: true,
604
+ logFile: join(session.logsDir, `issue-${issueNum}-verify-fix-${attempt}.log`),
605
+ verbose: config.verbose,
606
+ });
607
+ // Trace verify-fix output and costs
608
+ traceOutput(session.name, issueNum, `verify-fix-${attempt}`, verifyFixResult.output);
609
+ stepCosts.push(buildStepCost('verify', issueNum, verifyFixResult, config));
610
+ // Auto-commit if agent didn't
611
+ const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
612
+ if (fixStatus.stdout.trim()) {
613
+ exec('git add -A', { cwd: worktreePath });
614
+ exec(`git commit -m "fix(#${issueNum}): address verification findings (attempt ${attempt})"`, { cwd: worktreePath });
615
+ }
616
+ // Re-run tests before next verify attempt
617
+ const retest = runTests(worktreePath, config, logFile);
618
+ if (!retest.passed) {
619
+ log.warn('Tests failed after verify fixes');
620
+ testOutput = retest.output;
621
+ testsPassing = false;
622
+ }
623
+ }
624
+ }
625
+ else {
626
+ log.warn(`Verification still failing after ${config.maxTestRetries} attempts`);
627
+ }
250
628
  }
251
629
  }
630
+ else if (config.dryRun && !verifySkipped) {
631
+ log.dry('Would run live verification');
632
+ verifyPassing = true;
633
+ verifySkipped = true;
634
+ }
252
635
  // --- Step 8: Create PR ---
253
636
  log.step('Step 8: Creating PR');
254
637
  let prUrl;
255
638
  if (!config.dryRun) {
256
639
  const prBase = config.autoMerge ? session.branch : config.baseBranch;
257
- const prBody = buildPRBody(issueNum, title, reviewOutput, testOutput, testsPassing, verifyPassing, body);
640
+ const prBody = buildPRBody(issueNum, title, reviewGate, testOutput, testsPassing, verifyPassing, verifySkipped, body);
258
641
  try {
259
642
  prUrl = createPR({
260
643
  repo: config.repo,
@@ -264,6 +647,7 @@ export async function processIssue(issueNum, title, body, config, session) {
264
647
  body: prBody,
265
648
  cwd: worktreePath,
266
649
  });
650
+ stepsCompleted.push('pr');
267
651
  log.success(`PR created: ${prUrl}`);
268
652
  }
269
653
  catch (err) {
@@ -286,24 +670,86 @@ export async function processIssue(issueNum, title, body, config, session) {
286
670
  const diffResult = exec(`git diff "origin/${config.baseBranch}...HEAD"`, { cwd: worktreePath });
287
671
  runDiff = diffResult.stdout.slice(0, MAX_DIFF_CHARS);
288
672
  }
673
+ // Format review gate for learnings
674
+ const reviewForLearnings = reviewGate.findings.length > 0
675
+ ? `Review: ${reviewGate.summary}\n${reviewGate.findings.map((f) => `- [${f.severity}] ${f.description} (${f.fixed ? 'fixed' : 'unfixed'})`).join('\n')}`
676
+ : `Review: ${reviewGate.summary || 'passed'}`;
289
677
  await extractLearnings({
290
678
  issueNum,
291
679
  title,
292
680
  status: testsPassing ? 'success' : 'failure',
293
- retries: config.maxTestRetries,
681
+ retries: testRetries,
294
682
  duration,
295
683
  diff: runDiff,
296
684
  testOutput,
297
- reviewOutput,
685
+ reviewOutput: reviewForLearnings,
298
686
  verifyOutput,
299
687
  body,
300
688
  config,
301
689
  });
690
+ // --- Step 9b: Write full traces (Meta-Harness style) ---
691
+ stepsCompleted.push('learn');
692
+ const filesChanged = runDiff ? (runDiff.match(/^diff --git/gm) ?? []).length : 0;
693
+ if (!config.dryRun) {
694
+ try {
695
+ // Per-issue metadata (backward compat)
696
+ writeTraceMetadata(session.name, issueNum, {
697
+ issueNum,
698
+ title,
699
+ status: testsPassing ? 'success' : 'failure',
700
+ duration,
701
+ retries: testRetries,
702
+ testsPassing,
703
+ verifyPassing,
704
+ verifySkipped,
705
+ filesChanged,
706
+ prUrl,
707
+ timestamp: new Date().toISOString(),
708
+ agent: config.agent,
709
+ model: config.model,
710
+ });
711
+ if (runDiff)
712
+ writeTrace(session.name, issueNum, 'diff.patch', runDiff);
713
+ if (testOutput)
714
+ writeTrace(session.name, issueNum, 'test-output.txt', testOutput);
715
+ if (reviewForLearnings)
716
+ writeTrace(session.name, issueNum, 'review-output.json', reviewForLearnings);
717
+ if (verifyOutput)
718
+ writeTrace(session.name, issueNum, 'verify-output.json', verifyOutput);
719
+ if (plan.summary)
720
+ writeTrace(session.name, issueNum, 'plan.json', JSON.stringify(plan, null, 2));
721
+ // Config snapshot (written once per run, idempotent)
722
+ try {
723
+ const configPath = join(projectDir, '.alpha-loop.yaml');
724
+ if (existsSync(configPath)) {
725
+ writeConfigSnapshot(session.name, readFileSync(configPath, 'utf-8'));
726
+ }
727
+ }
728
+ catch { /* non-fatal */ }
729
+ // Run-level scores and costs for this issue
730
+ const issueScoreResult = {
731
+ issueNum,
732
+ status: testsPassing ? 'success' : 'failure',
733
+ testsPassing,
734
+ verifyPassing,
735
+ verifySkipped,
736
+ retries: testRetries,
737
+ duration,
738
+ filesChanged,
739
+ stepsCompleted,
740
+ };
741
+ writeScores(session.name, computeScores([issueScoreResult]));
742
+ writeCosts(session.name, computeCosts(stepCosts));
743
+ }
744
+ catch (err) {
745
+ log.warn(`Failed to write traces for #${issueNum}: ${err}`);
746
+ }
747
+ }
302
748
  // --- Step 10: Update issue status ---
303
749
  log.step('Step 10: Updating issue status');
304
750
  if (!config.dryRun) {
305
751
  const testsStatus = testsPassing ? 'PASSING' : 'FAILING';
306
- updateProjectStatus(config.repo, config.project, config.repoOwner, issueNum, 'Done');
752
+ updateProjectStatus(config.repo, config.project, config.repoOwner, issueNum, 'In Review');
307
753
  labelIssue(config.repo, issueNum, 'in-review', 'in-progress');
308
754
  commentIssue(config.repo, issueNum, `Automated implementation complete.\n\n**PR**: ${prUrl ?? 'N/A'}\n**Tests**: ${testsStatus}\n**Review**: Attached to PR body.\n\n---\n*Processed by alpha-loop in ${duration}s*`);
309
755
  }
@@ -338,11 +784,6 @@ export async function processIssue(issueNum, title, body, config, session) {
338
784
  autoCleanup: config.autoCleanup,
339
785
  dryRun: config.dryRun,
340
786
  });
341
- // Count files changed
342
- let filesChanged = 0;
343
- if (runDiff) {
344
- filesChanged = (runDiff.match(/^diff --git/gm) ?? []).length;
345
- }
346
787
  const result = {
347
788
  issueNum,
348
789
  title,
@@ -350,6 +791,7 @@ export async function processIssue(issueNum, title, body, config, session) {
350
791
  prUrl,
351
792
  testsPassing,
352
793
  verifyPassing,
794
+ verifySkipped,
353
795
  duration,
354
796
  filesChanged,
355
797
  };
@@ -360,17 +802,30 @@ export async function processIssue(issueNum, title, body, config, session) {
360
802
  log.info(`PR: ${prUrl}`);
361
803
  return result;
362
804
  }
363
/**
 * Build a failure result record for an issue that could not be processed.
 * @param {number} issueNum - Issue number that failed.
 * @param {string} title - Issue title.
 * @param {number} startTime - Epoch milliseconds when processing began.
 * @param {string} [reason] - Optional human-readable failure reason.
 * @returns {object} Result object marked as failure with zeroed metrics.
 */
function failureResult(issueNum, title, startTime, reason) {
    const elapsedSeconds = Math.round((Date.now() - startTime) / 1000);
    return {
        issueNum,
        title,
        status: 'failure',
        failureReason: reason,
        testsPassing: false,
        verifyPassing: false,
        verifySkipped: false,
        duration: elapsedSeconds,
        filesChanged: 0,
    };
}
818
/**
 * Re-queue an issue back to ready state after a transient failure.
 * Restores the ready label and moves the project status back to Todo.
 * No-op in dry-run mode.
 * @param {object} config - Run configuration (repo, project, owner, labels).
 * @param {number} issueNum - Issue to re-queue.
 */
function requeueIssue(config, issueNum) {
    if (config.dryRun) {
        return;
    }
    const { repo, project, repoOwner, labelReady } = config;
    labelIssue(repo, issueNum, labelReady, 'in-progress');
    updateProjectStatus(repo, project, repoOwner, issueNum, 'Todo');
    log.info(`Issue #${issueNum} re-queued for next run`);
}
374
829
  function loadFileIfExists(filePath) {
375
830
  if (!existsSync(filePath))
376
831
  return null;
@@ -381,57 +836,57 @@ function loadFileIfExists(filePath) {
381
836
  return null;
382
837
  }
383
838
  }
384
- /**
385
- * Extract just the review summary from the full agent output.
386
- * Looks for the structured report section the reviewer agent produces.
387
- */
388
- function extractReviewSummary(reviewOutput) {
389
- if (!reviewOutput)
390
- return 'No review available';
391
- // Look for the structured review report (reviewer agent outputs this format)
392
- const patterns = [
393
- /### Review Summary[\s\S]*$/m,
394
- /### Findings Fixed[\s\S]*$/m,
395
- /## Review Report[\s\S]*$/m,
396
- /\*\*Verdict:.*$/m,
397
- ];
398
- for (const pattern of patterns) {
399
- const match = reviewOutput.match(pattern);
400
- if (match)
401
- return match[0].trim();
402
- }
403
- // Fallback: take the last 500 chars which usually has the summary
404
- const lines = reviewOutput.trim().split('\n');
405
- const lastLines = lines.slice(-20).join('\n');
406
- if (lastLines.length > 0)
407
- return lastLines;
408
- return 'Review completed — see logs for details';
409
- }
410
839
/**
 * Extract a one-line test summary from raw test output.
 * Aggregates results across multiple test runners (pytest, Jest, Vitest).
 * Handles concurrent output like: [pytest] 189 passed, [frontend] Tests 6 passed, etc.
 * @param {string} testOutput - Raw combined output from the test step.
 * @returns {string} Summary like "189 passed, 2 failed, 1 skipped", or '' when
 *   no pass/fail counts were found.
 */
function extractTestSummary(testOutput) {
    if (!testOutput)
        return '';
    let totalPassed = 0;
    let totalFailed = 0;
    let totalSkipped = 0;
    // Pull "N passed" / "N failed" / "N skipped" counts out of one summary
    // segment, independent of the order the runner prints them in.
    const addCounts = (segment) => {
        const passed = segment.match(/(\d+) passed/);
        const failed = segment.match(/(\d+) failed/);
        const skipped = segment.match(/(\d+) skipped/);
        if (passed)
            totalPassed += parseInt(passed[1], 10);
        if (failed)
            totalFailed += parseInt(failed[1], 10);
        if (skipped)
            totalSkipped += parseInt(skipped[1], 10);
    };
    // Pytest summary line: "=== 189 passed, 1 skipped in 7.05s ===" or "=== 5 failed, 184 passed ==="
    for (const match of testOutput.matchAll(/=+\s*(.*?)\s*=+/g)) {
        addCounts(match[1]);
    }
    // Jest summary: "Tests: 30 passed, 30 total" or "Tests: 2 failed, 1 skipped, 27 passed, 30 total".
    // Counts are extracted independently so an interleaved "skipped" (or an
    // all-fail line with no "passed") cannot drop the failed count.
    for (const match of testOutput.matchAll(/Tests:\s+([^\n]*total)/g)) {
        addCounts(match[1]);
    }
    // Vitest summary: "Tests  2 failed | 6 passed (8)" — spaces (no colon),
    // "|" separators between segments, total count in parens.
    for (const match of testOutput.matchAll(/Tests\s+([^\n:]*?passed)\s*\(\d+\)/g)) {
        addCounts(match[1]);
    }
    if (totalPassed === 0 && totalFailed === 0)
        return '';
    const parts = [`${totalPassed} passed`];
    if (totalFailed > 0)
        parts.push(`${totalFailed} failed`);
    if (totalSkipped > 0)
        parts.push(`${totalSkipped} skipped`);
    return parts.join(', ');
}
432
- function buildPRBody(issueNum, title, reviewOutput, testOutput, testsPassing, verifyPassing, body) {
886
+ function buildPRBody(issueNum, title, reviewGate, testOutput, testsPassing, verifyPassing, verifySkipped, body) {
433
887
  const testSummary = extractTestSummary(testOutput);
434
- const reviewSummary = extractReviewSummary(reviewOutput);
888
+ const verifyStatus = verifySkipped ? 'SKIPPED' : verifyPassing ? 'PASS' : 'FAIL';
889
+ const reviewStatus = reviewGate.passed ? 'PASS' : 'FAIL';
435
890
  const lines = [
436
891
  `Closes #${issueNum}`,
437
892
  '',
@@ -444,14 +899,28 @@ function buildPRBody(issueNum, title, reviewOutput, testOutput, testsPassing, ve
444
899
  `| Check | Status |`,
445
900
  `|-------|--------|`,
446
901
  `| Unit tests | ${testsPassing ? 'PASS' : 'FAIL'} |`,
447
- `| Verification | ${verifyPassing ? 'PASS' : 'FAIL'} |`,
902
+ `| Code review | ${reviewStatus} |`,
903
+ `| Verification | ${verifyStatus} |`,
448
904
  ];
449
905
  if (testSummary) {
450
906
  lines.push(`| Details | ${testSummary} |`);
451
907
  }
452
908
  lines.push('');
453
- // Code review — just the summary, not the full agent output
454
- lines.push('## Code Review', '', reviewSummary, '');
909
+ // Code review — structured from gate result
910
+ if (reviewGate.findings.length > 0) {
911
+ lines.push('## Code Review', '');
912
+ lines.push(reviewGate.summary || 'Review completed');
913
+ lines.push('');
914
+ for (const f of reviewGate.findings) {
915
+ const status = f.fixed ? 'FIXED' : 'OPEN';
916
+ const fileRef = f.file ? ` \`${f.file}\`` : '';
917
+ lines.push(`- **${f.severity.toUpperCase()}** [${status}]${fileRef}: ${f.description}`);
918
+ }
919
+ lines.push('');
920
+ }
921
+ else {
922
+ lines.push('## Code Review', '', reviewGate.summary || 'No issues found', '');
923
+ }
455
924
  // What to test — from issue body or generic
456
925
  const whatToTestMatch = body.match(/## Test Requirements[\s\S]*?(?=\n## |$)/);
457
926
  if (whatToTestMatch) {