@bradtaylorsf/alpha-loop 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -19
- package/dist/cli.js +83 -1
- package/dist/cli.js.map +1 -1
- package/dist/commands/auth.js +1 -1
- package/dist/commands/auth.js.map +1 -1
- package/dist/commands/eval.d.ts +53 -0
- package/dist/commands/eval.js +538 -0
- package/dist/commands/eval.js.map +1 -0
- package/dist/commands/evolve.d.ts +25 -0
- package/dist/commands/evolve.js +270 -0
- package/dist/commands/evolve.js.map +1 -0
- package/dist/commands/history.d.ts +1 -1
- package/dist/commands/history.js +4 -4
- package/dist/commands/history.js.map +1 -1
- package/dist/commands/init.d.ts +14 -0
- package/dist/commands/init.js +199 -30
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/resume.js +1 -0
- package/dist/commands/resume.js.map +1 -1
- package/dist/commands/run.js +170 -12
- package/dist/commands/run.js.map +1 -1
- package/dist/commands/scan.d.ts +1 -1
- package/dist/commands/scan.js +12 -9
- package/dist/commands/scan.js.map +1 -1
- package/dist/commands/sync.d.ts +5 -0
- package/dist/commands/sync.js +24 -5
- package/dist/commands/sync.js.map +1 -1
- package/dist/commands/vision.js +5 -3
- package/dist/commands/vision.js.map +1 -1
- package/dist/engine/agents.d.ts +6 -1
- package/dist/engine/agents.js +14 -12
- package/dist/engine/agents.js.map +1 -1
- package/dist/engine/prerequisites.d.ts +4 -7
- package/dist/engine/prerequisites.js +12 -36
- package/dist/engine/prerequisites.js.map +1 -1
- package/dist/lib/agent.d.ts +18 -0
- package/dist/lib/agent.js +211 -30
- package/dist/lib/agent.js.map +1 -1
- package/dist/lib/config.d.ts +25 -2
- package/dist/lib/config.js +80 -7
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/eval-checks.d.ts +91 -0
- package/dist/lib/eval-checks.js +254 -0
- package/dist/lib/eval-checks.js.map +1 -0
- package/dist/lib/eval-runner.d.ts +29 -0
- package/dist/lib/eval-runner.js +439 -0
- package/dist/lib/eval-runner.js.map +1 -0
- package/dist/lib/eval.d.ts +170 -0
- package/dist/lib/eval.js +507 -0
- package/dist/lib/eval.js.map +1 -0
- package/dist/lib/learning.js +2 -2
- package/dist/lib/learning.js.map +1 -1
- package/dist/lib/pipeline.d.ts +44 -0
- package/dist/lib/pipeline.js +607 -138
- package/dist/lib/pipeline.js.map +1 -1
- package/dist/lib/prompts.d.ts +19 -0
- package/dist/lib/prompts.js +56 -5
- package/dist/lib/prompts.js.map +1 -1
- package/dist/lib/score.d.ts +80 -0
- package/dist/lib/score.js +172 -0
- package/dist/lib/score.js.map +1 -0
- package/dist/lib/session.d.ts +2 -1
- package/dist/lib/session.js +70 -19
- package/dist/lib/session.js.map +1 -1
- package/dist/lib/traces.d.ts +173 -0
- package/dist/lib/traces.js +272 -0
- package/dist/lib/traces.js.map +1 -0
- package/dist/lib/verify.d.ts +7 -1
- package/dist/lib/verify.js +109 -157
- package/dist/lib/verify.js.map +1 -1
- package/dist/lib/worktree.d.ts +1 -0
- package/dist/lib/worktree.js +9 -1
- package/dist/lib/worktree.js.map +1 -1
- package/package.json +1 -1
- package/templates/agents/implementer.md +1 -1
- package/templates/agents/reviewer.md +1 -1
- package/dist/engine/config.d.ts +0 -71
- package/dist/engine/config.js +0 -73
- package/dist/engine/config.js.map +0 -1
package/dist/lib/pipeline.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Process Issue Pipeline — the 12-step orchestration for a single issue.
|
|
3
3
|
*/
|
|
4
|
-
import { mkdirSync, readFileSync, existsSync } from 'node:fs';
|
|
4
|
+
import { mkdirSync, readFileSync, writeFileSync, unlinkSync, existsSync } from 'node:fs';
|
|
5
5
|
import { join } from 'node:path';
|
|
6
6
|
import { log } from './logger.js';
|
|
7
7
|
import { exec } from './shell.js';
|
|
@@ -13,8 +13,188 @@ import { runTests } from './testing.js';
|
|
|
13
13
|
import { runVerify } from './verify.js';
|
|
14
14
|
import { extractLearnings, getLearningContext } from './learning.js';
|
|
15
15
|
import { saveResult, getPreviousResult } from './session.js';
|
|
16
|
+
import { writeTrace, writeTraceMetadata, writeTraceToSubdir, writeConfigSnapshot, writeScores, writeCosts, computeScores, computeCosts, } from './traces.js';
|
|
17
|
+
import { estimateCost } from './config.js';
|
|
16
18
|
/** Max diff size to include in learning analysis. */
|
|
17
19
|
const MAX_DIFF_CHARS = 10_000;
|
|
20
|
+
/**
|
|
21
|
+
* Build a StepCost entry from an AgentResult.
|
|
22
|
+
* Uses parsed cost/tokens if available, otherwise estimates from output length.
|
|
23
|
+
*/
|
|
24
|
+
function buildStepCost(step, issueNum, agentResult, config) {
|
|
25
|
+
const model = agentResult.model || config.model;
|
|
26
|
+
if (agentResult.costUsd != null && agentResult.inputTokens != null && agentResult.outputTokens != null) {
|
|
27
|
+
return {
|
|
28
|
+
step,
|
|
29
|
+
issueNum,
|
|
30
|
+
model,
|
|
31
|
+
input_tokens: agentResult.inputTokens,
|
|
32
|
+
output_tokens: agentResult.outputTokens,
|
|
33
|
+
cost_usd: agentResult.costUsd,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
// Fallback: estimate tokens from output length (chars / 4 ≈ tokens)
|
|
37
|
+
const estimatedOutputTokens = Math.round(agentResult.output.length / 4);
|
|
38
|
+
const estimatedInputTokens = Math.round(estimatedOutputTokens * 1.3);
|
|
39
|
+
const costUsd = estimateCost(model, estimatedInputTokens, estimatedOutputTokens, config.pricing);
|
|
40
|
+
return {
|
|
41
|
+
step,
|
|
42
|
+
issueNum,
|
|
43
|
+
model,
|
|
44
|
+
input_tokens: estimatedInputTokens,
|
|
45
|
+
output_tokens: estimatedOutputTokens,
|
|
46
|
+
cost_usd: costUsd,
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
/** Record a prompt trace to the prompts/ subdirectory. */
|
|
50
|
+
function tracePrompt(session, issueNum, step, prompt) {
|
|
51
|
+
try {
|
|
52
|
+
writeTraceToSubdir(session, 'prompts', `issue-${issueNum}-${step}.md`, prompt);
|
|
53
|
+
}
|
|
54
|
+
catch { /* non-fatal */ }
|
|
55
|
+
}
|
|
56
|
+
/** Record an agent output trace to the outputs/ subdirectory. */
|
|
57
|
+
function traceOutput(session, issueNum, step, output) {
|
|
58
|
+
try {
|
|
59
|
+
writeTraceToSubdir(session, 'outputs', `issue-${issueNum}-${step}.log`, output);
|
|
60
|
+
}
|
|
61
|
+
catch { /* non-fatal */ }
|
|
62
|
+
}
|
|
63
|
+
/** Record a diff trace to the diffs/ subdirectory. */
|
|
64
|
+
function traceDiff(session, issueNum, step, diff) {
|
|
65
|
+
try {
|
|
66
|
+
writeTraceToSubdir(session, 'diffs', `issue-${issueNum}-${step}.patch`, diff);
|
|
67
|
+
}
|
|
68
|
+
catch { /* non-fatal */ }
|
|
69
|
+
}
|
|
70
|
+
/** Record a test output trace to the tests/ subdirectory. */
|
|
71
|
+
function traceTest(session, issueNum, attempt, output) {
|
|
72
|
+
try {
|
|
73
|
+
writeTraceToSubdir(session, 'tests', `issue-${issueNum}-test-${attempt}.txt`, output);
|
|
74
|
+
}
|
|
75
|
+
catch { /* non-fatal */ }
|
|
76
|
+
}
|
|
77
|
+
/** Record a verify output trace to the verify/ subdirectory. */
|
|
78
|
+
function traceVerify(session, issueNum, attempt, output) {
|
|
79
|
+
try {
|
|
80
|
+
writeTraceToSubdir(session, 'verify', `issue-${issueNum}-verify-${attempt}.txt`, output);
|
|
81
|
+
}
|
|
82
|
+
catch { /* non-fatal */ }
|
|
83
|
+
}
|
|
84
|
+
/** Patterns that indicate a transient agent error (re-queue, don't mark as failed). */
|
|
85
|
+
const TRANSIENT_ERROR_PATTERNS = [
|
|
86
|
+
/usage limit/i,
|
|
87
|
+
/rate limit/i,
|
|
88
|
+
/too many requests/i,
|
|
89
|
+
/quota exceeded/i,
|
|
90
|
+
/capacity/i,
|
|
91
|
+
/try again/i,
|
|
92
|
+
];
|
|
93
|
+
/**
|
|
94
|
+
* Check if agent output indicates a transient error (usage limits, rate limits).
|
|
95
|
+
* These issues should be re-queued, not marked as permanently failed.
|
|
96
|
+
*/
|
|
97
|
+
function isTransientError(output) {
|
|
98
|
+
return TRANSIENT_ERROR_PATTERNS.some((p) => p.test(output));
|
|
99
|
+
}
|
|
100
|
+
/** Default gate result when agent doesn't write one (assume pass). */
|
|
101
|
+
const DEFAULT_GATE = {
|
|
102
|
+
passed: true,
|
|
103
|
+
summary: 'Gate agent did not write a result file — assuming pass',
|
|
104
|
+
findings: [],
|
|
105
|
+
};
|
|
106
|
+
/** Default plan when planning fails or is skipped. */
|
|
107
|
+
const DEFAULT_PLAN = {
|
|
108
|
+
summary: '',
|
|
109
|
+
files: [],
|
|
110
|
+
implementation: '',
|
|
111
|
+
testing: { needed: true, reason: 'Default: run project test command' },
|
|
112
|
+
verification: { needed: false, reason: 'Default: skip verification unless plan requests it' },
|
|
113
|
+
};
|
|
114
|
+
/**
|
|
115
|
+
* Read and validate a plan JSON file written by the planning agent.
|
|
116
|
+
* Falls back to DEFAULT_PLAN if the file doesn't exist or is invalid.
|
|
117
|
+
*/
|
|
118
|
+
function readPlan(planFile) {
|
|
119
|
+
try {
|
|
120
|
+
if (!existsSync(planFile))
|
|
121
|
+
return DEFAULT_PLAN;
|
|
122
|
+
const raw = readFileSync(planFile, 'utf-8');
|
|
123
|
+
const parsed = JSON.parse(raw);
|
|
124
|
+
return {
|
|
125
|
+
summary: String(parsed.summary ?? ''),
|
|
126
|
+
files: Array.isArray(parsed.files) ? parsed.files.map(String) : [],
|
|
127
|
+
implementation: String(parsed.implementation ?? ''),
|
|
128
|
+
testing: {
|
|
129
|
+
needed: parsed.testing?.needed !== false,
|
|
130
|
+
reason: String(parsed.testing?.reason ?? 'No reason given'),
|
|
131
|
+
},
|
|
132
|
+
verification: {
|
|
133
|
+
needed: parsed.verification?.needed === true,
|
|
134
|
+
instructions: parsed.verification?.instructions || undefined,
|
|
135
|
+
reason: String(parsed.verification?.reason ?? 'No reason given'),
|
|
136
|
+
},
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
catch {
|
|
140
|
+
return DEFAULT_PLAN;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Read and validate a gate result JSON file written by review/verify agents.
|
|
145
|
+
* Falls back to DEFAULT_GATE if the file doesn't exist or is invalid.
|
|
146
|
+
*/
|
|
147
|
+
export function readGateResult(gateFile) {
|
|
148
|
+
try {
|
|
149
|
+
if (!existsSync(gateFile))
|
|
150
|
+
return DEFAULT_GATE;
|
|
151
|
+
const raw = readFileSync(gateFile, 'utf-8');
|
|
152
|
+
const parsed = JSON.parse(raw);
|
|
153
|
+
return {
|
|
154
|
+
passed: parsed.passed === true,
|
|
155
|
+
summary: String(parsed.summary ?? ''),
|
|
156
|
+
findings: Array.isArray(parsed.findings)
|
|
157
|
+
? parsed.findings.map((f) => ({
|
|
158
|
+
severity: (['critical', 'warning', 'info'].includes(String(f.severity)) ? f.severity : 'info'),
|
|
159
|
+
description: String(f.description ?? ''),
|
|
160
|
+
fixed: f.fixed === true,
|
|
161
|
+
file: f.file ? String(f.file) : undefined,
|
|
162
|
+
}))
|
|
163
|
+
: [],
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
catch {
|
|
167
|
+
return DEFAULT_GATE;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
/**
|
|
171
|
+
* Move a JSON file from worktree to session logs dir (for inspection).
|
|
172
|
+
* Deletes the source file from the worktree. Non-fatal on failure.
|
|
173
|
+
*/
|
|
174
|
+
function moveToSessionLogs(src, dest) {
|
|
175
|
+
try {
|
|
176
|
+
if (!existsSync(src))
|
|
177
|
+
return;
|
|
178
|
+
const content = readFileSync(src, 'utf-8');
|
|
179
|
+
writeFileSync(dest, content);
|
|
180
|
+
unlinkSync(src);
|
|
181
|
+
}
|
|
182
|
+
catch { /* non-fatal */ }
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Format gate findings into a prompt section for the implementer.
|
|
186
|
+
*/
|
|
187
|
+
export function formatGateFindings(gate, gateType) {
|
|
188
|
+
const unfixed = gate.findings.filter((f) => !f.fixed);
|
|
189
|
+
if (unfixed.length === 0)
|
|
190
|
+
return '';
|
|
191
|
+
const lines = [`## ${gateType} Findings (MUST FIX)`, '', gate.summary, ''];
|
|
192
|
+
for (const f of unfixed) {
|
|
193
|
+
const fileRef = f.file ? ` (${f.file})` : '';
|
|
194
|
+
lines.push(`- [${f.severity.toUpperCase()}]${fileRef} ${f.description}`);
|
|
195
|
+
}
|
|
196
|
+
return lines.join('\n');
|
|
197
|
+
}
|
|
18
198
|
/**
|
|
19
199
|
* Process a single issue through the full pipeline.
|
|
20
200
|
* Steps: status → worktree → plan → implement → test+retry → verify+retry →
|
|
@@ -23,6 +203,8 @@ const MAX_DIFF_CHARS = 10_000;
|
|
|
23
203
|
export async function processIssue(issueNum, title, body, config, session) {
|
|
24
204
|
const startTime = Date.now();
|
|
25
205
|
const projectDir = process.cwd();
|
|
206
|
+
const stepCosts = [];
|
|
207
|
+
const stepsCompleted = [];
|
|
26
208
|
// Setup logging
|
|
27
209
|
mkdirSync(session.logsDir, { recursive: true });
|
|
28
210
|
const logFile = join(session.logsDir, `issue-${issueNum}.log`);
|
|
@@ -49,6 +231,7 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
49
231
|
sessionBranch: session.branch,
|
|
50
232
|
autoMerge: config.autoMerge,
|
|
51
233
|
skipInstall: config.skipInstall,
|
|
234
|
+
setupCommand: config.setupCommand,
|
|
52
235
|
dryRun: config.dryRun,
|
|
53
236
|
});
|
|
54
237
|
worktreePath = wt.path;
|
|
@@ -62,25 +245,78 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
62
245
|
}
|
|
63
246
|
return failureResult(issueNum, title, startTime);
|
|
64
247
|
}
|
|
65
|
-
// --- Step 3: Plan (
|
|
248
|
+
// --- Step 3: Plan (structured JSON — controls test/verify steps) ---
|
|
66
249
|
log.step('Step 3: Planning');
|
|
67
|
-
let
|
|
250
|
+
let plan = DEFAULT_PLAN;
|
|
251
|
+
// Write plan inside the worktree (agents sandbox to their CWD), then move to sessions dir
|
|
252
|
+
const planFileInWorktree = join(worktreePath, `plan-issue-${issueNum}.json`);
|
|
253
|
+
const planFileInSession = join(session.logsDir, `plan-issue-${issueNum}.json`);
|
|
68
254
|
if (!config.dryRun) {
|
|
69
255
|
try {
|
|
256
|
+
const planPrompt = `Analyze this GitHub issue and produce a structured implementation plan.
|
|
257
|
+
|
|
258
|
+
Issue #${issueNum}: ${title}
|
|
259
|
+
|
|
260
|
+
${body}
|
|
261
|
+
|
|
262
|
+
Write a JSON file to: plan-issue-${issueNum}.json
|
|
263
|
+
|
|
264
|
+
The file must contain ONLY valid JSON with this exact schema:
|
|
265
|
+
|
|
266
|
+
{
|
|
267
|
+
"summary": "One-line description of what needs to be done",
|
|
268
|
+
"files": ["src/path/to/file.ts", "..."],
|
|
269
|
+
"implementation": "Concise step-by-step plan. What to create, modify, wire up. No issue restatement.",
|
|
270
|
+
"testing": {
|
|
271
|
+
"needed": true,
|
|
272
|
+
"reason": "Why tests are or aren't needed for this change"
|
|
273
|
+
},
|
|
274
|
+
"verification": {
|
|
275
|
+
"needed": false,
|
|
276
|
+
"instructions": "If needed: specific playwright-cli steps to verify the feature. If not needed: omit this field.",
|
|
277
|
+
"reason": "Why verification is or isn't needed (e.g. no UI changes, API-only, config change)"
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
Rules:
|
|
282
|
+
- testing.needed: true if ANY code changes could affect behavior. false only for docs, config, or comments.
|
|
283
|
+
- verification.needed: true ONLY if the issue changes user-visible UI that can be tested in a browser.
|
|
284
|
+
- verification.instructions: if needed, list the exact playwright-cli commands to verify (open URL, click elements, check content).
|
|
285
|
+
- implementation: be concise and actionable. List files to modify and what to change in each.
|
|
286
|
+
- Write ONLY the JSON file. Do not create any other files or make any code changes.`;
|
|
287
|
+
// Trace the plan prompt
|
|
288
|
+
tracePrompt(session.name, issueNum, 'plan', planPrompt);
|
|
70
289
|
const planResult = await spawnAgent({
|
|
71
|
-
agent:
|
|
290
|
+
agent: config.agent,
|
|
72
291
|
model: config.model,
|
|
73
|
-
prompt:
|
|
292
|
+
prompt: planPrompt,
|
|
74
293
|
cwd: worktreePath,
|
|
75
294
|
logFile: join(session.logsDir, `issue-${issueNum}-plan.log`),
|
|
76
295
|
verbose: config.verbose,
|
|
77
296
|
});
|
|
78
|
-
|
|
79
|
-
|
|
297
|
+
// Trace the plan output and costs
|
|
298
|
+
traceOutput(session.name, issueNum, 'plan', planResult.output);
|
|
299
|
+
stepCosts.push(buildStepCost('plan', issueNum, planResult, config));
|
|
300
|
+
// Detect transient errors (usage limits) during planning
|
|
301
|
+
if (planResult.exitCode !== 0 && isTransientError(planResult.output)) {
|
|
302
|
+
log.warn(`Agent hit a transient error during planning for #${issueNum} — re-queuing`);
|
|
303
|
+
requeueIssue(config, issueNum);
|
|
304
|
+
await cleanupWorktree({ issueNum, projectDir, autoCleanup: config.autoCleanup });
|
|
305
|
+
return failureResult(issueNum, title, startTime, 'transient');
|
|
306
|
+
}
|
|
307
|
+
plan = readPlan(planFileInWorktree);
|
|
308
|
+
stepsCompleted.push('plan');
|
|
309
|
+
if (plan.summary) {
|
|
310
|
+
// Move plan from worktree to sessions dir for inspection, clean up worktree
|
|
311
|
+
moveToSessionLogs(planFileInWorktree, planFileInSession);
|
|
312
|
+
log.success(`Plan: ${plan.summary} | Tests: ${plan.testing.needed ? 'yes' : 'skip'} | Verify: ${plan.verification.needed ? 'yes' : 'skip'}`);
|
|
313
|
+
}
|
|
314
|
+
else {
|
|
315
|
+
log.warn('Planning agent did not write plan file, using defaults (run all tests, skip verify)');
|
|
80
316
|
}
|
|
81
317
|
}
|
|
82
318
|
catch {
|
|
83
|
-
log.warn('Planning stage failed,
|
|
319
|
+
log.warn('Planning stage failed, using defaults');
|
|
84
320
|
}
|
|
85
321
|
}
|
|
86
322
|
else {
|
|
@@ -97,26 +333,38 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
97
333
|
const implementPrompt = buildImplementPrompt({
|
|
98
334
|
issueNum,
|
|
99
335
|
title,
|
|
100
|
-
body
|
|
336
|
+
body,
|
|
337
|
+
planContent: plan.implementation || undefined,
|
|
101
338
|
visionContext: visionContext ?? undefined,
|
|
102
339
|
projectContext: projectContext ?? undefined,
|
|
103
340
|
previousResult: previousResult ?? undefined,
|
|
104
341
|
learningContext: learningContext || undefined,
|
|
105
342
|
});
|
|
343
|
+
// Trace the implement prompt
|
|
344
|
+
tracePrompt(session.name, issueNum, 'implement', implementPrompt);
|
|
106
345
|
const implResult = await spawnAgent({
|
|
107
|
-
agent:
|
|
346
|
+
agent: config.agent,
|
|
108
347
|
model: config.model,
|
|
109
348
|
prompt: implementPrompt,
|
|
110
349
|
cwd: worktreePath,
|
|
111
350
|
logFile: join(session.logsDir, `issue-${issueNum}-implement.log`),
|
|
112
351
|
verbose: config.verbose,
|
|
113
352
|
});
|
|
353
|
+
// Trace the implement output and costs
|
|
354
|
+
traceOutput(session.name, issueNum, 'implement', implResult.output);
|
|
355
|
+
stepCosts.push(buildStepCost('implement', issueNum, implResult, config));
|
|
114
356
|
if (implResult.exitCode !== 0) {
|
|
357
|
+
if (isTransientError(implResult.output)) {
|
|
358
|
+
log.warn(`Agent hit a transient error during implementation for #${issueNum} — re-queuing`);
|
|
359
|
+
requeueIssue(config, issueNum);
|
|
360
|
+
await cleanupWorktree({ issueNum, projectDir, autoCleanup: config.autoCleanup });
|
|
361
|
+
return failureResult(issueNum, title, startTime, 'transient');
|
|
362
|
+
}
|
|
115
363
|
log.error(`Implementation failed for issue #${issueNum}`);
|
|
116
364
|
labelIssue(config.repo, issueNum, 'failed', 'in-progress');
|
|
117
365
|
commentIssue(config.repo, issueNum, 'Agent loop failed during implementation. See logs for details.');
|
|
118
366
|
await cleanupWorktree({ issueNum, projectDir, autoCleanup: config.autoCleanup });
|
|
119
|
-
return failureResult(issueNum, title, startTime);
|
|
367
|
+
return failureResult(issueNum, title, startTime, 'permanent');
|
|
120
368
|
}
|
|
121
369
|
// Auto-commit if agent didn't
|
|
122
370
|
const statusResult = exec('git status --porcelain', { cwd: worktreePath });
|
|
@@ -124,6 +372,14 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
124
372
|
exec('git add -A', { cwd: worktreePath });
|
|
125
373
|
exec(`git commit -m "feat: implement issue #${issueNum} - ${title}"`, { cwd: worktreePath });
|
|
126
374
|
}
|
|
375
|
+
stepsCompleted.push('implement');
|
|
376
|
+
// Capture implement diff
|
|
377
|
+
try {
|
|
378
|
+
const implDiff = exec(`git diff "origin/${config.baseBranch}...HEAD"`, { cwd: worktreePath });
|
|
379
|
+
if (implDiff.stdout)
|
|
380
|
+
traceDiff(session.name, issueNum, 'implement', implDiff.stdout);
|
|
381
|
+
}
|
|
382
|
+
catch { /* non-fatal */ }
|
|
127
383
|
}
|
|
128
384
|
else {
|
|
129
385
|
log.dry('Would run implementation agent');
|
|
@@ -132,33 +388,57 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
132
388
|
log.step('Step 5: Running tests');
|
|
133
389
|
let testOutput = '';
|
|
134
390
|
let testsPassing = false;
|
|
135
|
-
|
|
391
|
+
let testRetries = 0;
|
|
392
|
+
if (!plan.testing.needed) {
|
|
393
|
+
log.info(`Tests skipped by plan: ${plan.testing.reason}`);
|
|
394
|
+
testsPassing = true;
|
|
395
|
+
testOutput = `Tests skipped by plan: ${plan.testing.reason}`;
|
|
396
|
+
}
|
|
397
|
+
for (let attempt = 1; testsPassing ? false : attempt <= config.maxTestRetries; attempt++) {
|
|
136
398
|
log.info(`Test attempt ${attempt} of ${config.maxTestRetries}`);
|
|
137
399
|
const testResult = runTests(worktreePath, config, logFile);
|
|
138
400
|
testOutput = testResult.output;
|
|
401
|
+
// Trace test output
|
|
402
|
+
traceTest(session.name, issueNum, attempt, testOutput);
|
|
139
403
|
if (testResult.passed) {
|
|
140
404
|
testsPassing = true;
|
|
405
|
+
stepsCompleted.push('test');
|
|
141
406
|
log.success(`All tests passed on attempt ${attempt}`);
|
|
142
407
|
break;
|
|
143
408
|
}
|
|
144
409
|
if (attempt < config.maxTestRetries) {
|
|
410
|
+
testRetries++;
|
|
145
411
|
log.warn(`Tests failed on attempt ${attempt}, invoking agent to fix...`);
|
|
146
412
|
if (!config.dryRun) {
|
|
147
|
-
const fixPrompt = `Tests are failing for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}). Fix the failing tests.\n\nTest output:\n${testOutput}\n\nInstructions:\n1. Read the failing test output carefully and identify the ROOT CAUSE\n2. Fix
|
|
148
|
-
|
|
149
|
-
|
|
413
|
+
const fixPrompt = `Tests are failing for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}). Fix the failing tests.\n\nTest output:\n${testOutput}\n\nInstructions:\n1. Read the failing test output carefully and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum} — do NOT modify test infrastructure, build scripts, or unrelated files\n3. If tests fail due to environment issues (missing venv, wrong port, missing deps), fix only YOUR code — do NOT rewrite the test runner or package.json scripts\n4. Run the tests again to verify\n5. Commit your fixes with a DESCRIPTIVE message that explains WHAT you fixed and WHY it failed.\n Format: fix(#${issueNum}): <what you changed> — <why it was failing>\n Example: fix(#${issueNum}): use port 5435 for postgres — default 5432 conflicts with host service\n DO NOT use generic messages like "fix: resolve test failures"`;
|
|
414
|
+
// Trace fix prompt
|
|
415
|
+
tracePrompt(session.name, issueNum, `fix-${attempt}`, fixPrompt);
|
|
416
|
+
const fixResult = await spawnAgent({
|
|
417
|
+
agent: config.agent,
|
|
150
418
|
model: config.model,
|
|
151
419
|
prompt: fixPrompt,
|
|
152
420
|
cwd: worktreePath,
|
|
421
|
+
resume: true,
|
|
153
422
|
logFile: join(session.logsDir, `issue-${issueNum}-fix-${attempt}.log`),
|
|
154
423
|
verbose: config.verbose,
|
|
155
424
|
});
|
|
425
|
+
// Trace fix output and costs
|
|
426
|
+
traceOutput(session.name, issueNum, `fix-${attempt}`, fixResult.output);
|
|
427
|
+
stepCosts.push(buildStepCost('test_fix', issueNum, fixResult, config));
|
|
428
|
+
stepsCompleted.push(`fix-${attempt}`);
|
|
156
429
|
// Auto-commit fixes
|
|
157
430
|
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
158
431
|
if (fixStatus.stdout.trim()) {
|
|
159
432
|
exec('git add -A', { cwd: worktreePath });
|
|
160
433
|
exec(`git commit -m "fix(#${issueNum}): resolve test failures (attempt ${attempt})"`, { cwd: worktreePath });
|
|
161
434
|
}
|
|
435
|
+
// Capture fix diff
|
|
436
|
+
try {
|
|
437
|
+
const fixDiffResult = exec(`git diff "origin/${config.baseBranch}...HEAD"`, { cwd: worktreePath });
|
|
438
|
+
if (fixDiffResult.stdout)
|
|
439
|
+
traceDiff(session.name, issueNum, `fix-${attempt}`, fixDiffResult.stdout);
|
|
440
|
+
}
|
|
441
|
+
catch { /* non-fatal */ }
|
|
162
442
|
}
|
|
163
443
|
}
|
|
164
444
|
else {
|
|
@@ -166,95 +446,198 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
166
446
|
testOutput = `TESTS FAILED after ${config.maxTestRetries} fix attempts. Latest output:\n${testOutput}`;
|
|
167
447
|
}
|
|
168
448
|
}
|
|
169
|
-
// --- Step 6:
|
|
170
|
-
log.step('Step 6:
|
|
171
|
-
let
|
|
172
|
-
let
|
|
173
|
-
|
|
174
|
-
log.info(
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
449
|
+
// --- Step 6: Review gate (JSON-based) ---
|
|
450
|
+
log.step('Step 6: Code review');
|
|
451
|
+
let reviewOutput = '';
|
|
452
|
+
let reviewGate = DEFAULT_GATE;
|
|
453
|
+
if (config.skipReview) {
|
|
454
|
+
log.info('Code review skipped');
|
|
455
|
+
}
|
|
456
|
+
else if (config.dryRun) {
|
|
457
|
+
log.dry('Would run code review');
|
|
458
|
+
}
|
|
459
|
+
else {
|
|
460
|
+
const reviewFileInWorktree = join(worktreePath, `review-issue-${issueNum}.json`);
|
|
461
|
+
const reviewFileInSession = join(session.logsDir, `review-issue-${issueNum}.json`);
|
|
462
|
+
for (let attempt = 1; attempt <= config.maxTestRetries; attempt++) {
|
|
463
|
+
log.info(`Review attempt ${attempt} of ${config.maxTestRetries}`);
|
|
464
|
+
try {
|
|
465
|
+
const reviewPrompt = buildReviewPrompt({
|
|
466
|
+
issueNum,
|
|
467
|
+
title,
|
|
468
|
+
body,
|
|
469
|
+
baseBranch: config.baseBranch,
|
|
470
|
+
visionContext: loadFileIfExists(join(projectDir, '.alpha-loop', 'vision.md')) ?? undefined,
|
|
471
|
+
});
|
|
472
|
+
// Trace review prompt
|
|
473
|
+
tracePrompt(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewPrompt);
|
|
474
|
+
const reviewResult = await spawnAgent({
|
|
475
|
+
agent: config.agent,
|
|
476
|
+
model: config.reviewModel,
|
|
477
|
+
prompt: reviewPrompt,
|
|
478
|
+
cwd: worktreePath,
|
|
479
|
+
logFile: join(session.logsDir, `issue-${issueNum}-review${attempt > 1 ? `-${attempt}` : ''}.log`),
|
|
480
|
+
verbose: config.verbose,
|
|
481
|
+
});
|
|
482
|
+
// Trace review output and costs
|
|
483
|
+
traceOutput(session.name, issueNum, `review${attempt > 1 ? `-${attempt}` : ''}`, reviewResult.output);
|
|
484
|
+
stepCosts.push(buildStepCost('review', issueNum, reviewResult, config));
|
|
485
|
+
reviewOutput = reviewResult.output;
|
|
195
486
|
}
|
|
196
|
-
|
|
197
|
-
log.warn(
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
487
|
+
catch {
|
|
488
|
+
log.warn('Code review failed, continuing without review');
|
|
489
|
+
reviewOutput = 'Code review could not be completed';
|
|
490
|
+
break;
|
|
491
|
+
}
|
|
492
|
+
// Read the gate JSON
|
|
493
|
+
reviewGate = readGateResult(reviewFileInWorktree);
|
|
494
|
+
moveToSessionLogs(reviewFileInWorktree, reviewFileInSession);
|
|
495
|
+
if (reviewGate.passed) {
|
|
496
|
+
stepsCompleted.push('review');
|
|
497
|
+
log.success(`Review passed: ${reviewGate.summary || 'no issues found'}`);
|
|
498
|
+
break;
|
|
499
|
+
}
|
|
500
|
+
// Review found unfixed issues — loop back to implementer
|
|
501
|
+
const unfixedCount = reviewGate.findings.filter((f) => !f.fixed).length;
|
|
502
|
+
log.warn(`Review found ${unfixedCount} unfixed issue(s), sending back to implementer...`);
|
|
503
|
+
if (attempt < config.maxTestRetries) {
|
|
504
|
+
const findings = formatGateFindings(reviewGate, 'Code Review');
|
|
505
|
+
const fixPrompt = `The code review for issue #${issueNum} found problems that need to be fixed.\n\n${findings}\n\nInstructions:\n1. Address each finding listed above\n2. Run tests to make sure nothing is broken\n3. Commit your fixes with: git commit -m "fix(#${issueNum}): address review findings"`;
|
|
506
|
+
// Trace review-fix prompt
|
|
507
|
+
tracePrompt(session.name, issueNum, `review-fix-${attempt}`, fixPrompt);
|
|
508
|
+
const reviewFixResult = await spawnAgent({
|
|
509
|
+
agent: config.agent,
|
|
201
510
|
model: config.model,
|
|
202
|
-
prompt:
|
|
511
|
+
prompt: fixPrompt,
|
|
203
512
|
cwd: worktreePath,
|
|
204
|
-
|
|
513
|
+
resume: true,
|
|
514
|
+
logFile: join(session.logsDir, `issue-${issueNum}-review-fix-${attempt}.log`),
|
|
205
515
|
verbose: config.verbose,
|
|
206
516
|
});
|
|
517
|
+
// Trace review-fix output and costs
|
|
518
|
+
traceOutput(session.name, issueNum, `review-fix-${attempt}`, reviewFixResult.output);
|
|
519
|
+
stepCosts.push(buildStepCost('review', issueNum, reviewFixResult, config));
|
|
520
|
+
// Auto-commit if agent didn't
|
|
521
|
+
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
522
|
+
if (fixStatus.stdout.trim()) {
|
|
523
|
+
exec('git add -A', { cwd: worktreePath });
|
|
524
|
+
exec(`git commit -m "fix(#${issueNum}): address review findings (attempt ${attempt})"`, { cwd: worktreePath });
|
|
525
|
+
}
|
|
526
|
+
// Re-run tests before next review attempt
|
|
527
|
+
const retest = runTests(worktreePath, config, logFile);
|
|
528
|
+
if (!retest.passed) {
|
|
529
|
+
log.warn('Tests failed after review fixes — will be caught in final status');
|
|
530
|
+
testOutput = retest.output;
|
|
531
|
+
testsPassing = false;
|
|
532
|
+
}
|
|
207
533
|
}
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if (fixStatus.stdout.trim()) {
|
|
211
|
-
exec('git add -A', { cwd: worktreePath });
|
|
212
|
-
exec(`git commit -m "fix(#${issueNum}): resolve verification failures (attempt ${attempt})"`, { cwd: worktreePath });
|
|
534
|
+
else {
|
|
535
|
+
log.warn(`Review still failing after ${config.maxTestRetries} attempts`);
|
|
213
536
|
}
|
|
214
537
|
}
|
|
215
|
-
else {
|
|
216
|
-
log.warn(`Verification still failing after ${config.maxTestRetries} attempts`);
|
|
217
|
-
}
|
|
218
538
|
}
|
|
219
|
-
// --- Step 7:
|
|
220
|
-
log.step('Step 7:
|
|
221
|
-
let
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
539
|
+
// --- Step 7: Verify gate (JSON-based) ---
|
|
540
|
+
log.step('Step 7: Live verification');
|
|
541
|
+
let verifyOutput = '';
|
|
542
|
+
let verifyPassing = false;
|
|
543
|
+
let verifySkipped = false;
|
|
544
|
+
if (!plan.verification.needed) {
|
|
545
|
+
log.info(`Verification skipped by plan: ${plan.verification.reason}`);
|
|
546
|
+
verifyPassing = true;
|
|
547
|
+
verifySkipped = true;
|
|
548
|
+
verifyOutput = `Verification skipped by plan: ${plan.verification.reason}`;
|
|
227
549
|
}
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
550
|
+
if (!verifySkipped && !config.dryRun) {
|
|
551
|
+
const verifyFileInWorktree = join(worktreePath, `verify-issue-${issueNum}.json`);
|
|
552
|
+
const verifyFileInSession = join(session.logsDir, `verify-issue-${issueNum}.json`);
|
|
553
|
+
for (let attempt = 1; attempt <= config.maxTestRetries; attempt++) {
|
|
554
|
+
log.info(`Verification attempt ${attempt} of ${config.maxTestRetries}`);
|
|
555
|
+
const verifyResult = await runVerify({
|
|
556
|
+
worktree: worktreePath,
|
|
557
|
+
logFile,
|
|
231
558
|
issueNum,
|
|
232
559
|
title,
|
|
233
560
|
body,
|
|
234
|
-
|
|
235
|
-
|
|
561
|
+
config,
|
|
562
|
+
sessionDir: session.resultsDir,
|
|
563
|
+
verifyInstructions: plan.verification.instructions,
|
|
236
564
|
});
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
565
|
+
verifyOutput = verifyResult.output;
|
|
566
|
+
// Trace verify output
|
|
567
|
+
traceVerify(session.name, issueNum, attempt, verifyOutput);
|
|
568
|
+
if (verifyResult.skipped) {
|
|
569
|
+
verifyPassing = true;
|
|
570
|
+
verifySkipped = true;
|
|
571
|
+
break;
|
|
572
|
+
}
|
|
573
|
+
// Read verify gate JSON (if the verify agent wrote one)
|
|
574
|
+
const verifyGate = readGateResult(verifyFileInWorktree);
|
|
575
|
+
moveToSessionLogs(verifyFileInWorktree, verifyFileInSession);
|
|
576
|
+
// Use gate JSON if available, otherwise fall back to runVerify's pass/fail
|
|
577
|
+
const passed = verifyGate !== DEFAULT_GATE ? verifyGate.passed : verifyResult.passed;
|
|
578
|
+
if (passed) {
|
|
579
|
+
verifyPassing = true;
|
|
580
|
+
stepsCompleted.push('verify');
|
|
581
|
+
log.success(`Verification passed on attempt ${attempt}`);
|
|
582
|
+
break;
|
|
583
|
+
}
|
|
584
|
+
if (attempt < config.maxTestRetries) {
|
|
585
|
+
const timedOut = verifyOutput.includes('[TIMEOUT]');
|
|
586
|
+
if (timedOut) {
|
|
587
|
+
log.warn(`Verification timed out on attempt ${attempt}, retrying...`);
|
|
588
|
+
}
|
|
589
|
+
else {
|
|
590
|
+
log.warn(`Verification failed on attempt ${attempt}, sending back to implementer...`);
|
|
591
|
+
// Use gate findings if available, otherwise use raw verify output
|
|
592
|
+
const findings = verifyGate !== DEFAULT_GATE
|
|
593
|
+
? formatGateFindings(verifyGate, 'Verification')
|
|
594
|
+
: `## Verification Findings (MUST FIX)\n\n${verifyOutput}`;
|
|
595
|
+
const fixPrompt = `Live verification failed for issue #${issueNum} (attempt ${attempt} of ${config.maxTestRetries}).\n\n${findings}\n\nInstructions:\n1. Read the verification findings and identify the ROOT CAUSE\n2. Fix ONLY code related to issue #${issueNum}\n3. Run tests to make sure nothing is broken\n4. Commit your fixes with: git commit -m "fix(#${issueNum}): address verification findings"`;
|
|
596
|
+
// Trace verify-fix prompt
|
|
597
|
+
tracePrompt(session.name, issueNum, `verify-fix-${attempt}`, fixPrompt);
|
|
598
|
+
const verifyFixResult = await spawnAgent({
|
|
599
|
+
agent: config.agent,
|
|
600
|
+
model: config.model,
|
|
601
|
+
prompt: fixPrompt,
|
|
602
|
+
cwd: worktreePath,
|
|
603
|
+
resume: true,
|
|
604
|
+
logFile: join(session.logsDir, `issue-${issueNum}-verify-fix-${attempt}.log`),
|
|
605
|
+
verbose: config.verbose,
|
|
606
|
+
});
|
|
607
|
+
// Trace verify-fix output and costs
|
|
608
|
+
traceOutput(session.name, issueNum, `verify-fix-${attempt}`, verifyFixResult.output);
|
|
609
|
+
stepCosts.push(buildStepCost('verify', issueNum, verifyFixResult, config));
|
|
610
|
+
// Auto-commit if agent didn't
|
|
611
|
+
const fixStatus = exec('git status --porcelain', { cwd: worktreePath });
|
|
612
|
+
if (fixStatus.stdout.trim()) {
|
|
613
|
+
exec('git add -A', { cwd: worktreePath });
|
|
614
|
+
exec(`git commit -m "fix(#${issueNum}): address verification findings (attempt ${attempt})"`, { cwd: worktreePath });
|
|
615
|
+
}
|
|
616
|
+
// Re-run tests before next verify attempt
|
|
617
|
+
const retest = runTests(worktreePath, config, logFile);
|
|
618
|
+
if (!retest.passed) {
|
|
619
|
+
log.warn('Tests failed after verify fixes');
|
|
620
|
+
testOutput = retest.output;
|
|
621
|
+
testsPassing = false;
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
else {
|
|
626
|
+
log.warn(`Verification still failing after ${config.maxTestRetries} attempts`);
|
|
627
|
+
}
|
|
250
628
|
}
|
|
251
629
|
}
|
|
630
|
+
else if (config.dryRun && !verifySkipped) {
|
|
631
|
+
log.dry('Would run live verification');
|
|
632
|
+
verifyPassing = true;
|
|
633
|
+
verifySkipped = true;
|
|
634
|
+
}
|
|
252
635
|
// --- Step 8: Create PR ---
|
|
253
636
|
log.step('Step 8: Creating PR');
|
|
254
637
|
let prUrl;
|
|
255
638
|
if (!config.dryRun) {
|
|
256
639
|
const prBase = config.autoMerge ? session.branch : config.baseBranch;
|
|
257
|
-
const prBody = buildPRBody(issueNum, title,
|
|
640
|
+
const prBody = buildPRBody(issueNum, title, reviewGate, testOutput, testsPassing, verifyPassing, verifySkipped, body);
|
|
258
641
|
try {
|
|
259
642
|
prUrl = createPR({
|
|
260
643
|
repo: config.repo,
|
|
@@ -264,6 +647,7 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
264
647
|
body: prBody,
|
|
265
648
|
cwd: worktreePath,
|
|
266
649
|
});
|
|
650
|
+
stepsCompleted.push('pr');
|
|
267
651
|
log.success(`PR created: ${prUrl}`);
|
|
268
652
|
}
|
|
269
653
|
catch (err) {
|
|
@@ -286,24 +670,86 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
286
670
|
const diffResult = exec(`git diff "origin/${config.baseBranch}...HEAD"`, { cwd: worktreePath });
|
|
287
671
|
runDiff = diffResult.stdout.slice(0, MAX_DIFF_CHARS);
|
|
288
672
|
}
|
|
673
|
+
// Format review gate for learnings
|
|
674
|
+
const reviewForLearnings = reviewGate.findings.length > 0
|
|
675
|
+
? `Review: ${reviewGate.summary}\n${reviewGate.findings.map((f) => `- [${f.severity}] ${f.description} (${f.fixed ? 'fixed' : 'unfixed'})`).join('\n')}`
|
|
676
|
+
: `Review: ${reviewGate.summary || 'passed'}`;
|
|
289
677
|
await extractLearnings({
|
|
290
678
|
issueNum,
|
|
291
679
|
title,
|
|
292
680
|
status: testsPassing ? 'success' : 'failure',
|
|
293
|
-
retries:
|
|
681
|
+
retries: testRetries,
|
|
294
682
|
duration,
|
|
295
683
|
diff: runDiff,
|
|
296
684
|
testOutput,
|
|
297
|
-
reviewOutput,
|
|
685
|
+
reviewOutput: reviewForLearnings,
|
|
298
686
|
verifyOutput,
|
|
299
687
|
body,
|
|
300
688
|
config,
|
|
301
689
|
});
|
|
690
|
+
// --- Step 9b: Write full traces (Meta-Harness style) ---
|
|
691
|
+
stepsCompleted.push('learn');
|
|
692
|
+
const filesChanged = runDiff ? (runDiff.match(/^diff --git/gm) ?? []).length : 0;
|
|
693
|
+
if (!config.dryRun) {
|
|
694
|
+
try {
|
|
695
|
+
// Per-issue metadata (backward compat)
|
|
696
|
+
writeTraceMetadata(session.name, issueNum, {
|
|
697
|
+
issueNum,
|
|
698
|
+
title,
|
|
699
|
+
status: testsPassing ? 'success' : 'failure',
|
|
700
|
+
duration,
|
|
701
|
+
retries: testRetries,
|
|
702
|
+
testsPassing,
|
|
703
|
+
verifyPassing,
|
|
704
|
+
verifySkipped,
|
|
705
|
+
filesChanged,
|
|
706
|
+
prUrl,
|
|
707
|
+
timestamp: new Date().toISOString(),
|
|
708
|
+
agent: config.agent,
|
|
709
|
+
model: config.model,
|
|
710
|
+
});
|
|
711
|
+
if (runDiff)
|
|
712
|
+
writeTrace(session.name, issueNum, 'diff.patch', runDiff);
|
|
713
|
+
if (testOutput)
|
|
714
|
+
writeTrace(session.name, issueNum, 'test-output.txt', testOutput);
|
|
715
|
+
if (reviewForLearnings)
|
|
716
|
+
writeTrace(session.name, issueNum, 'review-output.json', reviewForLearnings);
|
|
717
|
+
if (verifyOutput)
|
|
718
|
+
writeTrace(session.name, issueNum, 'verify-output.json', verifyOutput);
|
|
719
|
+
if (plan.summary)
|
|
720
|
+
writeTrace(session.name, issueNum, 'plan.json', JSON.stringify(plan, null, 2));
|
|
721
|
+
// Config snapshot (written once per run, idempotent)
|
|
722
|
+
try {
|
|
723
|
+
const configPath = join(projectDir, '.alpha-loop.yaml');
|
|
724
|
+
if (existsSync(configPath)) {
|
|
725
|
+
writeConfigSnapshot(session.name, readFileSync(configPath, 'utf-8'));
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
catch { /* non-fatal */ }
|
|
729
|
+
// Run-level scores and costs for this issue
|
|
730
|
+
const issueScoreResult = {
|
|
731
|
+
issueNum,
|
|
732
|
+
status: testsPassing ? 'success' : 'failure',
|
|
733
|
+
testsPassing,
|
|
734
|
+
verifyPassing,
|
|
735
|
+
verifySkipped,
|
|
736
|
+
retries: testRetries,
|
|
737
|
+
duration,
|
|
738
|
+
filesChanged,
|
|
739
|
+
stepsCompleted,
|
|
740
|
+
};
|
|
741
|
+
writeScores(session.name, computeScores([issueScoreResult]));
|
|
742
|
+
writeCosts(session.name, computeCosts(stepCosts));
|
|
743
|
+
}
|
|
744
|
+
catch (err) {
|
|
745
|
+
log.warn(`Failed to write traces for #${issueNum}: ${err}`);
|
|
746
|
+
}
|
|
747
|
+
}
|
|
302
748
|
// --- Step 10: Update issue status ---
|
|
303
749
|
log.step('Step 10: Updating issue status');
|
|
304
750
|
if (!config.dryRun) {
|
|
305
751
|
const testsStatus = testsPassing ? 'PASSING' : 'FAILING';
|
|
306
|
-
updateProjectStatus(config.repo, config.project, config.repoOwner, issueNum, '
|
|
752
|
+
updateProjectStatus(config.repo, config.project, config.repoOwner, issueNum, 'In Review');
|
|
307
753
|
labelIssue(config.repo, issueNum, 'in-review', 'in-progress');
|
|
308
754
|
commentIssue(config.repo, issueNum, `Automated implementation complete.\n\n**PR**: ${prUrl ?? 'N/A'}\n**Tests**: ${testsStatus}\n**Review**: Attached to PR body.\n\n---\n*Processed by alpha-loop in ${duration}s*`);
|
|
309
755
|
}
|
|
@@ -338,11 +784,6 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
338
784
|
autoCleanup: config.autoCleanup,
|
|
339
785
|
dryRun: config.dryRun,
|
|
340
786
|
});
|
|
341
|
-
// Count files changed
|
|
342
|
-
let filesChanged = 0;
|
|
343
|
-
if (runDiff) {
|
|
344
|
-
filesChanged = (runDiff.match(/^diff --git/gm) ?? []).length;
|
|
345
|
-
}
|
|
346
787
|
const result = {
|
|
347
788
|
issueNum,
|
|
348
789
|
title,
|
|
@@ -350,6 +791,7 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
350
791
|
prUrl,
|
|
351
792
|
testsPassing,
|
|
352
793
|
verifyPassing,
|
|
794
|
+
verifySkipped,
|
|
353
795
|
duration,
|
|
354
796
|
filesChanged,
|
|
355
797
|
};
|
|
@@ -360,17 +802,30 @@ export async function processIssue(issueNum, title, body, config, session) {
|
|
|
360
802
|
log.info(`PR: ${prUrl}`);
|
|
361
803
|
return result;
|
|
362
804
|
}
|
|
363
|
-
/**
 * Build the standard result object for an issue that failed to process.
 *
 * @param {number} issueNum - Issue number that failed.
 * @param {string} title - Issue title.
 * @param {number} startTime - Epoch millis when processing began (for duration).
 * @param {string} reason - Short failure reason recorded as `failureReason`.
 * @returns {object} Result record with all pass flags forced to false.
 */
function failureResult(issueNum, title, startTime, reason) {
    // Elapsed wall-clock time in whole seconds.
    const duration = Math.round((Date.now() - startTime) / 1000);
    return {
        issueNum,
        title,
        status: 'failure',
        failureReason: reason,
        testsPassing: false,
        verifyPassing: false,
        verifySkipped: false,
        duration,
        filesChanged: 0,
    };
}
|
|
818
|
+
/**
 * Put an issue back into the ready queue after a transient failure:
 * swap its `in-progress` label for the configured ready label and
 * reset the project board status to Todo. No-op in dry-run mode.
 *
 * @param {object} config - Run configuration (repo, project, labels, dryRun).
 * @param {number} issueNum - Issue number to re-queue.
 */
function requeueIssue(config, issueNum) {
    const { dryRun, repo, project, repoOwner, labelReady } = config;
    if (dryRun) {
        return;
    }
    labelIssue(repo, issueNum, labelReady, 'in-progress');
    updateProjectStatus(repo, project, repoOwner, issueNum, 'Todo');
    log.info(`Issue #${issueNum} re-queued for next run`);
}
|
|
374
829
|
function loadFileIfExists(filePath) {
|
|
375
830
|
if (!existsSync(filePath))
|
|
376
831
|
return null;
|
|
@@ -381,57 +836,57 @@ function loadFileIfExists(filePath) {
|
|
|
381
836
|
return null;
|
|
382
837
|
}
|
|
383
838
|
}
|
|
384
|
-
/**
|
|
385
|
-
* Extract just the review summary from the full agent output.
|
|
386
|
-
* Looks for the structured report section the reviewer agent produces.
|
|
387
|
-
*/
|
|
388
|
-
function extractReviewSummary(reviewOutput) {
|
|
389
|
-
if (!reviewOutput)
|
|
390
|
-
return 'No review available';
|
|
391
|
-
// Look for the structured review report (reviewer agent outputs this format)
|
|
392
|
-
const patterns = [
|
|
393
|
-
/### Review Summary[\s\S]*$/m,
|
|
394
|
-
/### Findings Fixed[\s\S]*$/m,
|
|
395
|
-
/## Review Report[\s\S]*$/m,
|
|
396
|
-
/\*\*Verdict:.*$/m,
|
|
397
|
-
];
|
|
398
|
-
for (const pattern of patterns) {
|
|
399
|
-
const match = reviewOutput.match(pattern);
|
|
400
|
-
if (match)
|
|
401
|
-
return match[0].trim();
|
|
402
|
-
}
|
|
403
|
-
// Fallback: take the last 500 chars which usually has the summary
|
|
404
|
-
const lines = reviewOutput.trim().split('\n');
|
|
405
|
-
const lastLines = lines.slice(-20).join('\n');
|
|
406
|
-
if (lastLines.length > 0)
|
|
407
|
-
return lastLines;
|
|
408
|
-
return 'Review completed — see logs for details';
|
|
409
|
-
}
|
|
410
839
|
/**
 * Extract a one-line test summary from raw test output.
 * Aggregates results across multiple test runners (pytest, Jest, Vitest).
 * Handles concurrent output like: [pytest] 189 passed, [frontend] Tests 6 passed, etc.
 *
 * @param {string} testOutput - Raw combined test-runner output.
 * @returns {string} e.g. "189 passed, 2 failed, 1 skipped", or '' when no
 *   recognizable summary line was found.
 */
function extractTestSummary(testOutput) {
    if (!testOutput)
        return '';
    let totalPassed = 0;
    let totalFailed = 0;
    let totalSkipped = 0;
    // Pytest summary line: "=== 189 passed, 1 skipped in 7.05s ===" or "=== 5 failed, 184 passed ==="
    // Match the "=== ... ===" summary line format and pull counts out of the middle.
    for (const match of testOutput.matchAll(/=+\s*(.*?)\s*=+/g)) {
        const line = match[1];
        const passed = line.match(/(\d+) passed/);
        const failed = line.match(/(\d+) failed/);
        const skipped = line.match(/(\d+) skipped/);
        if (passed)
            totalPassed += parseInt(passed[1], 10);
        if (failed)
            totalFailed += parseInt(failed[1], 10);
        if (skipped)
            totalSkipped += parseInt(skipped[1], 10);
    }
    // Jest summary: "Tests: 30 passed, 30 total", "Tests: 2 failed, 28 passed, 30 total",
    // or with skips: "Tests: 1 failed, 2 skipped, 27 passed, 30 total".
    // The optional skipped segment sits between failed and passed in Jest's output.
    for (const match of testOutput.matchAll(/Tests:\s+(?:(\d+) failed,\s+)?(?:(\d+) skipped,\s+)?(\d+) passed/g)) {
        if (match[1])
            totalFailed += parseInt(match[1], 10);
        if (match[2])
            totalSkipped += parseInt(match[2], 10);
        totalPassed += parseInt(match[3], 10);
    }
    // Vitest summary: "Tests  6 passed (6)" or "Tests  2 failed | 4 passed (6)" —
    // uses spaces not colon, has parens, and separates failed/passed with "|".
    for (const match of testOutput.matchAll(/Tests\s+(?:(\d+) failed\s*\|?\s*)?(\d+) passed\s*\(\d+\)/g)) {
        if (match[1])
            totalFailed += parseInt(match[1], 10);
        totalPassed += parseInt(match[2], 10);
    }
    if (totalPassed === 0 && totalFailed === 0)
        return '';
    const parts = [`${totalPassed} passed`];
    if (totalFailed > 0)
        parts.push(`${totalFailed} failed`);
    if (totalSkipped > 0)
        parts.push(`${totalSkipped} skipped`);
    return parts.join(', ');
}
|
|
432
|
-
function buildPRBody(issueNum, title,
|
|
886
|
+
function buildPRBody(issueNum, title, reviewGate, testOutput, testsPassing, verifyPassing, verifySkipped, body) {
|
|
433
887
|
const testSummary = extractTestSummary(testOutput);
|
|
434
|
-
const
|
|
888
|
+
const verifyStatus = verifySkipped ? 'SKIPPED' : verifyPassing ? 'PASS' : 'FAIL';
|
|
889
|
+
const reviewStatus = reviewGate.passed ? 'PASS' : 'FAIL';
|
|
435
890
|
const lines = [
|
|
436
891
|
`Closes #${issueNum}`,
|
|
437
892
|
'',
|
|
@@ -444,14 +899,28 @@ function buildPRBody(issueNum, title, reviewOutput, testOutput, testsPassing, ve
|
|
|
444
899
|
`| Check | Status |`,
|
|
445
900
|
`|-------|--------|`,
|
|
446
901
|
`| Unit tests | ${testsPassing ? 'PASS' : 'FAIL'} |`,
|
|
447
|
-
`|
|
|
902
|
+
`| Code review | ${reviewStatus} |`,
|
|
903
|
+
`| Verification | ${verifyStatus} |`,
|
|
448
904
|
];
|
|
449
905
|
if (testSummary) {
|
|
450
906
|
lines.push(`| Details | ${testSummary} |`);
|
|
451
907
|
}
|
|
452
908
|
lines.push('');
|
|
453
|
-
// Code review —
|
|
454
|
-
|
|
909
|
+
// Code review — structured from gate result
|
|
910
|
+
if (reviewGate.findings.length > 0) {
|
|
911
|
+
lines.push('## Code Review', '');
|
|
912
|
+
lines.push(reviewGate.summary || 'Review completed');
|
|
913
|
+
lines.push('');
|
|
914
|
+
for (const f of reviewGate.findings) {
|
|
915
|
+
const status = f.fixed ? 'FIXED' : 'OPEN';
|
|
916
|
+
const fileRef = f.file ? ` \`${f.file}\`` : '';
|
|
917
|
+
lines.push(`- **${f.severity.toUpperCase()}** [${status}]${fileRef}: ${f.description}`);
|
|
918
|
+
}
|
|
919
|
+
lines.push('');
|
|
920
|
+
}
|
|
921
|
+
else {
|
|
922
|
+
lines.push('## Code Review', '', reviewGate.summary || 'No issues found', '');
|
|
923
|
+
}
|
|
455
924
|
// What to test — from issue body or generic
|
|
456
925
|
const whatToTestMatch = body.match(/## Test Requirements[\s\S]*?(?=\n## |$)/);
|
|
457
926
|
if (whatToTestMatch) {
|