@jhlee0619/codexloop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +34 -0
- package/.claude-plugin/plugin.json +8 -0
- package/.codex-plugin/plugin.json +38 -0
- package/LICENSE +21 -0
- package/README.md +425 -0
- package/assets/banner.png +0 -0
- package/bin/cloop +45 -0
- package/commands/iterate.md +25 -0
- package/commands/model.md +33 -0
- package/commands/result.md +17 -0
- package/commands/start.md +188 -0
- package/commands/status.md +10 -0
- package/commands/stop.md +12 -0
- package/package.json +60 -0
- package/prompts/evaluate.md +91 -0
- package/prompts/rank.md +97 -0
- package/prompts/suggest.md +69 -0
- package/schemas/evaluation.schema.json +65 -0
- package/schemas/loop-state.schema.json +103 -0
- package/schemas/proposal.schema.json +74 -0
- package/schemas/ranking.schema.json +77 -0
- package/scripts/lib/apply.mjs +254 -0
- package/scripts/lib/args.mjs +202 -0
- package/scripts/lib/codex-exec.mjs +318 -0
- package/scripts/lib/convergence.mjs +153 -0
- package/scripts/lib/iteration.mjs +484 -0
- package/scripts/lib/process.mjs +164 -0
- package/scripts/lib/prompts.mjs +53 -0
- package/scripts/lib/rank.mjs +149 -0
- package/scripts/lib/render.mjs +240 -0
- package/scripts/lib/state.mjs +378 -0
- package/scripts/lib/validate.mjs +71 -0
- package/scripts/lib/workspace.mjs +49 -0
- package/scripts/loop-companion.mjs +849 -0
- package/skills/cloop/SKILL.md +177 -0
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
// Single-iteration orchestration: evaluate → suggest → rank → apply → validate → record.
|
|
2
|
+
//
|
|
3
|
+
// This is where the six-step loop lives. `runIteration` is called either
|
|
4
|
+
// synchronously by /cloop:iterate or in a loop by the background worker
|
|
5
|
+
// (/cloop:start --background). It loads state from disk, runs the six steps,
|
|
6
|
+
// persists state, and returns the full iteration record.
|
|
7
|
+
|
|
8
|
+
import process from "node:process";
|
|
9
|
+
|
|
10
|
+
import { codexCall, CodexError } from "./codex-exec.mjs";
|
|
11
|
+
import { runCommand } from "./process.mjs";
|
|
12
|
+
import {
|
|
13
|
+
loadState,
|
|
14
|
+
saveState,
|
|
15
|
+
writeIterationFile,
|
|
16
|
+
writeProposalFile,
|
|
17
|
+
appendProgressLog,
|
|
18
|
+
readProgressLogTail,
|
|
19
|
+
computeGoalHash
|
|
20
|
+
} from "./state.mjs";
|
|
21
|
+
import { recomputeWinner } from "./rank.mjs";
|
|
22
|
+
import { computeQualityScore, checkStopping } from "./convergence.mjs";
|
|
23
|
+
import { applyPatch, rollbackToSha } from "./apply.mjs";
|
|
24
|
+
import { runValidation } from "./validate.mjs";
|
|
25
|
+
|
|
26
|
+
// Number of proposals requested from the "suggest" step when the caller does
// not pass `options.nProposals`.
const DEFAULT_PROPOSAL_COUNT = 3;
|
|
27
|
+
|
|
28
|
+
// Current wall-clock time rendered as an ISO-8601 string (UTC, millisecond precision).
function nowIso() {
  const stamp = new Date();
  return stamp.toISOString();
}
|
|
31
|
+
|
|
32
|
+
// Builds the blank record for the next iteration.
//
// The index is one past the number of iterations already stored in `state`
// (1 when there are none). Every step result starts out as null and is
// filled in as the iteration progresses; token totals start at zero.
function newIterationRecord(state, { dryRun }) {
  const completedSoFar = state.iterations?.length ?? 0;

  const record = {
    index: completedSoFar + 1,
    startedAt: nowIso()
  };

  // Fields populated later by the individual steps, in their persisted order.
  const pendingFields = [
    "completedAt",
    "evaluate",
    "proposals",
    "ranking",
    "acceptedProposalId",
    "rejectionReasons",
    "apply",
    "validate",
    "qualityScore",
    "qualityDelta",
    "rationale",
    "confidence",
    "remainingIssues",
    "nextStep",
    "stopReason",
    "error"
  ];
  for (const field of pendingFields) {
    record[field] = null;
  }

  record.codexUsage = {
    evaluate: null,
    suggest: null,
    rank: null,
    totals: { inputTokens: 0, cachedInputTokens: 0, outputTokens: 0 }
  };
  record.dryRun = Boolean(dryRun);

  return record;
}
|
|
62
|
+
|
|
63
|
+
// Adds one call's token usage onto the running `totals` object in place.
// A missing `usage` is ignored; a missing counter inside it counts as zero.
function accumulateUsage(totals, usage) {
  if (!usage) return;
  for (const counter of ["inputTokens", "cachedInputTokens", "outputTokens"]) {
    totals[counter] += usage[counter] ?? 0;
  }
}
|
|
69
|
+
|
|
70
|
+
// Returns the output of `git diff <sinceSha>..HEAD -- .` run in `cwd`.
//
// Yields an empty string when no sha is given or when git reports a non-zero
// status. Output longer than 20,000 characters is cut at that length and
// suffixed with a note stating how many characters were dropped, so the
// prompt that embeds it stays bounded.
function gitDiffSince(cwd, sinceSha) {
  if (!sinceSha) {
    return "";
  }

  const { status, stdout } = runCommand("git", ["diff", `${sinceSha}..HEAD`, "--", "."], {
    cwd,
    maxBuffer: 32 * 1024 * 1024
  });
  if (status !== 0) {
    return "";
  }

  const fullDiff = stdout || "";
  const overflow = fullDiff.length - 20_000;
  return overflow > 0
    ? `${fullDiff.slice(0, 20_000)}\n\n... [truncated: ${overflow} more chars]`
    : fullDiff;
}
|
|
84
|
+
|
|
85
|
+
// Returns the goal hash stored on `state.goal`, computing and storing it
// first when it is absent.
//
// A state with no `goal` object gets an empty one created, so the hash can be
// stored instead of failing with a TypeError on the assignment. This matches
// how the rest of the module treats a missing goal as "(unspecified)".
function ensureGoalHash(state) {
  if (state.goal == null) {
    state.goal = {};
  }
  if (!state.goal.goalHash) {
    state.goal.goalHash = computeGoalHash(state.goal);
  }
  return state.goal.goalHash;
}
|
|
91
|
+
|
|
92
|
+
// Recomputes the goal hash and throws when it differs from `expectedHash`.
// A falsy `expectedHash` disables the check.
function assertGoalImmutable(state, expectedHash) {
  const currentHash = computeGoalHash(state.goal ?? {});
  const unchanged = !expectedHash || currentHash === expectedHash;
  if (unchanged) {
    return;
  }
  throw new Error(
    `goal hash mismatch (expected ${expectedHash}, got ${currentHash}) — goal text may have drifted`
  );
}
|
|
100
|
+
|
|
101
|
+
// Projects a full iteration record down to the compact summary kept in
// `state.iterations`.
//
// Each step result (evaluate / validate / apply / ranking) is reduced to a
// fixed set of fields with defaults for absent values, or to null when the
// step produced nothing.
function summaryForState(iteration) {
  const { evaluate, validate, apply, ranking } = iteration;

  let evaluateSummary = null;
  if (evaluate) {
    evaluateSummary = {
      verdict: evaluate.verdict,
      distanceFromGoal: evaluate.distanceFromGoal,
      openIssueCount: evaluate.openIssues?.length ?? 0,
      passingTests: evaluate.passingTests ?? null,
      failingTests: evaluate.failingTests ?? null,
      completionClaim: evaluate.completionClaim ?? false
    };
  }

  let validateSummary = null;
  if (validate) {
    validateSummary = {
      passed: validate.passed,
      regression: validate.regression ?? false,
      skipped: validate.skipped ?? null
    };
  }

  let applySummary = null;
  if (apply) {
    applySummary = {
      applied: apply.applied ?? false,
      empty: apply.empty ?? false,
      skipped: apply.skipped ?? null,
      preSha: apply.preSha ?? null,
      postSha: apply.postSha ?? null,
      filesTouched: apply.filesTouched ?? [],
      rolledBack: apply.rolledBack ?? false,
      error: apply.error ?? null
    };
  }

  let rankingSummary = null;
  if (ranking) {
    rankingSummary = {
      winner: ranking.winner,
      tiebreaker: ranking.tiebreaker ?? null,
      disagreement: ranking.disagreement ?? false,
      judgeWinnerId: ranking.judgeWinnerId ?? null
    };
  }

  return {
    index: iteration.index,
    startedAt: iteration.startedAt,
    completedAt: iteration.completedAt,
    qualityScore: iteration.qualityScore,
    qualityDelta: iteration.qualityDelta,
    acceptedProposalId: iteration.acceptedProposalId,
    evaluate: evaluateSummary,
    validate: validateSummary,
    apply: applySummary,
    ranking: rankingSummary,
    stopReason: iteration.stopReason,
    error: iteration.error,
    dryRun: iteration.dryRun ?? false
  };
}
|
|
151
|
+
|
|
152
|
+
// Closes out an iteration: stamps its completion time, writes the full record
// to disk, folds a summary plus budget/convergence bookkeeping into `state`,
// and appends a one-line digest to the progress log.
//
// `state` and `iteration` are both mutated in place. Saving `state` itself is
// left to the caller.
function finalizeIteration(state, iteration, { repoRoot }) {
  // Short label for the apply step's outcome, used in the progress log line.
  const describeApply = (apply) => {
    if (apply?.applied) return "applied";
    if (apply?.empty) return "empty";
    if (apply?.skipped) return `skipped(${apply.skipped})`;
    if (apply?.error) return "fail";
    return "none";
  };

  // Short label for the validate step's outcome, used in the progress log line.
  const describeValidate = (validate) => {
    if (validate?.skipped) return `skipped(${validate.skipped})`;
    if (validate?.passed === true) return "pass";
    if (validate?.passed === false) {
      return validate.regression ? "fail(regression)" : "fail";
    }
    return "n/a";
  };

  iteration.completedAt = nowIso();

  writeIterationFile(repoRoot, iteration, { dryRun: iteration.dryRun });

  state.iterations.push(summaryForState(iteration));

  // Budget counts dry-run too: dry-run still burns real Codex calls and time.
  const consumed = state.budget.consumed;
  consumed.iterations = (consumed.iterations ?? 0) + 1;
  const startedAtMs = consumed.startedAtMs ?? Date.now();
  consumed.elapsedMs = Date.now() - startedAtMs;

  state.convergence.scoreHistory.push(iteration.qualityScore ?? 0);
  state.lastIterationAt = iteration.completedAt;

  // Accepted/rejected ids are prefixed with the iteration index.
  if (iteration.acceptedProposalId) {
    state.accepted.push(`${iteration.index}-${iteration.acceptedProposalId}`);
  }
  const rejections = Object.entries(iteration.rejectionReasons ?? {});
  for (const [proposalId, reason] of rejections) {
    state.rejected.push({ id: `${iteration.index}-${proposalId}`, reason });
  }

  const latestIssues = iteration.evaluate?.openIssues;
  if (Array.isArray(latestIssues)) {
    state.openIssues = latestIssues;
  }

  const header = `[iter ${iteration.index}${iteration.dryRun ? " DRY" : ""}]`;
  const verdictPart = `verdict=${iteration.evaluate?.verdict ?? "?"}`;
  const proposalsPart = `proposals=${iteration.proposals?.length ?? 0}`;
  const winnerPart = `winner=${iteration.acceptedProposalId ?? "?"}`;
  const applyPart = `apply=${describeApply(iteration.apply)}`;
  const validatePart = `validate=${describeValidate(iteration.validate)}`;
  const scorePart = `q=${(iteration.qualityScore ?? 0).toFixed(3)}`;
  const deltaPart = `(Δ ${(iteration.qualityDelta ?? 0).toFixed(3)})`;

  appendProgressLog(
    repoRoot,
    [header, verdictPart, proposalsPart, winnerPart, applyPart, validatePart, scorePart, deltaPart].join(" ")
  );
}
|
|
200
|
+
|
|
201
|
+
// Core runIteration. Loads state, runs the six steps, persists, returns.
//
// Steps: evaluate → suggest → rank → apply → validate → record. If the
// evaluator's verdict is "goal-met" the iteration short-circuits after
// evaluation (validation still runs unless this is a dry run).
//
// Parameters:
//   repoRoot  - repository root; used as cwd for Codex/git/validation and as
//               the location handed to the state helpers.
//   options   - optional flags read here: dryRun, skipApply, nProposals,
//               codexTimeoutMs, model, reasoningEffort.
//
// Returns `{ state, iteration }` after the state has been saved.
//
// On any error the failure is recorded on both the iteration and the state
// (best effort) and the original error is rethrown.
export async function runIteration({ repoRoot, options = {} }) {
  const state = loadState(repoRoot);

  if (state.budget.consumed.startedAtMs == null) {
    state.budget.consumed.startedAtMs = Date.now();
  }

  const goalHash = ensureGoalHash(state);

  const iteration = newIterationRecord(state, { dryRun: options.dryRun });

  try {
    assertGoalImmutable(state, goalHash);

    const diffSinceSeed = gitDiffSince(repoRoot, state.goal?.seedCommit);
    const progressTail = readProgressLogTail(repoRoot, { lines: 30 });

    // Only the last three summaries are fed back into the prompts.
    const recentIterations = (state.iterations ?? []).slice(-3).map((i) => ({
      index: i.index,
      qualityScore: i.qualityScore,
      acceptedProposalId: i.acceptedProposalId,
      verdict: i.evaluate?.verdict,
      openIssueCount: i.evaluate?.openIssueCount,
      validatePassed: i.validate?.passed,
      stopReason: i.stopReason
    }));

    const commonVars = {
      GOAL: state.goal?.text ?? "(unspecified)",
      ACCEPTANCE_CRITERIA:
        Array.isArray(state.goal?.acceptanceCriteria) && state.goal.acceptanceCriteria.length > 0
          ? state.goal.acceptanceCriteria.map((c) => `- ${c}`).join("\n")
          : "(none)",
      ITERATION_INDEX: iteration.index,
      MAX_ITERATIONS: state.budget?.maxIterations ?? 20,
      LAST_ITERATIONS: recentIterations.length > 0 ? recentIterations : "(none)",
      PROGRESS_LOG_TAIL: progressTail || "(none)",
      OPEN_ISSUES:
        Array.isArray(state.openIssues) && state.openIssues.length > 0
          ? state.openIssues
          : "(none)",
      DIFF_SINCE_SEED: diffSinceSeed || "(none)",
      CURRENT_CHECK_STATE: "(none)"
    };

    const codexOpts = {
      cwd: repoRoot,
      timeoutMs: options.codexTimeoutMs ?? 300_000,
      retries: 1,
      model: options.model ?? state.model ?? null,
      reasoningEffort: options.reasoningEffort ?? state.reasoningEffort ?? null
    };

    // ── 1. EVALUATE ────────────────────────────────────────────────
    const evalResult = await codexCall("evaluate", commonVars, codexOpts);
    iteration.evaluate = evalResult.data;
    iteration.codexUsage.evaluate = evalResult.usage;
    accumulateUsage(iteration.codexUsage.totals, evalResult.usage);
    // Guard against a missing counter: `undefined + 1` would persist NaN.
    state.budget.consumed.codexCalls = (state.budget.consumed.codexCalls ?? 0) + 1;

    if ((state.openIssuesInitial ?? 0) === 0 && Array.isArray(iteration.evaluate?.openIssues)) {
      state.openIssuesInitial = iteration.evaluate.openIssues.length;
    }

    // Early exit: reviewer declares goal met.
    if (iteration.evaluate?.verdict === "goal-met") {
      if (!options.dryRun) {
        iteration.validate = await runValidation({
          cwd: repoRoot,
          goal: state.goal,
          previousIteration: state.iterations[state.iterations.length - 1]
        });
      } else {
        iteration.validate = { passed: null, regression: false, commands: [], skipped: "dry-run" };
      }

      const q = computeQualityScore(iteration, state);
      iteration.qualityScore = q.score;
      const prev = state.iterations[state.iterations.length - 1];
      iteration.qualityDelta = iteration.qualityScore - (prev?.qualityScore ?? 0);

      // Only an explicit validation failure blocks completion; a skipped
      // validation (passed === null) still counts as goal-met.
      if (iteration.validate?.passed !== false) {
        iteration.stopReason = "goal-met";
        state.stopReason = "goal-met";
        state.status = "completed";
        state.completedAt = nowIso();
      }

      finalizeIteration(state, iteration, { repoRoot });
      saveState(repoRoot, state);
      return { state, iteration };
    }

    // ── 2. SUGGEST ─────────────────────────────────────────────────
    const recentRejections = (state.rejected ?? []).slice(-5);
    const suggestResult = await codexCall(
      "suggest",
      {
        ...commonVars,
        EVALUATION: iteration.evaluate,
        N_PROPOSALS: options.nProposals ?? DEFAULT_PROPOSAL_COUNT,
        RECENT_REJECTIONS: recentRejections.length > 0 ? recentRejections : "(none)"
      },
      codexOpts
    );
    iteration.codexUsage.suggest = suggestResult.usage;
    accumulateUsage(iteration.codexUsage.totals, suggestResult.usage);
    state.budget.consumed.codexCalls = (state.budget.consumed.codexCalls ?? 0) + 1;

    const proposalList = Array.isArray(suggestResult.data?.proposals)
      ? suggestResult.data.proposals
      : [];
    if (proposalList.length < 2) {
      throw new Error(`suggest returned ${proposalList.length} proposal(s); minimum is 2`);
    }
    iteration.proposals = proposalList;
    for (const proposal of proposalList) {
      writeProposalFile(repoRoot, iteration.index, proposal.id, proposal);
    }

    // ── 3. RANK ────────────────────────────────────────────────────
    const rankResult = await codexCall(
      "rank",
      {
        ...commonVars,
        EVALUATION: iteration.evaluate,
        PROPOSALS: proposalList
      },
      codexOpts
    );
    iteration.codexUsage.rank = rankResult.usage;
    accumulateUsage(iteration.codexUsage.totals, rankResult.usage);
    state.budget.consumed.codexCalls = (state.budget.consumed.codexCalls ?? 0) + 1;

    // The recomputed ranking, not the judge's own pick, decides the winner.
    const canonical = recomputeWinner(rankResult.data, proposalList);
    iteration.ranking = canonical;
    iteration.acceptedProposalId = canonical.winner.id;
    iteration.rejectionReasons = canonical.rejections;
    iteration.confidence = canonical.winner.confidence;

    if (canonical.disagreement) {
      appendProgressLog(
        repoRoot,
        `[iter ${iteration.index}] judge picked ${canonical.judgeWinnerId} but runtime overrode to ${canonical.winner.id}`
      );
    }

    const winnerProposal = proposalList.find((p) => p.id === canonical.winner.id);
    if (!winnerProposal) {
      throw new Error(`winner ${canonical.winner.id} not in proposals list`);
    }

    // ── 4. APPLY ───────────────────────────────────────────────────
    if (options.dryRun || options.skipApply) {
      iteration.apply = {
        applied: false,
        empty: false,
        filesTouched: winnerProposal.filesTouched ?? [],
        skipped: options.dryRun ? "dry-run" : "skip-apply",
        hackingFindings: [],
        preSha: null,
        postSha: null
      };
    } else {
      iteration.apply = await applyPatch({
        cwd: repoRoot,
        proposal: winnerProposal,
        iterationIndex: iteration.index
      });

      if (!iteration.apply.applied && !iteration.apply.empty) {
        // Try runner-up once.
        const runnerUpEntry = canonical.scores
          .filter((s) => s.proposalId !== canonical.winner.id)
          .sort((a, b) => b.weighted - a.weighted)[0];
        const runnerUp = runnerUpEntry ? proposalList.find((p) => p.id === runnerUpEntry.proposalId) : null;
        if (runnerUp) {
          appendProgressLog(
            repoRoot,
            `[iter ${iteration.index}] winner ${winnerProposal.id} failed to apply (${(iteration.apply.error ?? "").slice(0, 80)}); trying runner-up ${runnerUp.id}`
          );
          const runnerResult = await applyPatch({
            cwd: repoRoot,
            proposal: runnerUp,
            iterationIndex: iteration.index
          });
          if (runnerResult.applied || runnerResult.empty) {
            // Promote the runner-up and record the original winner as rejected.
            iteration.apply = runnerResult;
            const prevWinnerId = canonical.winner.id;
            iteration.acceptedProposalId = runnerUp.id;
            iteration.rejectionReasons = {
              ...(iteration.rejectionReasons ?? {}),
              [prevWinnerId]: `apply-failed: ${(iteration.rejectionReasons?.[prevWinnerId] ?? "patch did not apply cleanly")}`
            };
            delete iteration.rejectionReasons[runnerUp.id];
          }
        }
      }
    }

    // ── 5. VALIDATE ────────────────────────────────────────────────
    if (options.dryRun) {
      iteration.validate = {
        passed: null,
        regression: false,
        commands: [],
        skipped: "dry-run"
      };
    } else if (iteration.apply?.applied || iteration.apply?.empty) {
      iteration.validate = await runValidation({
        cwd: repoRoot,
        goal: state.goal,
        previousIteration: state.iterations[state.iterations.length - 1]
      });

      if (iteration.validate.regression && iteration.apply.applied && iteration.apply.preSha) {
        appendProgressLog(
          repoRoot,
          `[iter ${iteration.index}] regression detected; rolling back to ${iteration.apply.preSha.slice(0, 8)}`
        );
        const rb = rollbackToSha(repoRoot, iteration.apply.preSha);
        if (rb.ok) {
          iteration.apply.rolledBack = true;
        }
      }
    } else {
      iteration.validate = {
        passed: null,
        regression: false,
        commands: [],
        skipped: "apply-failed"
      };
    }

    // ── 6. RECORD + stopping ──────────────────────────────────────
    const q = computeQualityScore(iteration, state);
    iteration.qualityScore = q.score;
    const prev = state.iterations[state.iterations.length - 1];
    iteration.qualityDelta = iteration.qualityScore - (prev?.qualityScore ?? 0);
    iteration.remainingIssues = iteration.evaluate?.openIssues?.map((i) => i.summary) ?? [];

    finalizeIteration(state, iteration, { repoRoot });

    const stop = checkStopping(state);
    if (stop.shouldStop) {
      iteration.stopReason = stop.reason;
      state.stopReason = stop.reason;
      state.status = stop.reason === "goal-met" ? "completed" : "paused";
      state.completedAt = nowIso();
      // Re-write the iteration file so the stop reason is persisted in the full record.
      writeIterationFile(repoRoot, iteration, { dryRun: iteration.dryRun });
      const summaryIndex = state.iterations.length - 1;
      if (summaryIndex >= 0) {
        state.iterations[summaryIndex] = summaryForState(iteration);
      }
    }

    saveState(repoRoot, state);
    return { state, iteration };
  } catch (err) {
    iteration.completedAt = nowIso();
    const isCodexError = err instanceof CodexError;
    // `err` may be any thrown value, including null/undefined, so read its
    // fields defensively rather than failing inside the error handler.
    iteration.error = isCodexError ? `${err.kind}: ${err.message}` : err?.message ?? String(err);
    state.error = {
      kind: isCodexError ? err.kind : err?.name ?? "Error",
      message: iteration.error,
      iteration: iteration.index
    };
    try {
      writeIterationFile(repoRoot, iteration, { dryRun: iteration.dryRun });
      // Only append a summary row if we have not already.
      if (!state.iterations.some((i) => i.index === iteration.index)) {
        state.iterations.push(summaryForState(iteration));
        state.budget.consumed.codexCalls = state.budget.consumed.codexCalls ?? 0;
      }
      appendProgressLog(
        repoRoot,
        `[iter ${iteration.index}] ERROR ${iteration.error}`
      );
      saveState(repoRoot, state);
    } catch {
      // Best effort: a failure while recording must not mask the original error.
    }
    throw err;
  }
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// Process utilities: synchronous commands, background worker spawning, PID
|
|
2
|
+
// liveness checks, and graceful termination.
|
|
3
|
+
//
|
|
4
|
+
// Shape is deliberately close to what the codex plugin ships so code review
|
|
5
|
+
// across the two plugins stays cheap. Nothing here is imported from there.
|
|
6
|
+
|
|
7
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
8
|
+
import fs from "node:fs";
|
|
9
|
+
import process from "node:process";
|
|
10
|
+
|
|
11
|
+
// Runs `command` synchronously and returns a normalized result object.
//
// Options read here: cwd, env (defaults to process.env), input, stdio
// (defaults to "pipe"), timeoutMs, maxBuffer (defaults to 64 MiB) and shell
// (defaults to true on win32 only).
//
// Returns { command, args, status, signal, stdout, stderr, error }. This
// function never throws for a failed command; callers inspect `status`,
// `signal` and `error`.
//
// `spawnSync` reports `status === null` when the child was terminated by a
// signal or could not be started. That case is mapped to a non-zero status
// (1) so that callers comparing `status` against 0 do not mistake it for
// success.
export function runCommand(command, args = [], options = {}) {
  const result = spawnSync(command, args, {
    cwd: options.cwd,
    env: options.env ?? process.env,
    encoding: "utf8",
    input: options.input,
    stdio: options.stdio ?? "pipe",
    timeout: options.timeoutMs,
    maxBuffer: options.maxBuffer ?? 64 * 1024 * 1024,
    shell: options.shell ?? (process.platform === "win32")
  });
  const endedAbnormally = result.signal != null || result.error != null;
  return {
    command,
    args,
    status: result.status ?? (endedAbnormally ? 1 : 0),
    signal: result.signal ?? null,
    stdout: result.stdout ?? "",
    stderr: result.stderr ?? "",
    error: result.error ?? null
  };
}
|
|
32
|
+
|
|
33
|
+
// Like `runCommand`, but throws instead of returning a failed result.
//
// Rethrows the spawn error when one is present; otherwise throws an Error
// built by `formatCommandFailure` when the status is non-zero. Returns the
// result object on success.
export function runCommandChecked(command, args = [], options = {}) {
  const outcome = runCommand(command, args, options);
  const { error, status } = outcome;
  if (error) {
    throw error;
  }
  if (status === 0) {
    return outcome;
  }
  throw new Error(formatCommandFailure(outcome));
}
|
|
43
|
+
|
|
44
|
+
// Probes whether `command` can be executed by running it with `versionArgs`.
//
// Returns { available, detail }. `detail` is "not found" for ENOENT, the
// spawn error's message for other spawn errors, the trimmed stderr/stdout (or
// "exit N") for a non-zero status, and the trimmed stdout/stderr (or "ok")
// on success.
export function binaryAvailable(command, versionArgs = ["--version"], options = {}) {
  const probe = runCommand(command, versionArgs, options);
  const unavailable = (detail) => ({ available: false, detail });

  if (probe.error) {
    return unavailable(probe.error.code === "ENOENT" ? "not found" : probe.error.message);
  }

  if (probe.status !== 0) {
    const output = (probe.stderr || probe.stdout || "").trim();
    return unavailable(output || `exit ${probe.status}`);
  }

  const banner = probe.stdout || probe.stderr || "ok";
  return { available: true, detail: banner.trim() };
}
|
|
58
|
+
|
|
59
|
+
// Launches `scriptPath` under the current Node executable as a detached,
// unref'd child whose stdout and stderr are appended to `options.logFile`.
//
// `options.env` entries are layered on top of the parent's environment.
// Returns the child's PID, or null when none was assigned. Throws when
// `options.logFile` is missing.
export function spawnDetached(scriptPath, args, options = {}) {
  const { cwd, logFile, env } = options;
  if (!logFile) {
    throw new Error("spawnDetached requires options.logFile");
  }

  const logFd = fs.openSync(logFile, "a");
  try {
    const workerEnv = { ...process.env, ...(env ?? {}) };
    const worker = spawn(process.execPath, [scriptPath, ...args], {
      cwd,
      detached: true,
      stdio: ["ignore", logFd, logFd],
      env: workerEnv
    });
    worker.unref();
    return worker.pid ?? null;
  } finally {
    // The child holds its own copy of the descriptor; release the parent's.
    try {
      fs.closeSync(logFd);
    } catch {}
  }
}
|
|
83
|
+
|
|
84
|
+
// Reports whether a process with the given PID currently exists, probing it
// with signal 0. Non-finite or non-positive PIDs are never alive.
export function isProcessAlive(pid) {
  const plausible = Number.isFinite(pid) && pid > 0;
  if (!plausible) {
    return false;
  }

  try {
    process.kill(pid, 0);
  } catch (err) {
    // EPERM means the process exists but we may not signal it; count that as
    // alive so we never stomp on someone else's process.
    return err.code === "EPERM";
  }
  return true;
}
|
|
97
|
+
|
|
98
|
+
// Delivers `signal` (SIGTERM by default) to a worker.
//
// The process group (`-pid`) is signalled first so that children spawned by
// the worker go down with it; the bare PID is the fallback. Returns a
// descriptor { delivered, method, signal } with a `reason` added when
// nothing was delivered ("invalid-pid", "not-running" or "not-permitted").
// Errors other than ESRCH/EPERM from the final attempt are thrown.
export function terminateProcess(pid, signal = "SIGTERM") {
  if (!Number.isFinite(pid) || pid <= 0) {
    return { delivered: false, reason: "invalid-pid", method: null };
  }

  // Attempts one kill and captures the outcome rather than throwing.
  const send = (target) => {
    try {
      process.kill(target, signal);
    } catch (err) {
      return { ok: false, err };
    }
    return { ok: true };
  };

  const groupAttempt = send(-pid);
  if (groupAttempt.ok) {
    return { delivered: true, method: "process-group", signal };
  }

  const groupCode = groupAttempt.err?.code;
  if (groupCode === "EPERM") {
    return { delivered: false, reason: "not-permitted", method: "process-group", signal };
  }

  // ESRCH (no such group) and unknown failures both retry against the PID.
  const directAttempt = send(pid);
  if (directAttempt.ok) {
    return { delivered: true, method: "process", signal };
  }

  if (groupCode === "ESRCH") {
    if (directAttempt.err?.code === "ESRCH") {
      return { delivered: false, reason: "not-running", method: "process", signal };
    }
    throw directAttempt.err;
  }

  throw directAttempt.err ?? groupAttempt.err;
}
|
|
143
|
+
|
|
144
|
+
// Renders a failed command result as a single line:
// "<command> <args>: signal=<sig>|exit=<status>[: <stderr or stdout>]".
// Trimmed stderr is preferred over trimmed stdout; the trailing detail is
// omitted when both are empty.
export function formatCommandFailure(result) {
  const invocation = `${result.command} ${result.args.join(" ")}`.trim();
  const outcome = result.signal ? `signal=${result.signal}` : `exit=${result.status}`;
  const detail = (result.stderr || "").trim() || (result.stdout || "").trim();

  const segments = [invocation, outcome];
  if (detail) {
    segments.push(detail);
  }
  return segments.join(": ");
}
|
|
161
|
+
|
|
162
|
+
// Resolves (with no value) after roughly `ms` milliseconds.
export async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// Prompt template loader + interpolator.
|
|
2
|
+
//
|
|
3
|
+
// Templates live at <plugin-root>/prompts/<name>.md and use {{UPPER_SNAKE}}
|
|
4
|
+
// placeholders. The loader is intentionally dumb — no conditionals, no loops,
|
|
5
|
+
// no arithmetic. If a placeholder is missing from `vars`, it is substituted
|
|
6
|
+
// with "(missing:KEY)" so the prompt stays syntactically valid and any bug in
|
|
7
|
+
// the caller shows up clearly in the rendered prompt rather than silently.
|
|
8
|
+
|
|
9
|
+
import fs from "node:fs";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import { fileURLToPath } from "node:url";
|
|
12
|
+
|
|
13
|
+
// Directory containing this module.
const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url));
// Plugin root: two directory levels above this module's directory.
const PLUGIN_ROOT = path.resolve(SCRIPT_DIR, "../..");
|
|
15
|
+
|
|
16
|
+
// Accessor for the plugin root directory held in the module-level PLUGIN_ROOT.
export function getPluginRoot() {
  const root = PLUGIN_ROOT;
  return root;
}
|
|
19
|
+
|
|
20
|
+
// Path of a prompt template under <plugin-root>/prompts. The ".md" extension
// is appended unless `name` already ends with it.
export function getPromptPath(name) {
  const alreadyNamed = name.endsWith(".md");
  return path.join(PLUGIN_ROOT, "prompts", alreadyNamed ? name : `${name}.md`);
}
|
|
24
|
+
|
|
25
|
+
// Path of a JSON schema under <plugin-root>/schemas. A `name` that already
// ends in ".json" is used verbatim; anything else gets ".schema.json" appended.
export function getSchemaPath(name) {
  const alreadyNamed = name.endsWith(".json");
  return path.join(PLUGIN_ROOT, "schemas", alreadyNamed ? name : `${name}.schema.json`);
}
|
|
29
|
+
|
|
30
|
+
// Reads the named prompt template from disk and returns its text (UTF-8).
// Read errors from the filesystem are not caught here.
export function loadPromptTemplate(name) {
  const templatePath = getPromptPath(name);
  return fs.readFileSync(templatePath, "utf8");
}
|
|
33
|
+
|
|
34
|
+
// Substitutes every {{UPPER_SNAKE}} placeholder in `template` using `vars`.
//
// Rendering rules per placeholder:
//   - key absent from `vars`      → "(missing:KEY)"
//   - null / undefined            → "(none)"
//   - string                      → used as-is
//   - number / boolean            → String(value)
//   - anything else               → pretty-printed JSON (2-space indent),
//                                   falling back to String(value) if
//                                   serialization throws
export function interpolate(template, vars) {
  const render = (key) => {
    if (!(key in vars)) {
      return `(missing:${key})`;
    }
    const value = vars[key];
    if (value == null) {
      return "(none)";
    }
    switch (typeof value) {
      case "string":
        return value;
      case "number":
      case "boolean":
        return String(value);
      default:
        try {
          return JSON.stringify(value, null, 2);
        } catch {
          return String(value);
        }
    }
  };

  return template.replace(/\{\{([A-Z0-9_]+)\}\}/g, (_placeholder, key) => render(key));
}
|
|
50
|
+
|
|
51
|
+
// Loads the named prompt template and fills its placeholders from `vars`.
export function renderPrompt(name, vars) {
  const template = loadPromptTemplate(name);
  return interpolate(template, vars);
}
|