ralph-research 0.1.0 → 0.1.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/dist/adapters/extractor/command-extractor.js +46 -4
- package/dist/adapters/extractor/command-extractor.js.map +1 -1
- package/dist/adapters/fs/json-file-frontier-store.js +4 -2
- package/dist/adapters/fs/json-file-frontier-store.js.map +1 -1
- package/dist/adapters/fs/lockfile.d.ts +21 -1
- package/dist/adapters/fs/lockfile.js +65 -11
- package/dist/adapters/fs/lockfile.js.map +1 -1
- package/dist/adapters/fs/manifest-loader.d.ts +5 -1
- package/dist/adapters/fs/manifest-loader.js +15 -2
- package/dist/adapters/fs/manifest-loader.js.map +1 -1
- package/dist/adapters/git/git-client.d.ts +2 -0
- package/dist/adapters/git/git-client.js +19 -0
- package/dist/adapters/git/git-client.js.map +1 -1
- package/dist/app/services/manual-decision-service.js +76 -25
- package/dist/app/services/manual-decision-service.js.map +1 -1
- package/dist/app/services/project-state-service.d.ts +26 -1
- package/dist/app/services/project-state-service.js +172 -14
- package/dist/app/services/project-state-service.js.map +1 -1
- package/dist/app/services/run-admission-service.d.ts +20 -0
- package/dist/app/services/run-admission-service.js +30 -0
- package/dist/app/services/run-admission-service.js.map +1 -0
- package/dist/app/services/run-cycle-service.d.ts +5 -4
- package/dist/app/services/run-cycle-service.js +175 -14
- package/dist/app/services/run-cycle-service.js.map +1 -1
- package/dist/app/services/run-loop-service.d.ts +21 -0
- package/dist/app/services/run-loop-service.js +155 -0
- package/dist/app/services/run-loop-service.js.map +1 -0
- package/dist/cli/commands/demo.js +1 -0
- package/dist/cli/commands/demo.js.map +1 -1
- package/dist/cli/commands/doctor.d.ts +8 -0
- package/dist/cli/commands/doctor.js +59 -0
- package/dist/cli/commands/doctor.js.map +1 -0
- package/dist/cli/commands/init.js +1 -0
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/inspect.js +4 -0
- package/dist/cli/commands/inspect.js.map +1 -1
- package/dist/cli/commands/run.d.ts +3 -1
- package/dist/cli/commands/run.js +31 -28
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/status.js +35 -4
- package/dist/cli/commands/status.js.map +1 -1
- package/dist/cli/commands/validate.js +21 -18
- package/dist/cli/commands/validate.js.map +1 -1
- package/dist/cli/main.js +3 -10
- package/dist/cli/main.js.map +1 -1
- package/dist/core/engine/cycle-runner.d.ts +2 -0
- package/dist/core/engine/cycle-runner.js +504 -34
- package/dist/core/engine/cycle-runner.js.map +1 -1
- package/dist/core/engine/promotion-artifact.d.ts +23 -0
- package/dist/core/engine/promotion-artifact.js +58 -0
- package/dist/core/engine/promotion-artifact.js.map +1 -0
- package/dist/core/engine/workspace-manager.d.ts +10 -1
- package/dist/core/engine/workspace-manager.js +70 -3
- package/dist/core/engine/workspace-manager.js.map +1 -1
- package/dist/core/manifest/admission.d.ts +16 -0
- package/dist/core/manifest/admission.js +64 -0
- package/dist/core/manifest/admission.js.map +1 -0
- package/dist/core/manifest/schema.d.ts +47 -0
- package/dist/core/manifest/schema.js +18 -1
- package/dist/core/manifest/schema.js.map +1 -1
- package/dist/core/model/decision-record.d.ts +4 -0
- package/dist/core/model/decision-record.js +6 -0
- package/dist/core/model/decision-record.js.map +1 -1
- package/dist/core/model/metric-diagnostics.d.ts +7 -0
- package/dist/core/model/metric-diagnostics.js +51 -0
- package/dist/core/model/metric-diagnostics.js.map +1 -0
- package/dist/core/model/run-record.d.ts +6 -0
- package/dist/core/model/run-record.js +4 -0
- package/dist/core/model/run-record.js.map +1 -1
- package/dist/core/state/frontier-materializer.d.ts +12 -0
- package/dist/core/state/frontier-materializer.js +74 -0
- package/dist/core/state/frontier-materializer.js.map +1 -0
- package/dist/core/state/frontier-semantics.d.ts +12 -0
- package/dist/core/state/frontier-semantics.js +26 -0
- package/dist/core/state/frontier-semantics.js.map +1 -0
- package/dist/core/state/ratchet-engine.js +29 -21
- package/dist/core/state/ratchet-engine.js.map +1 -1
- package/dist/core/state/recovery-classifier.d.ts +17 -0
- package/dist/core/state/recovery-classifier.js +150 -0
- package/dist/core/state/recovery-classifier.js.map +1 -0
- package/dist/core/state/run-state-machine.js +33 -23
- package/dist/core/state/run-state-machine.js.map +1 -1
- package/dist/core/state/stopping-target.d.ts +14 -0
- package/dist/core/state/stopping-target.js +67 -0
- package/dist/core/state/stopping-target.js.map +1 -0
- package/dist/mcp/server.js +17 -23
- package/dist/mcp/server.js.map +1 -1
- package/package.json +2 -2
- package/templates/writing/ralph.yaml +7 -0
|
@@ -1,20 +1,29 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import { copyFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
3
|
import { dirname, extname, join, resolve } from "node:path";
|
|
4
|
+
import { summarizeMetricDiagnostics } from "../model/metric-diagnostics.js";
|
|
4
5
|
import { evaluateAnchorAgreement, applyAnchorAgreementGate, loadAnchorRecords } from "./anchor-checker.js";
|
|
5
6
|
import { sampleAuditQueue } from "./audit-sampler.js";
|
|
6
7
|
import { evaluateChangeBudget } from "./change-budget.js";
|
|
7
8
|
import { compactRecentHistory, countConsecutiveAutoAccepts } from "./history-compactor.js";
|
|
9
|
+
import { preparePromotionArtifact, requirePromotionPatch, requirePromotionPaths } from "./promotion-artifact.js";
|
|
8
10
|
import { runExperiment } from "./experiment-runner.js";
|
|
9
11
|
import { runLlmJudgeMetric } from "./judge-pack.js";
|
|
10
12
|
import { runParallelProposers } from "./parallel-proposer.js";
|
|
11
13
|
import { extractCommandMetric } from "../../adapters/extractor/command-extractor.js";
|
|
12
14
|
import { runCommandProposer } from "../../adapters/proposer/command-proposer.js";
|
|
13
15
|
import { evaluateConstraints } from "../state/constraint-engine.js";
|
|
14
|
-
import {
|
|
16
|
+
import { attachCommitShaToFrontierEntries, buildAcceptedFrontierEntry, updateAcceptedFrontier, } from "../state/frontier-semantics.js";
|
|
15
17
|
import { evaluateRatchet } from "../state/ratchet-engine.js";
|
|
16
18
|
import { advanceRunPhase } from "../state/run-state-machine.js";
|
|
19
|
+
import { derivePendingAction } from "../state/recovery-classifier.js";
|
|
17
20
|
export async function runCycle(input, dependencies) {
|
|
21
|
+
if (input.manifest.proposer.type !== "parallel") {
|
|
22
|
+
return runCommandCycle(input, dependencies);
|
|
23
|
+
}
|
|
24
|
+
if (input.resumeRun) {
|
|
25
|
+
throw new Error("parallel proposer runs cannot be resumed truthfully yet");
|
|
26
|
+
}
|
|
18
27
|
const now = dependencies.now ?? (() => new Date());
|
|
19
28
|
const context = await createRunContext(input.repoRoot, input.manifest, dependencies.runStore, now);
|
|
20
29
|
const manifestDir = dirname(input.manifestPath);
|
|
@@ -24,7 +33,7 @@ export async function runCycle(input, dependencies) {
|
|
|
24
33
|
const priorConsecutiveAccepts = countConsecutiveAutoAccepts(priorDecisions, {
|
|
25
34
|
metricId: "metric" in input.manifest.ratchet ? input.manifest.ratchet.metric ?? referenceMetric : referenceMetric,
|
|
26
35
|
});
|
|
27
|
-
let runRecord = createInitialRunRecord(input.manifest, undefined, context);
|
|
36
|
+
let runRecord = createInitialRunRecord(input.manifest, input.resolvedBaselineRef, undefined, context);
|
|
28
37
|
await dependencies.runStore.put(runRecord);
|
|
29
38
|
let frontier = input.currentFrontier;
|
|
30
39
|
try {
|
|
@@ -39,6 +48,7 @@ export async function runCycle(input, dependencies) {
|
|
|
39
48
|
repoRoot: input.repoRoot,
|
|
40
49
|
manifestDir,
|
|
41
50
|
manifest: input.manifest,
|
|
51
|
+
resolvedBaselineRef: input.resolvedBaselineRef,
|
|
42
52
|
runDir: context.runDir,
|
|
43
53
|
workspaceManager: dependencies.workspaceManager,
|
|
44
54
|
currentFrontier: frontier,
|
|
@@ -103,9 +113,16 @@ export async function runCycle(input, dependencies) {
|
|
|
103
113
|
});
|
|
104
114
|
await dependencies.runStore.put(runRecord);
|
|
105
115
|
const decisionId = `decision-${context.runId}`;
|
|
106
|
-
const
|
|
116
|
+
const decisionCreatedAt = now().toISOString();
|
|
117
|
+
const candidateFrontierEntry = buildAcceptedFrontierEntry({
|
|
118
|
+
runId: context.runId,
|
|
119
|
+
candidateId: selectedCandidate.candidateId,
|
|
120
|
+
acceptedAt: decisionCreatedAt,
|
|
121
|
+
metrics: selectedCandidate.metrics,
|
|
122
|
+
artifacts: selectedCandidate.artifacts,
|
|
123
|
+
});
|
|
107
124
|
const frontierUpdate = ratchetDecision.outcome === "accepted"
|
|
108
|
-
?
|
|
125
|
+
? updateAcceptedFrontier(input.manifest, frontier, candidateFrontierEntry)
|
|
109
126
|
: null;
|
|
110
127
|
let decisionRecord = {
|
|
111
128
|
decisionId,
|
|
@@ -116,13 +133,33 @@ export async function runCycle(input, dependencies) {
|
|
|
116
133
|
metricId: ratchetDecision.metricId,
|
|
117
134
|
...(ratchetDecision.delta === undefined ? {} : { delta: ratchetDecision.delta }),
|
|
118
135
|
reason: ratchetDecision.reason,
|
|
119
|
-
createdAt:
|
|
136
|
+
createdAt: decisionCreatedAt,
|
|
120
137
|
frontierChanged: frontierUpdate?.comparison.frontierChanged ?? false,
|
|
121
138
|
beforeFrontierIds: frontier.map((entry) => entry.frontierId),
|
|
122
139
|
afterFrontierIds: (frontierUpdate?.entries ?? frontier).map((entry) => entry.frontierId),
|
|
123
140
|
auditRequired: false,
|
|
141
|
+
...(buildDecisionDiagnostics(selectedCandidate.metrics[ratchetDecision.metricId])
|
|
142
|
+
? { diagnostics: buildDecisionDiagnostics(selectedCandidate.metrics[ratchetDecision.metricId]) }
|
|
143
|
+
: {}),
|
|
124
144
|
...(ratchetDecision.graduation ? { graduation: ratchetDecision.graduation } : {}),
|
|
125
145
|
};
|
|
146
|
+
if (ratchetDecision.outcome === "accepted") {
|
|
147
|
+
const promotion = await preparePromotionArtifact({
|
|
148
|
+
candidateId: selectedCandidate.candidateId,
|
|
149
|
+
runDir: context.runDir,
|
|
150
|
+
manifest: input.manifest,
|
|
151
|
+
workspaceManager: dependencies.workspaceManager,
|
|
152
|
+
});
|
|
153
|
+
runRecord = {
|
|
154
|
+
...runRecord,
|
|
155
|
+
proposal: {
|
|
156
|
+
...runRecord.proposal,
|
|
157
|
+
patchPath: promotion.patchPath,
|
|
158
|
+
changedPaths: promotion.changedPaths,
|
|
159
|
+
filesChanged: promotion.changedPaths.length,
|
|
160
|
+
},
|
|
161
|
+
};
|
|
162
|
+
}
|
|
126
163
|
let auditQueue = buildAuditQueue(ratchetDecision.metricId, decisionRecord, input.manifest, selectedCandidate.packByMetricId);
|
|
127
164
|
decisionRecord = {
|
|
128
165
|
...decisionRecord,
|
|
@@ -135,19 +172,14 @@ export async function runCycle(input, dependencies) {
|
|
|
135
172
|
});
|
|
136
173
|
await dependencies.runStore.put(runRecord);
|
|
137
174
|
if (ratchetDecision.outcome === "accepted" && frontierUpdate) {
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
});
|
|
141
|
-
const commitResult = await dependencies.gitClient.stageAndCommitPaths([...promoted.copiedPaths, ...promoted.deletedPaths], `rrx: accept ${context.runId}`);
|
|
175
|
+
await dependencies.gitClient.applyPatchIfNeeded(requirePromotionPatch(runRecord));
|
|
176
|
+
const commitResult = await dependencies.gitClient.stageAndCommitPaths(requirePromotionPaths(runRecord), `rrx: accept ${context.runId}`);
|
|
142
177
|
decisionRecord = {
|
|
143
178
|
...decisionRecord,
|
|
144
179
|
commitSha: commitResult.commitSha,
|
|
145
180
|
};
|
|
146
181
|
await dependencies.decisionStore.put(decisionRecord);
|
|
147
|
-
frontier = frontierUpdate.entries.
|
|
148
|
-
...entry,
|
|
149
|
-
commitSha: commitResult.commitSha,
|
|
150
|
-
}));
|
|
182
|
+
frontier = attachCommitShaToFrontierEntries(frontierUpdate.entries, context.runId, commitResult.commitSha);
|
|
151
183
|
runRecord = advanceRunPhase(runRecord, "committed");
|
|
152
184
|
await dependencies.runStore.put(runRecord);
|
|
153
185
|
await dependencies.frontierStore.save(frontier);
|
|
@@ -188,6 +220,443 @@ export async function runCycle(input, dependencies) {
|
|
|
188
220
|
};
|
|
189
221
|
}
|
|
190
222
|
}
|
|
223
|
+
async function runCommandCycle(input, dependencies) {
|
|
224
|
+
const now = dependencies.now ?? (() => new Date());
|
|
225
|
+
const manifestDir = dirname(input.manifestPath);
|
|
226
|
+
const referenceMetric = getReferenceMetric(input.manifest);
|
|
227
|
+
const priorRuns = await dependencies.runStore.list();
|
|
228
|
+
const priorDecisions = await dependencies.decisionStore.list();
|
|
229
|
+
const priorConsecutiveAccepts = countConsecutiveAutoAccepts(priorDecisions, {
|
|
230
|
+
metricId: "metric" in input.manifest.ratchet ? input.manifest.ratchet.metric ?? referenceMetric : referenceMetric,
|
|
231
|
+
});
|
|
232
|
+
const context = input.resumeRun
|
|
233
|
+
? createRunContextFromRecord(input.repoRoot, input.manifest, input.resumeRun)
|
|
234
|
+
: await createRunContext(input.repoRoot, input.manifest, dependencies.runStore, now);
|
|
235
|
+
let runRecord = input.resumeRun
|
|
236
|
+
? input.resumeRun
|
|
237
|
+
: createInitialRunRecord(input.manifest, input.resolvedBaselineRef, undefined, context);
|
|
238
|
+
if (!input.resumeRun) {
|
|
239
|
+
await dependencies.runStore.put(runRecord);
|
|
240
|
+
}
|
|
241
|
+
let frontier = input.currentFrontier;
|
|
242
|
+
let decisionRecord = runRecord.decisionId
|
|
243
|
+
? await dependencies.decisionStore.get(runRecord.decisionId)
|
|
244
|
+
: null;
|
|
245
|
+
let auditQueue = [];
|
|
246
|
+
let lastChangeBudget;
|
|
247
|
+
let lastAnchorCheck;
|
|
248
|
+
try {
|
|
249
|
+
while (true) {
|
|
250
|
+
const nextAction = runRecord.pendingAction !== "none"
|
|
251
|
+
? runRecord.pendingAction
|
|
252
|
+
: derivePendingAction(runRecord);
|
|
253
|
+
switch (nextAction) {
|
|
254
|
+
case "prepare_proposal": {
|
|
255
|
+
const proposerHistory = await buildProposerHistoryContext({
|
|
256
|
+
manifest: input.manifest,
|
|
257
|
+
runDir: context.runDir,
|
|
258
|
+
runs: priorRuns,
|
|
259
|
+
decisions: priorDecisions,
|
|
260
|
+
primaryMetric: referenceMetric,
|
|
261
|
+
});
|
|
262
|
+
const workspacePath = runRecord.workspacePath
|
|
263
|
+
?? (await dependencies.workspaceManager.createWorkspace(runRecord.candidateId, input.resolvedBaselineRef)).workspacePath;
|
|
264
|
+
const proposal = await executeProposal(input.manifest.proposer, workspacePath, proposerHistory);
|
|
265
|
+
const proposeStdoutPath = await persistText(join(context.runDir, "logs", `${runRecord.candidateId}.propose.stdout.log`), proposal.stdout);
|
|
266
|
+
runRecord = advanceRunPhase({
|
|
267
|
+
...runRecord,
|
|
268
|
+
workspacePath,
|
|
269
|
+
proposal: {
|
|
270
|
+
...runRecord.proposal,
|
|
271
|
+
proposerType: input.manifest.proposer.type,
|
|
272
|
+
summary: proposerHistory ? `${proposal.summary}; history_context=enabled` : proposal.summary,
|
|
273
|
+
operators: [],
|
|
274
|
+
},
|
|
275
|
+
logs: {
|
|
276
|
+
...runRecord.logs,
|
|
277
|
+
proposeStdoutPath,
|
|
278
|
+
},
|
|
279
|
+
}, "proposed");
|
|
280
|
+
await dependencies.runStore.put(runRecord);
|
|
281
|
+
break;
|
|
282
|
+
}
|
|
283
|
+
case "execute_experiment": {
|
|
284
|
+
const workspacePath = requireWorkspacePath(runRecord);
|
|
285
|
+
const experiment = await runExperiment(input.manifest.experiment.run, {
|
|
286
|
+
workspacePath,
|
|
287
|
+
});
|
|
288
|
+
const runStdoutPath = await persistText(join(context.runDir, "logs", `${runRecord.candidateId}.experiment.stdout.log`), experiment.stdout);
|
|
289
|
+
runRecord = advanceRunPhase({
|
|
290
|
+
...runRecord,
|
|
291
|
+
logs: {
|
|
292
|
+
...runRecord.logs,
|
|
293
|
+
runStdoutPath,
|
|
294
|
+
},
|
|
295
|
+
}, "executed");
|
|
296
|
+
await dependencies.runStore.put(runRecord);
|
|
297
|
+
break;
|
|
298
|
+
}
|
|
299
|
+
case "evaluate_metrics": {
|
|
300
|
+
const workspacePath = requireWorkspacePath(runRecord);
|
|
301
|
+
const metricEvaluation = await evaluateMetrics({
|
|
302
|
+
repoRoot: input.repoRoot,
|
|
303
|
+
manifestDir,
|
|
304
|
+
manifest: input.manifest,
|
|
305
|
+
currentFrontier: frontier,
|
|
306
|
+
workspacePath,
|
|
307
|
+
runDir: join(context.runDir, "judge", runRecord.candidateId),
|
|
308
|
+
...(dependencies.judgeProvider ? { judgeProvider: dependencies.judgeProvider } : {}),
|
|
309
|
+
});
|
|
310
|
+
const artifacts = await snapshotArtifacts(input.manifest.experiment.outputs, workspacePath, join(context.runDir, "artifacts", runRecord.candidateId));
|
|
311
|
+
const constraints = evaluateConstraints(input.manifest.constraints, metricEvaluation.metrics);
|
|
312
|
+
const changeBudget = await evaluateChangeBudget({
|
|
313
|
+
workspacePath,
|
|
314
|
+
scope: input.manifest.scope,
|
|
315
|
+
});
|
|
316
|
+
lastChangeBudget = changeBudget;
|
|
317
|
+
let ratchetDecision = resolveDecision({
|
|
318
|
+
manifest: input.manifest,
|
|
319
|
+
metrics: metricEvaluation.metrics,
|
|
320
|
+
currentFrontier: frontier,
|
|
321
|
+
constraints,
|
|
322
|
+
changeBudget,
|
|
323
|
+
priorConsecutiveAccepts,
|
|
324
|
+
});
|
|
325
|
+
lastAnchorCheck = undefined;
|
|
326
|
+
if (ratchetDecision.outcome === "accepted") {
|
|
327
|
+
lastAnchorCheck = metricEvaluation.anchorChecks.get(ratchetDecision.metricId);
|
|
328
|
+
if (lastAnchorCheck) {
|
|
329
|
+
const gated = applyAnchorAgreementGate(ratchetDecision.outcome, lastAnchorCheck);
|
|
330
|
+
ratchetDecision = {
|
|
331
|
+
...ratchetDecision,
|
|
332
|
+
outcome: gated.outcome,
|
|
333
|
+
frontierChanged: gated.outcome === "accepted",
|
|
334
|
+
reason: `${ratchetDecision.reason}; ${gated.reason}`,
|
|
335
|
+
};
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
runRecord = advanceRunPhase({
|
|
339
|
+
...runRecord,
|
|
340
|
+
proposal: {
|
|
341
|
+
...runRecord.proposal,
|
|
342
|
+
diffLines: changeBudget.summary.totalLineDelta,
|
|
343
|
+
filesChanged: changeBudget.summary.filesChanged,
|
|
344
|
+
changedPaths: changeBudget.summary.entries.map((entry) => entry.path),
|
|
345
|
+
withinBudget: changeBudget.withinBudget,
|
|
346
|
+
},
|
|
347
|
+
metrics: metricEvaluation.metrics,
|
|
348
|
+
constraints: constraints.results.map(stripConstraintReason),
|
|
349
|
+
artifacts,
|
|
350
|
+
}, "evaluated", {
|
|
351
|
+
status: ratchetDecision.outcome,
|
|
352
|
+
});
|
|
353
|
+
await dependencies.runStore.put(runRecord);
|
|
354
|
+
break;
|
|
355
|
+
}
|
|
356
|
+
case "write_decision": {
|
|
357
|
+
const decisionState = await buildDecisionState({
|
|
358
|
+
input,
|
|
359
|
+
dependencies,
|
|
360
|
+
runRecord,
|
|
361
|
+
frontier,
|
|
362
|
+
priorConsecutiveAccepts,
|
|
363
|
+
manifestDir,
|
|
364
|
+
runDir: context.runDir,
|
|
365
|
+
});
|
|
366
|
+
lastChangeBudget = decisionState.changeBudget;
|
|
367
|
+
lastAnchorCheck = decisionState.anchorCheck;
|
|
368
|
+
const decisionId = `decision-${runRecord.runId}`;
|
|
369
|
+
const decisionCreatedAt = now().toISOString();
|
|
370
|
+
const candidateFrontierEntry = buildAcceptedFrontierEntry({
|
|
371
|
+
runId: runRecord.runId,
|
|
372
|
+
candidateId: runRecord.candidateId,
|
|
373
|
+
acceptedAt: decisionCreatedAt,
|
|
374
|
+
metrics: runRecord.metrics,
|
|
375
|
+
artifacts: runRecord.artifacts,
|
|
376
|
+
});
|
|
377
|
+
const frontierUpdate = decisionState.ratchetDecision.outcome === "accepted"
|
|
378
|
+
? updateAcceptedFrontier(input.manifest, frontier, candidateFrontierEntry)
|
|
379
|
+
: null;
|
|
380
|
+
decisionRecord = {
|
|
381
|
+
decisionId,
|
|
382
|
+
runId: runRecord.runId,
|
|
383
|
+
outcome: decisionState.ratchetDecision.outcome,
|
|
384
|
+
actorType: "system",
|
|
385
|
+
policyType: decisionState.ratchetDecision.policyType,
|
|
386
|
+
metricId: decisionState.ratchetDecision.metricId,
|
|
387
|
+
...(decisionState.ratchetDecision.delta === undefined ? {} : { delta: decisionState.ratchetDecision.delta }),
|
|
388
|
+
reason: decisionState.ratchetDecision.reason,
|
|
389
|
+
createdAt: decisionCreatedAt,
|
|
390
|
+
frontierChanged: frontierUpdate?.comparison.frontierChanged ?? false,
|
|
391
|
+
beforeFrontierIds: frontier.map((entry) => entry.frontierId),
|
|
392
|
+
afterFrontierIds: (frontierUpdate?.entries ?? frontier).map((entry) => entry.frontierId),
|
|
393
|
+
auditRequired: false,
|
|
394
|
+
...(buildDecisionDiagnostics(runRecord.metrics[decisionState.ratchetDecision.metricId])
|
|
395
|
+
? { diagnostics: buildDecisionDiagnostics(runRecord.metrics[decisionState.ratchetDecision.metricId]) }
|
|
396
|
+
: {}),
|
|
397
|
+
...(decisionState.ratchetDecision.graduation ? { graduation: decisionState.ratchetDecision.graduation } : {}),
|
|
398
|
+
};
|
|
399
|
+
if (decisionState.ratchetDecision.outcome === "accepted") {
|
|
400
|
+
const promotion = await preparePromotionArtifact({
|
|
401
|
+
candidateId: runRecord.candidateId,
|
|
402
|
+
runDir: context.runDir,
|
|
403
|
+
manifest: input.manifest,
|
|
404
|
+
workspaceManager: dependencies.workspaceManager,
|
|
405
|
+
});
|
|
406
|
+
runRecord = {
|
|
407
|
+
...runRecord,
|
|
408
|
+
proposal: {
|
|
409
|
+
...runRecord.proposal,
|
|
410
|
+
patchPath: promotion.patchPath,
|
|
411
|
+
changedPaths: promotion.changedPaths,
|
|
412
|
+
filesChanged: promotion.changedPaths.length,
|
|
413
|
+
},
|
|
414
|
+
};
|
|
415
|
+
}
|
|
416
|
+
auditQueue = buildAuditQueue(decisionState.ratchetDecision.metricId, decisionRecord, input.manifest, decisionState.packByMetricId);
|
|
417
|
+
decisionRecord = {
|
|
418
|
+
...decisionRecord,
|
|
419
|
+
auditRequired: auditQueue.length > 0,
|
|
420
|
+
};
|
|
421
|
+
await dependencies.decisionStore.put(decisionRecord);
|
|
422
|
+
runRecord = advanceRunPhase(runRecord, "decision_written", {
|
|
423
|
+
status: decisionState.ratchetDecision.outcome,
|
|
424
|
+
decisionId,
|
|
425
|
+
});
|
|
426
|
+
await dependencies.runStore.put(runRecord);
|
|
427
|
+
if (decisionState.ratchetDecision.outcome === "needs_human") {
|
|
428
|
+
return {
|
|
429
|
+
status: "needs_human",
|
|
430
|
+
run: runRecord,
|
|
431
|
+
decision: decisionRecord,
|
|
432
|
+
frontier,
|
|
433
|
+
auditQueue,
|
|
434
|
+
...(lastChangeBudget ? { changeBudget: lastChangeBudget } : {}),
|
|
435
|
+
...(lastAnchorCheck ? { anchorCheck: lastAnchorCheck } : {}),
|
|
436
|
+
};
|
|
437
|
+
}
|
|
438
|
+
break;
|
|
439
|
+
}
|
|
440
|
+
case "commit_candidate": {
|
|
441
|
+
if (runRecord.status !== "accepted") {
|
|
442
|
+
throw new Error(`cannot commit candidate for non-accepted run ${runRecord.runId}`);
|
|
443
|
+
}
|
|
444
|
+
if (!decisionRecord) {
|
|
445
|
+
decisionRecord = await requireDecisionRecord(dependencies.decisionStore, runRecord);
|
|
446
|
+
}
|
|
447
|
+
await dependencies.gitClient.applyPatchIfNeeded(requirePromotionPatch(runRecord));
|
|
448
|
+
const commitResult = await dependencies.gitClient.stageAndCommitPaths(requirePromotionPaths(runRecord), `rrx: accept ${runRecord.runId}`);
|
|
449
|
+
decisionRecord = {
|
|
450
|
+
...decisionRecord,
|
|
451
|
+
commitSha: commitResult.commitSha,
|
|
452
|
+
};
|
|
453
|
+
await dependencies.decisionStore.put(decisionRecord);
|
|
454
|
+
runRecord = advanceRunPhase(runRecord, "committed");
|
|
455
|
+
await dependencies.runStore.put(runRecord);
|
|
456
|
+
break;
|
|
457
|
+
}
|
|
458
|
+
case "update_frontier": {
|
|
459
|
+
if (!decisionRecord) {
|
|
460
|
+
decisionRecord = await requireDecisionRecord(dependencies.decisionStore, runRecord);
|
|
461
|
+
}
|
|
462
|
+
if (!decisionRecord.commitSha) {
|
|
463
|
+
throw new Error(`cannot update frontier for ${runRecord.runId}: missing commit sha`);
|
|
464
|
+
}
|
|
465
|
+
const candidateFrontierEntry = {
|
|
466
|
+
...buildAcceptedFrontierEntry({
|
|
467
|
+
runId: runRecord.runId,
|
|
468
|
+
candidateId: runRecord.candidateId,
|
|
469
|
+
acceptedAt: decisionRecord.createdAt,
|
|
470
|
+
metrics: runRecord.metrics,
|
|
471
|
+
artifacts: runRecord.artifacts,
|
|
472
|
+
}),
|
|
473
|
+
commitSha: decisionRecord.commitSha,
|
|
474
|
+
};
|
|
475
|
+
frontier = updateAcceptedFrontier(input.manifest, frontier, candidateFrontierEntry).entries;
|
|
476
|
+
await dependencies.frontierStore.save(frontier);
|
|
477
|
+
runRecord = advanceRunPhase(runRecord, "frontier_updated");
|
|
478
|
+
await dependencies.runStore.put(runRecord);
|
|
479
|
+
break;
|
|
480
|
+
}
|
|
481
|
+
case "cleanup_workspace": {
|
|
482
|
+
await dependencies.workspaceManager.cleanupWorkspace(runRecord.candidateId);
|
|
483
|
+
runRecord = advanceRunPhase(runRecord, "completed", {
|
|
484
|
+
status: runRecord.status,
|
|
485
|
+
});
|
|
486
|
+
await dependencies.runStore.put(runRecord);
|
|
487
|
+
return {
|
|
488
|
+
status: toCycleRunStatus(runRecord.status),
|
|
489
|
+
run: runRecord,
|
|
490
|
+
...(decisionRecord ? { decision: decisionRecord } : {}),
|
|
491
|
+
frontier,
|
|
492
|
+
auditQueue,
|
|
493
|
+
...(lastChangeBudget ? { changeBudget: lastChangeBudget } : {}),
|
|
494
|
+
...(lastAnchorCheck ? { anchorCheck: lastAnchorCheck } : {}),
|
|
495
|
+
};
|
|
496
|
+
}
|
|
497
|
+
case "none": {
|
|
498
|
+
if (runRecord.status === "needs_human") {
|
|
499
|
+
return {
|
|
500
|
+
status: "needs_human",
|
|
501
|
+
run: runRecord,
|
|
502
|
+
...(decisionRecord ? { decision: decisionRecord } : {}),
|
|
503
|
+
frontier,
|
|
504
|
+
auditQueue,
|
|
505
|
+
...(lastChangeBudget ? { changeBudget: lastChangeBudget } : {}),
|
|
506
|
+
...(lastAnchorCheck ? { anchorCheck: lastAnchorCheck } : {}),
|
|
507
|
+
};
|
|
508
|
+
}
|
|
509
|
+
if (runRecord.phase === "completed") {
|
|
510
|
+
return {
|
|
511
|
+
status: toCycleRunStatus(runRecord.status),
|
|
512
|
+
run: runRecord,
|
|
513
|
+
...(decisionRecord ? { decision: decisionRecord } : {}),
|
|
514
|
+
frontier,
|
|
515
|
+
auditQueue,
|
|
516
|
+
...(lastChangeBudget ? { changeBudget: lastChangeBudget } : {}),
|
|
517
|
+
...(lastAnchorCheck ? { anchorCheck: lastAnchorCheck } : {}),
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
throw new Error(`run ${runRecord.runId} is missing a resumable next action`);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
catch (error) {
|
|
526
|
+
runRecord = advanceRunPhase(runRecord, "failed", {
|
|
527
|
+
error: {
|
|
528
|
+
message: error instanceof Error ? error.message : String(error),
|
|
529
|
+
...(error instanceof Error && error.stack ? { stack: error.stack } : {}),
|
|
530
|
+
},
|
|
531
|
+
status: "failed",
|
|
532
|
+
});
|
|
533
|
+
await dependencies.runStore.put(runRecord);
|
|
534
|
+
return {
|
|
535
|
+
status: "failed",
|
|
536
|
+
run: runRecord,
|
|
537
|
+
frontier,
|
|
538
|
+
auditQueue: [],
|
|
539
|
+
};
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
function createRunContextFromRecord(repoRoot, manifest, run) {
|
|
543
|
+
return {
|
|
544
|
+
runId: run.runId,
|
|
545
|
+
cycle: run.cycle,
|
|
546
|
+
candidateId: run.candidateId,
|
|
547
|
+
runDir: join(resolve(repoRoot), manifest.storage.root, "runs", run.runId),
|
|
548
|
+
startedAt: run.startedAt,
|
|
549
|
+
manifestHash: run.manifestHash,
|
|
550
|
+
};
|
|
551
|
+
}
|
|
552
|
+
async function buildDecisionState(input) {
|
|
553
|
+
const workspacePath = requireWorkspacePath(input.runRecord);
|
|
554
|
+
const changeBudget = await evaluateChangeBudget({
|
|
555
|
+
workspacePath,
|
|
556
|
+
scope: input.input.manifest.scope,
|
|
557
|
+
});
|
|
558
|
+
const constraints = summarizeStoredConstraints(input.runRecord.constraints);
|
|
559
|
+
const { anchorChecks, packByMetricId } = await evaluateStoredAnchorChecks({
|
|
560
|
+
manifest: input.input.manifest,
|
|
561
|
+
manifestDir: input.manifestDir,
|
|
562
|
+
runDir: input.runDir,
|
|
563
|
+
...(input.dependencies.judgeProvider ? { judgeProvider: input.dependencies.judgeProvider } : {}),
|
|
564
|
+
});
|
|
565
|
+
let ratchetDecision = resolveDecision({
|
|
566
|
+
manifest: input.input.manifest,
|
|
567
|
+
metrics: input.runRecord.metrics,
|
|
568
|
+
currentFrontier: input.frontier,
|
|
569
|
+
constraints,
|
|
570
|
+
changeBudget,
|
|
571
|
+
priorConsecutiveAccepts: input.priorConsecutiveAccepts,
|
|
572
|
+
});
|
|
573
|
+
let anchorCheck;
|
|
574
|
+
if (ratchetDecision.outcome === "accepted") {
|
|
575
|
+
anchorCheck = anchorChecks.get(ratchetDecision.metricId);
|
|
576
|
+
if (anchorCheck) {
|
|
577
|
+
const gated = applyAnchorAgreementGate(ratchetDecision.outcome, anchorCheck);
|
|
578
|
+
ratchetDecision = {
|
|
579
|
+
...ratchetDecision,
|
|
580
|
+
outcome: gated.outcome,
|
|
581
|
+
frontierChanged: gated.outcome === "accepted",
|
|
582
|
+
reason: `${ratchetDecision.reason}; ${gated.reason}`,
|
|
583
|
+
};
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
return {
|
|
587
|
+
ratchetDecision,
|
|
588
|
+
changeBudget,
|
|
589
|
+
...(anchorCheck ? { anchorCheck } : {}),
|
|
590
|
+
packByMetricId,
|
|
591
|
+
};
|
|
592
|
+
}
|
|
593
|
+
async function evaluateStoredAnchorChecks(input) {
|
|
594
|
+
const anchorChecks = new Map();
|
|
595
|
+
const packByMetricId = new Map();
|
|
596
|
+
const anchorCache = new Map();
|
|
597
|
+
for (const metricDefinition of input.manifest.metrics.catalog) {
|
|
598
|
+
if (metricDefinition.extractor.type !== "llm_judge") {
|
|
599
|
+
continue;
|
|
600
|
+
}
|
|
601
|
+
if (!input.judgeProvider) {
|
|
602
|
+
throw new Error(`metric ${metricDefinition.id} requires a judge provider`);
|
|
603
|
+
}
|
|
604
|
+
const extractor = metricDefinition.extractor;
|
|
605
|
+
const pack = getJudgePack(input.manifest, extractor.judgePack);
|
|
606
|
+
packByMetricId.set(metricDefinition.id, pack);
|
|
607
|
+
if (!anchorCache.has(pack.id)) {
|
|
608
|
+
const anchors = pack.anchors
|
|
609
|
+
? await loadAnchorRecords(resolve(input.manifestDir, pack.anchors.path))
|
|
610
|
+
: [];
|
|
611
|
+
anchorCache.set(pack.id, await evaluateAnchorAgreement({
|
|
612
|
+
pack,
|
|
613
|
+
extractor,
|
|
614
|
+
provider: input.judgeProvider,
|
|
615
|
+
anchors,
|
|
616
|
+
}));
|
|
617
|
+
}
|
|
618
|
+
anchorChecks.set(metricDefinition.id, anchorCache.get(pack.id));
|
|
619
|
+
}
|
|
620
|
+
return {
|
|
621
|
+
anchorChecks,
|
|
622
|
+
packByMetricId,
|
|
623
|
+
};
|
|
624
|
+
}
|
|
625
|
+
function summarizeStoredConstraints(constraints) {
|
|
626
|
+
const failing = constraints.find((constraint) => !constraint.passed);
|
|
627
|
+
if (!failing) {
|
|
628
|
+
return {
|
|
629
|
+
passed: true,
|
|
630
|
+
reason: "all constraints satisfied",
|
|
631
|
+
};
|
|
632
|
+
}
|
|
633
|
+
return {
|
|
634
|
+
passed: false,
|
|
635
|
+
reason: `constraint ${failing.metric} failed`,
|
|
636
|
+
};
|
|
637
|
+
}
|
|
638
|
+
async function requireDecisionRecord(decisionStore, run) {
|
|
639
|
+
if (!run.decisionId) {
|
|
640
|
+
throw new Error(`run ${run.runId} is missing a decision id`);
|
|
641
|
+
}
|
|
642
|
+
const decision = await decisionStore.get(run.decisionId);
|
|
643
|
+
if (!decision) {
|
|
644
|
+
throw new Error(`decision ${run.decisionId} was not found`);
|
|
645
|
+
}
|
|
646
|
+
return decision;
|
|
647
|
+
}
|
|
648
|
+
function requireWorkspacePath(run) {
|
|
649
|
+
if (!run.workspacePath) {
|
|
650
|
+
throw new Error(`run ${run.runId} is missing a durable workspace path`);
|
|
651
|
+
}
|
|
652
|
+
return run.workspacePath;
|
|
653
|
+
}
|
|
654
|
+
function toCycleRunStatus(status) {
|
|
655
|
+
if (status === "accepted" || status === "rejected" || status === "needs_human" || status === "failed") {
|
|
656
|
+
return status;
|
|
657
|
+
}
|
|
658
|
+
throw new Error(`run ended without a terminal cycle status: ${status}`);
|
|
659
|
+
}
|
|
191
660
|
async function createRunContext(repoRoot, manifest, runStore, now) {
|
|
192
661
|
const runs = await runStore.list();
|
|
193
662
|
const nextCycle = runs.reduce((highest, run) => Math.max(highest, run.cycle), 0) + 1;
|
|
@@ -205,17 +674,19 @@ async function createRunContext(repoRoot, manifest, runStore, now) {
|
|
|
205
674
|
manifestHash,
|
|
206
675
|
};
|
|
207
676
|
}
|
|
208
|
-
function createInitialRunRecord(manifest, workspacePath, context) {
|
|
677
|
+
function createInitialRunRecord(manifest, resolvedBaselineRef, workspacePath, context) {
|
|
209
678
|
return {
|
|
210
679
|
runId: context.runId,
|
|
211
680
|
cycle: context.cycle,
|
|
212
681
|
candidateId: context.candidateId,
|
|
213
682
|
status: "running",
|
|
214
|
-
phase: "
|
|
215
|
-
pendingAction: "
|
|
683
|
+
phase: "started",
|
|
684
|
+
pendingAction: "prepare_proposal",
|
|
216
685
|
startedAt: context.startedAt,
|
|
686
|
+
updatedAt: context.startedAt,
|
|
687
|
+
currentStepStartedAt: context.startedAt,
|
|
217
688
|
manifestHash: context.manifestHash,
|
|
218
|
-
workspaceRef:
|
|
689
|
+
workspaceRef: resolvedBaselineRef,
|
|
219
690
|
...(workspacePath ? { workspacePath } : {}),
|
|
220
691
|
proposal: {
|
|
221
692
|
proposerType: manifest.proposer.type,
|
|
@@ -244,6 +715,7 @@ async function prepareCandidateAttempt(input) {
|
|
|
244
715
|
repoRoot: input.repoRoot,
|
|
245
716
|
manifestDir: input.manifestDir,
|
|
246
717
|
manifest: input.manifest,
|
|
718
|
+
resolvedBaselineRef: input.resolvedBaselineRef,
|
|
247
719
|
proposer: strategy,
|
|
248
720
|
candidateId,
|
|
249
721
|
runDir: input.runDir,
|
|
@@ -293,6 +765,7 @@ async function prepareCandidateAttempt(input) {
|
|
|
293
765
|
repoRoot: input.repoRoot,
|
|
294
766
|
manifestDir: input.manifestDir,
|
|
295
767
|
manifest: input.manifest,
|
|
768
|
+
resolvedBaselineRef: input.resolvedBaselineRef,
|
|
296
769
|
proposer: input.manifest.proposer,
|
|
297
770
|
candidateId: input.baseCandidateId,
|
|
298
771
|
runDir: input.runDir,
|
|
@@ -303,7 +776,7 @@ async function prepareCandidateAttempt(input) {
|
|
|
303
776
|
});
|
|
304
777
|
}
|
|
305
778
|
async function executeCandidateStrategy(input) {
|
|
306
|
-
const workspace = await input.workspaceManager.createWorkspace(input.candidateId);
|
|
779
|
+
const workspace = await input.workspaceManager.createWorkspace(input.candidateId, input.resolvedBaselineRef);
|
|
307
780
|
const proposal = await executeProposal(input.proposer, workspace.workspacePath, input.historyContext);
|
|
308
781
|
const proposeStdoutPath = await persistText(join(input.runDir, "logs", `${input.candidateId}.propose.stdout.log`), proposal.stdout);
|
|
309
782
|
const experiment = await runExperiment(input.manifest.experiment.run, {
|
|
@@ -533,16 +1006,6 @@ function resolveDecision(input) {
|
|
|
533
1006
|
...(input.constraints.passed ? {} : { constraintFailureReason: input.constraints.reason }),
|
|
534
1007
|
});
|
|
535
1008
|
}
|
|
536
|
-
function buildFrontierEntry(runId, candidateId, now, metrics, artifacts) {
|
|
537
|
-
return {
|
|
538
|
-
frontierId: `frontier-${runId}`,
|
|
539
|
-
runId,
|
|
540
|
-
candidateId,
|
|
541
|
-
acceptedAt: now().toISOString(),
|
|
542
|
-
metrics,
|
|
543
|
-
artifacts,
|
|
544
|
-
};
|
|
545
|
-
}
|
|
546
1009
|
function buildAuditQueue(metricId, decision, manifest, packByMetricId) {
|
|
547
1010
|
const pack = packByMetricId.get(metricId);
|
|
548
1011
|
if (!pack) {
|
|
@@ -620,6 +1083,19 @@ function stripConstraintReason(constraint) {
|
|
|
620
1083
|
op: constraint.op,
|
|
621
1084
|
};
|
|
622
1085
|
}
|
|
1086
|
+
/**
 * Build the diagnostics payload for a decision from one metric.
 *
 * @param {object|undefined} metric - Metric to summarize; may be absent.
 * @returns {{reasons: *, sourceMetricId?: *}|undefined} `undefined` when the
 *   metric is falsy or `summarizeMetricDiagnostics` returns a falsy value.
 *   Otherwise an object holding `reasons`, plus `sourceMetricId` only when the
 *   summary provides a truthy one.
 */
function buildDecisionDiagnostics(metric) {
    const summary = metric ? summarizeMetricDiagnostics(metric) : undefined;
    if (!summary) {
        return undefined;
    }
    const { reasons, sourceMetricId } = summary;
    const payload = { reasons };
    // The key is left out entirely, not set to undefined, when no source metric exists.
    if (sourceMetricId) {
        payload.sourceMetricId = sourceMetricId;
    }
    return payload;
}
|
|
623
1099
|
async function persistText(path, value) {
|
|
624
1100
|
await mkdir(dirname(path), { recursive: true });
|
|
625
1101
|
await writeFile(path, value, "utf8");
|
|
@@ -643,10 +1119,4 @@ function getReferenceMetric(manifest) {
|
|
|
643
1119
|
}
|
|
644
1120
|
return manifest.frontier.objectives[0].metric;
|
|
645
1121
|
}
|
|
646
|
-
function updateFrontier(manifest, currentFrontier, candidateEntry) {
|
|
647
|
-
if (manifest.frontier.strategy === "single_best") {
|
|
648
|
-
return updateSingleBestFrontier(currentFrontier, candidateEntry, manifest.frontier.primaryMetric);
|
|
649
|
-
}
|
|
650
|
-
return updateParetoFrontier(currentFrontier, candidateEntry, manifest.frontier.objectives, manifest.frontier.tieBreaker, manifest.frontier.referencePoint);
|
|
651
|
-
}
|
|
652
1122
|
//# sourceMappingURL=cycle-runner.js.map
|