ralph-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +98 -0
- package/dist/adapters/extractor/command-extractor.d.ts +9 -0
- package/dist/adapters/extractor/command-extractor.js +93 -0
- package/dist/adapters/extractor/command-extractor.js.map +1 -0
- package/dist/adapters/extractor/llm-judge-extractor.d.ts +9 -0
- package/dist/adapters/extractor/llm-judge-extractor.js +12 -0
- package/dist/adapters/extractor/llm-judge-extractor.js.map +1 -0
- package/dist/adapters/fs/json-file-decision-store.d.ts +10 -0
- package/dist/adapters/fs/json-file-decision-store.js +53 -0
- package/dist/adapters/fs/json-file-decision-store.js.map +1 -0
- package/dist/adapters/fs/json-file-frontier-store.d.ts +8 -0
- package/dist/adapters/fs/json-file-frontier-store.js +29 -0
- package/dist/adapters/fs/json-file-frontier-store.js.map +1 -0
- package/dist/adapters/fs/json-file-run-store.d.ts +10 -0
- package/dist/adapters/fs/json-file-run-store.js +53 -0
- package/dist/adapters/fs/json-file-run-store.js.map +1 -0
- package/dist/adapters/fs/lockfile.d.ts +24 -0
- package/dist/adapters/fs/lockfile.js +110 -0
- package/dist/adapters/fs/lockfile.js.map +1 -0
- package/dist/adapters/fs/manifest-loader.d.ts +10 -0
- package/dist/adapters/fs/manifest-loader.js +43 -0
- package/dist/adapters/fs/manifest-loader.js.map +1 -0
- package/dist/adapters/git/git-client.d.ts +9 -0
- package/dist/adapters/git/git-client.js +23 -0
- package/dist/adapters/git/git-client.js.map +1 -0
- package/dist/adapters/index.d.ts +1 -0
- package/dist/adapters/index.js +3 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/judge/llm-judge-provider.d.ts +33 -0
- package/dist/adapters/judge/llm-judge-provider.js +90 -0
- package/dist/adapters/judge/llm-judge-provider.js.map +1 -0
- package/dist/adapters/proposer/command-proposer.d.ts +15 -0
- package/dist/adapters/proposer/command-proposer.js +29 -0
- package/dist/adapters/proposer/command-proposer.js.map +1 -0
- package/dist/app/context.d.ts +5 -0
- package/dist/app/context.js +7 -0
- package/dist/app/context.js.map +1 -0
- package/dist/app/services/manual-decision-service.d.ts +20 -0
- package/dist/app/services/manual-decision-service.js +143 -0
- package/dist/app/services/manual-decision-service.js.map +1 -0
- package/dist/app/services/project-state-service.d.ts +52 -0
- package/dist/app/services/project-state-service.js +92 -0
- package/dist/app/services/project-state-service.js.map +1 -0
- package/dist/app/services/run-cycle-service.d.ts +25 -0
- package/dist/app/services/run-cycle-service.js +69 -0
- package/dist/app/services/run-cycle-service.js.map +1 -0
- package/dist/cli/commands/accept.d.ts +10 -0
- package/dist/cli/commands/accept.js +54 -0
- package/dist/cli/commands/accept.js.map +1 -0
- package/dist/cli/commands/demo.d.ts +9 -0
- package/dist/cli/commands/demo.js +108 -0
- package/dist/cli/commands/demo.js.map +1 -0
- package/dist/cli/commands/frontier.d.ts +8 -0
- package/dist/cli/commands/frontier.js +48 -0
- package/dist/cli/commands/frontier.js.map +1 -0
- package/dist/cli/commands/init.d.ts +10 -0
- package/dist/cli/commands/init.js +123 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/inspect.d.ts +8 -0
- package/dist/cli/commands/inspect.js +55 -0
- package/dist/cli/commands/inspect.js.map +1 -0
- package/dist/cli/commands/reject.d.ts +10 -0
- package/dist/cli/commands/reject.js +54 -0
- package/dist/cli/commands/reject.js.map +1 -0
- package/dist/cli/commands/run.d.ts +13 -0
- package/dist/cli/commands/run.js +71 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/serve-mcp.d.ts +7 -0
- package/dist/cli/commands/serve-mcp.js +32 -0
- package/dist/cli/commands/serve-mcp.js.map +1 -0
- package/dist/cli/commands/status.d.ts +8 -0
- package/dist/cli/commands/status.js +53 -0
- package/dist/cli/commands/status.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +11 -0
- package/dist/cli/commands/validate.js +56 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/main.d.ts +2 -0
- package/dist/cli/main.js +38 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/core/engine/anchor-checker.d.ts +35 -0
- package/dist/core/engine/anchor-checker.js +84 -0
- package/dist/core/engine/anchor-checker.js.map +1 -0
- package/dist/core/engine/audit-sampler.d.ts +16 -0
- package/dist/core/engine/audit-sampler.js +25 -0
- package/dist/core/engine/audit-sampler.js.map +1 -0
- package/dist/core/engine/change-budget.d.ts +11 -0
- package/dist/core/engine/change-budget.js +10 -0
- package/dist/core/engine/change-budget.js.map +1 -0
- package/dist/core/engine/cycle-runner.d.ts +39 -0
- package/dist/core/engine/cycle-runner.js +652 -0
- package/dist/core/engine/cycle-runner.js.map +1 -0
- package/dist/core/engine/experiment-runner.d.ts +13 -0
- package/dist/core/engine/experiment-runner.js +24 -0
- package/dist/core/engine/experiment-runner.js.map +1 -0
- package/dist/core/engine/history-compactor.d.ts +15 -0
- package/dist/core/engine/history-compactor.js +76 -0
- package/dist/core/engine/history-compactor.js.map +1 -0
- package/dist/core/engine/judge-pack.d.ts +44 -0
- package/dist/core/engine/judge-pack.js +111 -0
- package/dist/core/engine/judge-pack.js.map +1 -0
- package/dist/core/engine/parallel-proposer.d.ts +21 -0
- package/dist/core/engine/parallel-proposer.js +58 -0
- package/dist/core/engine/parallel-proposer.js.map +1 -0
- package/dist/core/engine/scope-checker.d.ts +35 -0
- package/dist/core/engine/scope-checker.js +166 -0
- package/dist/core/engine/scope-checker.js.map +1 -0
- package/dist/core/engine/workspace-manager.d.ts +32 -0
- package/dist/core/engine/workspace-manager.js +145 -0
- package/dist/core/engine/workspace-manager.js.map +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.js +3 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/manifest/defaults.d.ts +55 -0
- package/dist/core/manifest/defaults.js +56 -0
- package/dist/core/manifest/defaults.js.map +1 -0
- package/dist/core/manifest/schema.d.ts +647 -0
- package/dist/core/manifest/schema.js +254 -0
- package/dist/core/manifest/schema.js.map +1 -0
- package/dist/core/model/decision-record.d.ts +38 -0
- package/dist/core/model/decision-record.js +29 -0
- package/dist/core/model/decision-record.js.map +1 -0
- package/dist/core/model/frontier-entry.d.ts +24 -0
- package/dist/core/model/frontier-entry.js +15 -0
- package/dist/core/model/frontier-entry.js.map +1 -0
- package/dist/core/model/metric.d.ts +13 -0
- package/dist/core/model/metric.js +10 -0
- package/dist/core/model/metric.js.map +1 -0
- package/dist/core/model/run-record.d.ts +110 -0
- package/dist/core/model/run-record.js +104 -0
- package/dist/core/model/run-record.js.map +1 -0
- package/dist/core/ports/decision-store.d.ts +6 -0
- package/dist/core/ports/decision-store.js +2 -0
- package/dist/core/ports/decision-store.js.map +1 -0
- package/dist/core/ports/frontier-store.d.ts +5 -0
- package/dist/core/ports/frontier-store.js +2 -0
- package/dist/core/ports/frontier-store.js.map +1 -0
- package/dist/core/ports/run-store.d.ts +6 -0
- package/dist/core/ports/run-store.js +2 -0
- package/dist/core/ports/run-store.js.map +1 -0
- package/dist/core/state/constraint-engine.d.ts +18 -0
- package/dist/core/state/constraint-engine.js +42 -0
- package/dist/core/state/constraint-engine.js.map +1 -0
- package/dist/core/state/frontier-engine.d.ts +24 -0
- package/dist/core/state/frontier-engine.js +178 -0
- package/dist/core/state/frontier-engine.js.map +1 -0
- package/dist/core/state/ratchet-engine.d.ts +28 -0
- package/dist/core/state/ratchet-engine.js +177 -0
- package/dist/core/state/ratchet-engine.js.map +1 -0
- package/dist/core/state/run-state-machine.d.ts +17 -0
- package/dist/core/state/run-state-machine.js +94 -0
- package/dist/core/state/run-state-machine.js.map +1 -0
- package/dist/mcp/main.d.ts +1 -0
- package/dist/mcp/main.js +8 -0
- package/dist/mcp/main.js.map +1 -0
- package/dist/mcp/server.d.ts +6 -0
- package/dist/mcp/server.js +97 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/shared/fs-errors.d.ts +1 -0
- package/dist/shared/fs-errors.js +4 -0
- package/dist/shared/fs-errors.js.map +1 -0
- package/dist/shared/logger.d.ts +2 -0
- package/dist/shared/logger.js +5 -0
- package/dist/shared/logger.js.map +1 -0
- package/dist/shared/template-utils.d.ts +9 -0
- package/dist/shared/template-utils.js +50 -0
- package/dist/shared/template-utils.js.map +1 -0
- package/package.json +44 -0
- package/templates/writing/docs/draft.md +1 -0
- package/templates/writing/prompts/judge.md +15 -0
- package/templates/writing/ralph.yaml +63 -0
- package/templates/writing/scripts/experiment.mjs +6 -0
- package/templates/writing/scripts/metric.mjs +24 -0
- package/templates/writing/scripts/propose.mjs +13 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { copyFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { dirname, extname, join, resolve } from "node:path";
|
|
4
|
+
import { evaluateAnchorAgreement, applyAnchorAgreementGate, loadAnchorRecords } from "./anchor-checker.js";
|
|
5
|
+
import { sampleAuditQueue } from "./audit-sampler.js";
|
|
6
|
+
import { evaluateChangeBudget } from "./change-budget.js";
|
|
7
|
+
import { compactRecentHistory, countConsecutiveAutoAccepts } from "./history-compactor.js";
|
|
8
|
+
import { runExperiment } from "./experiment-runner.js";
|
|
9
|
+
import { runLlmJudgeMetric } from "./judge-pack.js";
|
|
10
|
+
import { runParallelProposers } from "./parallel-proposer.js";
|
|
11
|
+
import { extractCommandMetric } from "../../adapters/extractor/command-extractor.js";
|
|
12
|
+
import { runCommandProposer } from "../../adapters/proposer/command-proposer.js";
|
|
13
|
+
import { evaluateConstraints } from "../state/constraint-engine.js";
|
|
14
|
+
import { updateParetoFrontier, updateSingleBestFrontier } from "../state/frontier-engine.js";
|
|
15
|
+
import { evaluateRatchet } from "../state/ratchet-engine.js";
|
|
16
|
+
import { advanceRunPhase } from "../state/run-state-machine.js";
|
|
17
|
+
/**
 * Runs one propose / evaluate / decide cycle and persists every phase transition.
 *
 * Sequence, as implemented below:
 *  1. Allocate run identifiers and store an initial run record.
 *  2. Optionally build a proposer history context, then produce and evaluate a candidate.
 *  3. Resolve the ratchet decision; an "accepted" outcome is additionally passed through
 *     the anchor-agreement gate when an anchor check exists for the decision's metric.
 *  4. Store the decision record. On acceptance, promote the workspace, commit the
 *     promoted paths, and save the updated frontier.
 *  5. Clean up the workspace unless the outcome is "needs_human", then mark the run completed.
 *
 * Errors thrown inside the try block are recorded on the run record (phase "failed") and
 * reported through the return value rather than rethrown. Errors thrown before the try
 * block, or by the catch handler itself, propagate to the caller.
 *
 * @param {object} input - Reads `repoRoot`, `manifest`, `manifestPath` and `currentFrontier`.
 * @param {object} dependencies - Reads `runStore`, `decisionStore`, `frontierStore`,
 *   `workspaceManager`, `gitClient`, plus the optional `judgeProvider` and `now`.
 * @returns {Promise<object>} On success: `status`, `run`, `decision`, `frontier`, `auditQueue`,
 *   `changeBudget`, and `anchorCheck` when one was found. On failure: `status: "failed"`,
 *   `run`, `frontier`, and an empty `auditQueue`.
 */
export async function runCycle(input, dependencies) {
    // The clock is injectable; it defaults to the wall clock.
    const now = dependencies.now ?? (() => new Date());
    const context = await createRunContext(input.repoRoot, input.manifest, dependencies.runStore, now);
    const manifestDir = dirname(input.manifestPath);
    const referenceMetric = getReferenceMetric(input.manifest);
    // Both lists are read before this cycle's own record is stored.
    const priorRuns = await dependencies.runStore.list();
    const priorDecisions = await dependencies.decisionStore.list();
    // Prefer the ratchet's own `metric` when the ratchet declares one; otherwise use the reference metric.
    const priorConsecutiveAccepts = countConsecutiveAutoAccepts(priorDecisions, {
        metricId: "metric" in input.manifest.ratchet ? input.manifest.ratchet.metric ?? referenceMetric : referenceMetric,
    });
    // No workspace exists yet, so the initial record is created without a workspacePath.
    let runRecord = createInitialRunRecord(input.manifest, undefined, context);
    await dependencies.runStore.put(runRecord);
    let frontier = input.currentFrontier;
    try {
        // Undefined when proposer history is disabled in the manifest.
        const proposerHistory = await buildProposerHistoryContext({
            manifest: input.manifest,
            runDir: context.runDir,
            runs: priorRuns,
            decisions: priorDecisions,
            primaryMetric: referenceMetric,
        });
        // Optional inputs are only added as keys when they are present.
        const selectedCandidate = await prepareCandidateAttempt({
            repoRoot: input.repoRoot,
            manifestDir,
            manifest: input.manifest,
            runDir: context.runDir,
            workspaceManager: dependencies.workspaceManager,
            currentFrontier: frontier,
            baseCandidateId: context.candidateId,
            referenceMetric,
            ...(proposerHistory ? { historyContext: proposerHistory } : {}),
            ...(dependencies.judgeProvider ? { judgeProvider: dependencies.judgeProvider } : {}),
        });
        // Fold the selected candidate's identity, proposal summary and log paths into the run record.
        runRecord = {
            ...runRecord,
            candidateId: selectedCandidate.candidateId,
            workspacePath: selectedCandidate.workspacePath,
            proposal: {
                ...runRecord.proposal,
                proposerType: input.manifest.proposer.type,
                summary: selectedCandidate.summary,
                operators: selectedCandidate.operators,
            },
            logs: {
                ...runRecord.logs,
                proposeStdoutPath: selectedCandidate.proposeStdoutPath,
                runStdoutPath: selectedCandidate.runStdoutPath,
            },
        };
        runRecord = advanceRunPhase(runRecord, "executed");
        await dependencies.runStore.put(runRecord);
        let ratchetDecision = resolveDecision({
            manifest: input.manifest,
            metrics: selectedCandidate.metrics,
            currentFrontier: frontier,
            constraints: selectedCandidate.constraints,
            changeBudget: selectedCandidate.changeBudget,
            priorConsecutiveAccepts,
        });
        // The anchor gate is only consulted for outcomes that are currently "accepted".
        let anchorCheck;
        if (ratchetDecision.outcome === "accepted") {
            anchorCheck = selectedCandidate.anchorChecks.get(ratchetDecision.metricId);
            if (anchorCheck) {
                const gated = applyAnchorAgreementGate(ratchetDecision.outcome, anchorCheck);
                // The gate may change the outcome; its reason is appended to the ratchet's reason.
                ratchetDecision = {
                    ...ratchetDecision,
                    outcome: gated.outcome,
                    frontierChanged: gated.outcome === "accepted",
                    reason: `${ratchetDecision.reason}; ${gated.reason}`,
                };
            }
        }
        // Record the evaluation results (diff summary, metrics, constraints, artifacts) on the run.
        runRecord = advanceRunPhase({
            ...runRecord,
            proposal: {
                ...runRecord.proposal,
                diffLines: selectedCandidate.changeBudget.summary.totalLineDelta,
                filesChanged: selectedCandidate.changeBudget.summary.filesChanged,
                changedPaths: selectedCandidate.changeBudget.summary.entries.map((entry) => entry.path),
                withinBudget: selectedCandidate.changeBudget.withinBudget,
            },
            metrics: selectedCandidate.metrics,
            constraints: selectedCandidate.constraints.results.map(stripConstraintReason),
            artifacts: selectedCandidate.artifacts,
        }, "evaluated", {
            status: ratchetDecision.outcome,
        });
        await dependencies.runStore.put(runRecord);
        const decisionId = `decision-${context.runId}`;
        const candidateFrontierEntry = buildFrontierEntry(context.runId, selectedCandidate.candidateId, now, selectedCandidate.metrics, selectedCandidate.artifacts);
        // A frontier update is only computed for accepted candidates; otherwise it stays null.
        const frontierUpdate = ratchetDecision.outcome === "accepted"
            ? updateFrontier(input.manifest, frontier, candidateFrontierEntry)
            : null;
        let decisionRecord = {
            decisionId,
            runId: context.runId,
            outcome: ratchetDecision.outcome,
            actorType: "system",
            policyType: ratchetDecision.policyType,
            metricId: ratchetDecision.metricId,
            ...(ratchetDecision.delta === undefined ? {} : { delta: ratchetDecision.delta }),
            reason: ratchetDecision.reason,
            createdAt: now().toISOString(),
            // Taken from the frontier comparison, not from ratchetDecision.frontierChanged.
            frontierChanged: frontierUpdate?.comparison.frontierChanged ?? false,
            beforeFrontierIds: frontier.map((entry) => entry.frontierId),
            afterFrontierIds: (frontierUpdate?.entries ?? frontier).map((entry) => entry.frontierId),
            auditRequired: false,
            ...(ratchetDecision.graduation ? { graduation: ratchetDecision.graduation } : {}),
        };
        // auditRequired starts false and is set once the audit queue has been built from the record.
        let auditQueue = buildAuditQueue(ratchetDecision.metricId, decisionRecord, input.manifest, selectedCandidate.packByMetricId);
        decisionRecord = {
            ...decisionRecord,
            auditRequired: auditQueue.length > 0,
        };
        await dependencies.decisionStore.put(decisionRecord);
        runRecord = advanceRunPhase(runRecord, "decision_written", {
            status: ratchetDecision.outcome,
            decisionId,
        });
        await dependencies.runStore.put(runRecord);
        if (ratchetDecision.outcome === "accepted" && frontierUpdate) {
            // Experiment output paths are excluded from promotion.
            const promoted = await dependencies.workspaceManager.promoteWorkspace(selectedCandidate.candidateId, {
                excludePaths: input.manifest.experiment.outputs.map((output) => output.path),
            });
            const commitResult = await dependencies.gitClient.stageAndCommitPaths([...promoted.copiedPaths, ...promoted.deletedPaths], `rrx: accept ${context.runId}`);
            // The decision is stored a second time, now carrying the commit SHA.
            decisionRecord = {
                ...decisionRecord,
                commitSha: commitResult.commitSha,
            };
            await dependencies.decisionStore.put(decisionRecord);
            // Every entry of the updated frontier is stamped with this commit's SHA.
            frontier = frontierUpdate.entries.map((entry) => ({
                ...entry,
                commitSha: commitResult.commitSha,
            }));
            runRecord = advanceRunPhase(runRecord, "committed");
            await dependencies.runStore.put(runRecord);
            await dependencies.frontierStore.save(frontier);
            runRecord = advanceRunPhase(runRecord, "frontier_updated");
            await dependencies.runStore.put(runRecord);
        }
        // The workspace is kept when the outcome is "needs_human".
        if (ratchetDecision.outcome !== "needs_human") {
            await dependencies.workspaceManager.cleanupWorkspace(selectedCandidate.candidateId);
        }
        runRecord = advanceRunPhase(runRecord, "completed", {
            status: ratchetDecision.outcome,
        });
        await dependencies.runStore.put(runRecord);
        return {
            status: ratchetDecision.outcome,
            run: runRecord,
            decision: decisionRecord,
            frontier,
            auditQueue,
            changeBudget: selectedCandidate.changeBudget,
            ...(anchorCheck ? { anchorCheck } : {}),
        };
    }
    catch (error) {
        // Record the failure on the run and report it via the return value; nothing is rethrown here.
        // NOTE(review): no workspace cleanup happens on this path — confirm that is intended.
        runRecord = advanceRunPhase(runRecord, "failed", {
            error: {
                message: error instanceof Error ? error.message : String(error),
                ...(error instanceof Error && error.stack ? { stack: error.stack } : {}),
            },
            status: "failed",
        });
        await dependencies.runStore.put(runRecord);
        return {
            status: "failed",
            run: runRecord,
            frontier,
            auditQueue: [],
        };
    }
}
|
|
191
|
+
/**
 * Derives the identifiers and bookkeeping values for the next run.
 *
 * The cycle number is one more than the highest `cycle` among the stored runs
 * (1 when the store is empty); run and candidate ids share that number,
 * zero-padded to four digits.
 *
 * @param {string} repoRoot - Repository root; resolved to an absolute path.
 * @param {object} manifest - Manifest; `storage.root` is read and the whole object is hashed.
 * @param {{ list: () => Promise<Array<{ cycle: number }>> }} runStore - Source of existing runs.
 * @param {() => Date} now - Clock used for the `startedAt` timestamp.
 * @returns {Promise<{ runId: string, cycle: number, candidateId: string, runDir: string, startedAt: string, manifestHash: string }>}
 */
async function createRunContext(repoRoot, manifest, runStore, now) {
    const existingRuns = await runStore.list();
    let highestCycle = 0;
    for (const existing of existingRuns) {
        highestCycle = Math.max(highestCycle, existing.cycle);
    }
    const cycle = highestCycle + 1;
    const paddedCycle = String(cycle).padStart(4, "0");
    const runId = `run-${paddedCycle}`;
    const runDir = join(resolve(repoRoot), manifest.storage.root, "runs", runId);
    const startedAt = now().toISOString();
    // SHA-256 over the JSON serialisation of the manifest, hex encoded.
    const hasher = createHash("sha256");
    hasher.update(JSON.stringify(manifest));
    return {
        runId,
        cycle,
        candidateId: `candidate-${paddedCycle}`,
        runDir,
        startedAt,
        manifestHash: hasher.digest("hex"),
    };
}
|
|
208
|
+
/**
 * Builds the run record stored at the start of a cycle, before any candidate exists.
 *
 * The record starts in phase "proposed" with status "running" and a pending
 * "execute_experiment" action. Operators are copied from the manifest only for
 * an "operator_llm" proposer; every other proposer type starts with an empty list.
 *
 * @param {object} manifest - Manifest; reads `project.baselineRef` and `proposer`.
 * @param {string | undefined} workspacePath - Included in the record only when truthy.
 * @param {object} context - Reads `runId`, `cycle`, `candidateId`, `startedAt`, `manifestHash`.
 * @returns {object} The initial run record.
 */
function createInitialRunRecord(manifest, workspacePath, context) {
    const { proposer } = manifest;
    return {
        runId: context.runId,
        cycle: context.cycle,
        candidateId: context.candidateId,
        status: "running",
        phase: "proposed",
        pendingAction: "execute_experiment",
        startedAt: context.startedAt,
        manifestHash: context.manifestHash,
        workspaceRef: manifest.project.baselineRef,
        ...(workspacePath ? { workspacePath } : {}),
        proposal: {
            proposerType: proposer.type,
            summary: "proposal pending",
            // The former nested ternary returned [] for both "parallel" and everything else,
            // so a single conditional expresses the same result.
            operators: proposer.type === "operator_llm" ? proposer.operators : [],
        },
        artifacts: [],
        metrics: {},
        constraints: [],
        logs: {},
    };
}
|
|
235
|
+
/**
 * Produces the single candidate that the rest of the cycle evaluates.
 *
 * For a "parallel" proposer, every strategy is executed under its own candidate id
 * (`<baseCandidateId>-pNN`, one-based and zero-padded to two digits), one candidate is
 * selected by `runParallelProposers`, and the workspaces of all other candidates are
 * cleaned up. For any other proposer type the manifest's proposer is executed once
 * under `baseCandidateId`.
 *
 * @param {object} input - Reads `repoRoot`, `manifestDir`, `manifest`, `runDir`,
 *   `workspaceManager`, `currentFrontier`, `baseCandidateId`, `referenceMetric`, and the
 *   optional `historyContext` and `judgeProvider`.
 * @returns {Promise<object>} The executed candidate. For the parallel path `proposerType`
 *   is "parallel" and the selection reason is appended to the summary.
 * @throws {Error} When the selected strategy index is not present in the candidate set.
 */
async function prepareCandidateAttempt(input) {
    if (input.manifest.proposer.type === "parallel") {
        const selection = await runParallelProposers({
            strategies: input.manifest.proposer.strategies,
            pickBest: input.manifest.proposer.pickBest,
            referenceMetric: input.referenceMetric,
            execute: async (strategy, index) => {
                const candidateId = `${input.baseCandidateId}-p${String(index + 1).padStart(2, "0")}`;
                const candidate = await executeCandidateStrategy({
                    repoRoot: input.repoRoot,
                    manifestDir: input.manifestDir,
                    manifest: input.manifest,
                    proposer: strategy,
                    candidateId,
                    runDir: input.runDir,
                    workspaceManager: input.workspaceManager,
                    currentFrontier: input.currentFrontier,
                    ...(input.judgeProvider ? { judgeProvider: input.judgeProvider } : {}),
                    ...(input.historyContext ? { historyContext: input.historyContext } : {}),
                });
                return {
                    strategyIndex: index,
                    strategyType: strategy.type,
                    candidate,
                    metrics: candidate.metrics,
                    summary: candidate.summary,
                };
            },
            // A pairwise comparator is only supplied when the manifest asks for judge_pairwise.
            ...(input.manifest.proposer.pickBest === "judge_pairwise"
                ? {
                    comparePairwise: async (left, right) => compareCandidateAttempts({
                        // Forward the caller's repo root alongside manifestDir so the comparison
                        // context carries the same root that candidate execution receives.
                        repoRoot: input.repoRoot,
                        manifest: input.manifest,
                        manifestDir: input.manifestDir,
                        referenceMetric: input.referenceMetric,
                        ...(input.judgeProvider ? { judgeProvider: input.judgeProvider } : {}),
                    }, left.candidate, right.candidate),
                }
                : {}),
        });
        // Re-derive metrics and summary from the executed candidate itself.
        const hydratedCandidates = selection.candidates.map((candidate) => ({
            ...candidate,
            metrics: candidate.candidate.metrics,
            summary: candidate.candidate.summary,
        }));
        const selected = hydratedCandidates.find((candidate) => candidate.strategyIndex === selection.selected.strategyIndex);
        if (!selected) {
            throw new Error("selected parallel candidate missing from candidate set");
        }
        // Only the winner's workspace is kept; the others are cleaned up concurrently.
        await Promise.all(hydratedCandidates
            .filter((candidate) => candidate.strategyIndex !== selected.strategyIndex)
            .map((candidate) => input.workspaceManager.cleanupWorkspace(candidate.candidate.candidateId)));
        return {
            ...selected.candidate,
            proposerType: "parallel",
            summary: `${selected.candidate.summary}; ${selection.selectionReason}`,
        };
    }
    return executeCandidateStrategy({
        repoRoot: input.repoRoot,
        manifestDir: input.manifestDir,
        manifest: input.manifest,
        proposer: input.manifest.proposer,
        candidateId: input.baseCandidateId,
        runDir: input.runDir,
        workspaceManager: input.workspaceManager,
        currentFrontier: input.currentFrontier,
        ...(input.historyContext ? { historyContext: input.historyContext } : {}),
        ...(input.judgeProvider ? { judgeProvider: input.judgeProvider } : {}),
    });
}
|
|
305
|
+
/**
 * Executes one proposer strategy end to end inside a fresh workspace.
 *
 * Steps run strictly in this order: create workspace, run the proposal, persist the
 * proposal stdout, run the experiment, persist the experiment stdout, evaluate
 * metrics, snapshot artifacts, evaluate constraints, evaluate the change budget.
 *
 * @param {object} input - Reads `repoRoot`, `manifestDir`, `manifest`, `proposer`,
 *   `candidateId`, `runDir`, `workspaceManager`, `currentFrontier`, and the optional
 *   `judgeProvider` and `historyContext`.
 * @returns {Promise<object>} Candidate description: ids and paths, proposer type and
 *   operators, summary, log paths, metrics, artifacts, constraints, change budget,
 *   anchor checks and the judge packs keyed by metric id.
 */
async function executeCandidateStrategy(input) {
    const { candidateId, proposer, runDir } = input;
    const stdoutLogPath = (stage) => join(runDir, "logs", `${candidateId}.${stage}.stdout.log`);

    const { workspacePath } = await input.workspaceManager.createWorkspace(candidateId);

    const proposal = await executeProposal(proposer, workspacePath, input.historyContext);
    const proposeStdoutPath = await persistText(stdoutLogPath("propose"), proposal.stdout);

    const experiment = await runExperiment(input.manifest.experiment.run, { workspacePath });
    const runStdoutPath = await persistText(stdoutLogPath("experiment"), experiment.stdout);

    // The judge provider key is only present when a provider was supplied.
    const metricRequest = {
        repoRoot: input.repoRoot,
        manifestDir: input.manifestDir,
        manifest: input.manifest,
        currentFrontier: input.currentFrontier,
        workspacePath,
        runDir: join(runDir, "judge", candidateId),
    };
    if (input.judgeProvider) {
        metricRequest.judgeProvider = input.judgeProvider;
    }
    const evaluation = await evaluateMetrics(metricRequest);

    const artifactDir = join(runDir, "artifacts", candidateId);
    const artifacts = await snapshotArtifacts(input.manifest.experiment.outputs, workspacePath, artifactDir);
    const constraints = evaluateConstraints(input.manifest.constraints, evaluation.metrics);
    const changeBudget = await evaluateChangeBudget({
        workspacePath,
        scope: input.manifest.scope,
    });

    // The summary is tagged when a history context was handed to the proposer.
    let summary = proposal.summary;
    if (input.historyContext) {
        summary = `${proposal.summary}; history_context=enabled`;
    }

    return {
        candidateId,
        workspacePath,
        proposerType: proposer.type,
        operators: proposer.type === "operator_llm" ? proposer.operators : [],
        summary,
        proposeStdoutPath,
        runStdoutPath,
        metrics: evaluation.metrics,
        artifacts,
        constraints,
        changeBudget,
        anchorChecks: evaluation.anchorChecks,
        packByMetricId: evaluation.packByMetricId,
    };
}
|
|
344
|
+
/**
 * Runs a proposer inside the given workspace.
 *
 * Only "command" proposers are handled here. When a history context is supplied,
 * it is exposed to the command through RRX_HISTORY_* environment variables.
 *
 * @param {{ type: string }} proposer - Proposer definition from the manifest.
 * @param {string} workspacePath - Workspace the proposer runs in.
 * @param {{ summary: string, path: string } | undefined} historyContext - Optional history.
 * @returns {Promise<object>} Whatever `runCommandProposer` resolves to.
 * @throws {Error} For any proposer type other than "command".
 */
async function executeProposal(proposer, workspacePath, historyContext) {
    const isCommandProposer = proposer.type === "command";
    if (!isCommandProposer) {
        throw new Error(`unsupported proposer type ${proposer.type} in cycle runner`);
    }
    const options = { workspacePath };
    if (historyContext) {
        options.env = {
            RRX_HISTORY_ENABLED: "1",
            RRX_HISTORY_SUMMARY: historyContext.summary,
            RRX_HISTORY_PATH: historyContext.path,
        };
    }
    return runCommandProposer(proposer, options);
}
|
|
361
|
+
/**
 * Decides which of two parallel candidates is better on the reference metric.
 *
 * When the reference metric is an `llm_score` metric with a pairwise `llm_judge`
 * extractor, the judge is asked directly: `left` is presented as the candidate
 * (its workspace is used for the prompt) and `right` as the sole frontier entry.
 * A judge verdict of "candidate" maps to "left" and "incumbent" to "right". Any
 * other verdict, and every non-pairwise metric, falls back to `compareByMetric`.
 *
 * @param {object} input - Reads `manifest`, `manifestDir`, `referenceMetric`, the optional
 *   `judgeProvider`, and the optional `repoRoot`. When `repoRoot` is absent the current
 *   working directory is used, which was the previous unconditional behaviour.
 * @param {object} left - Candidate treated as the challenger.
 * @param {object} right - Candidate treated as the incumbent.
 * @returns {Promise<string>} "left", "right", or the result of `compareByMetric`.
 * @throws {Error} When the reference metric is not in the catalog, or when a pairwise
 *   judge metric is configured but no judge provider was supplied.
 */
async function compareCandidateAttempts(input, left, right) {
    const metricDefinition = input.manifest.metrics.catalog.find((metric) => metric.id === input.referenceMetric);
    if (!metricDefinition) {
        throw new Error(`missing reference metric definition ${input.referenceMetric}`);
    }
    if (metricDefinition.kind !== "llm_score" ||
        metricDefinition.extractor.type !== "llm_judge" ||
        metricDefinition.extractor.mode !== "pairwise") {
        return compareByMetric(left, right, input.referenceMetric);
    }
    if (!input.judgeProvider) {
        throw new Error("parallel proposer pickBest=judge_pairwise requires a judge provider");
    }
    const extractor = metricDefinition.extractor;
    const pack = getJudgePack(input.manifest, extractor.judgePack);
    const prompt = await buildJudgePrompt({
        // Prefer the caller-supplied repo root so the prompt is built against the same
        // root as regular metric evaluation; the process working directory is only a fallback.
        repoRoot: input.repoRoot ?? process.cwd(),
        manifestDir: input.manifestDir,
        workspacePath: left.workspacePath,
        extractor,
        // Present the right-hand candidate as a synthetic, single-entry frontier.
        frontier: [
            {
                frontierId: `parallel-${right.candidateId}`,
                runId: `parallel-${right.candidateId}`,
                candidateId: right.candidateId,
                // Fixed epoch timestamp: this entry was never actually accepted.
                acceptedAt: new Date(0).toISOString(),
                metrics: right.metrics,
                artifacts: right.artifacts,
            },
        ],
    });
    const result = await runLlmJudgeMetric({
        metricId: metricDefinition.id,
        direction: metricDefinition.direction,
        extractor,
        pack,
        prompt,
        provider: input.judgeProvider,
    });
    const winner = result.details.winner;
    if (winner === "candidate") {
        return "left";
    }
    if (winner === "incumbent") {
        return "right";
    }
    return compareByMetric(left, right, input.referenceMetric);
}
|
|
409
|
+
/**
 * Compares two candidates on a single metric value.
 *
 * The direction is taken from the left candidate's metric: with "maximize" the
 * larger value wins, with any other direction the smaller value wins.
 *
 * @param {{ metrics: object }} left - First candidate.
 * @param {{ metrics: object }} right - Second candidate.
 * @param {string} metricId - Metric to compare on.
 * @returns {"left" | "right" | "tie"} The winning side, or "tie" when neither value is
 *   strictly greater than the other.
 * @throws {Error} When either candidate lacks the metric.
 */
function compareByMetric(left, right, metricId) {
    const ours = left.metrics[metricId];
    const theirs = right.metrics[metricId];
    if (!(ours && theirs)) {
        throw new Error(`parallel proposer comparison requires metric "${metricId}" on both candidates`);
    }
    const leftIsHigher = ours.value > theirs.value;
    const leftIsLower = ours.value < theirs.value;
    if (!leftIsHigher && !leftIsLower) {
        return "tie";
    }
    // Left wins when it sits on the side the direction favours.
    const higherWins = ours.direction === "maximize";
    return leftIsHigher === higherWins ? "left" : "right";
}
|
|
432
|
+
/**
 * Builds the history summary handed to the proposer, when history is enabled.
 *
 * Only runs whose phase is "completed" are summarised. The summary is written to
 * `<runDir>/history/proposer-history.md` and returned together with that path.
 *
 * @param {object} input - Reads `manifest.proposer.history`, `runDir`, `runs`,
 *   `decisions` and `primaryMetric`.
 * @returns {Promise<{ summary: string, path: string } | undefined>} Undefined when
 *   history is disabled in the manifest.
 */
async function buildProposerHistoryContext(input) {
    const historySettings = input.manifest.proposer.history;
    if (!historySettings.enabled) {
        return undefined;
    }
    const completedRuns = input.runs.filter((run) => run.phase === "completed");
    const { summary } = compactRecentHistory({
        runs: completedRuns,
        decisions: input.decisions,
        maxRuns: historySettings.maxRuns,
        primaryMetric: input.primaryMetric,
    });
    const path = join(input.runDir, "history", "proposer-history.md");
    await persistText(path, summary);
    return { summary, path };
}
|
|
449
|
+
/**
 * Evaluates every metric in the manifest catalog for one workspace, in catalog order.
 *
 * Metrics with a "command" extractor are extracted directly. Every other metric is
 * scored through the judge provider: its judge pack is looked up, a prompt is built,
 * the judge result is written to `<runDir>/judge/<metricId>.json`, and an
 * anchor-agreement check is attached. Anchor agreement is computed once per pack id
 * and reused for later metrics that share the pack.
 *
 * @param {object} input - Reads `manifest`, `manifestDir`, `repoRoot`, `workspacePath`,
 *   `runDir`, `currentFrontier` and the optional `judgeProvider`.
 * @returns {Promise<{ metrics: object, anchorChecks: Map, packByMetricId: Map }>}
 *   Metric results keyed by metric id, plus anchor checks and judge packs keyed by
 *   metric id (judge-scored metrics only).
 * @throws {Error} When a judge-scored metric is encountered without a judge provider.
 */
async function evaluateMetrics(input) {
    const { manifest, manifestDir, workspacePath, judgeProvider } = input;
    const metrics = {};
    const anchorChecks = new Map();
    const agreementByPackId = new Map();
    const packByMetricId = new Map();

    for (const definition of manifest.metrics.catalog) {
        const { extractor } = definition;
        const metricId = definition.id;

        if (extractor.type === "command") {
            metrics[metricId] = await extractCommandMetric(extractor, {
                metricId,
                direction: definition.direction,
                workspacePath,
            });
        }
        else {
            if (!judgeProvider) {
                throw new Error(`metric ${metricId} requires a judge provider`);
            }
            const pack = getJudgePack(manifest, extractor.judgePack);
            packByMetricId.set(metricId, pack);

            const prompt = await buildJudgePrompt({
                repoRoot: input.repoRoot,
                manifestDir,
                workspacePath,
                extractor,
                frontier: input.currentFrontier,
            });
            const judged = await runLlmJudgeMetric({
                metricId,
                direction: definition.direction,
                extractor,
                pack,
                prompt,
                provider: judgeProvider,
            });

            // Persist the raw judge details, then reference the trace from the metric itself.
            const judgeTracePath = join(input.runDir, "judge", `${metricId}.json`);
            await persistJson(judgeTracePath, judged.details);
            metrics[metricId] = {
                ...judged,
                judgeTracePath,
                details: { ...judged.details, judgeTracePath },
            };

            // First metric to use a pack pays for the anchor-agreement evaluation.
            if (!agreementByPackId.has(pack.id)) {
                let anchors = [];
                if (pack.anchors) {
                    anchors = await loadAnchorRecords(resolve(manifestDir, pack.anchors.path));
                }
                const agreement = await evaluateAnchorAgreement({
                    pack,
                    extractor,
                    provider: judgeProvider,
                    anchors,
                });
                agreementByPackId.set(pack.id, agreement);
            }
            anchorChecks.set(metricId, agreementByPackId.get(pack.id));
        }
    }

    return { metrics, anchorChecks, packByMetricId };
}
|
|
511
|
+
/**
 * Decides the outcome for a candidate.
 *
 * When the change budget was exceeded, the ratchet is skipped and a decision is built
 * directly: "needs_human" if the budget check asked for it, otherwise "rejected".
 * Otherwise the decision is whatever `evaluateRatchet` returns for the manifest's ratchet
 * policy.
 *
 * @param {object} input - Reads `manifest`, `changeBudget`, `constraints`, `metrics`,
 *   `currentFrontier` and `priorConsecutiveAccepts`.
 * @returns {object} The decision object (built here or returned by `evaluateRatchet`).
 */
function resolveDecision(input) {
    const { manifest, changeBudget } = input;
    const referenceMetric = getReferenceMetric(manifest);
    if (changeBudget.withinBudget) {
        // Optional ratchet arguments are only present when they apply, so the ratchet
        // never sees an explicit `undefined` for them.
        const paretoOptions = manifest.frontier.strategy === "pareto"
            ? { paretoObjectives: manifest.frontier.objectives }
            : {};
        const constraintOptions = input.constraints.passed
            ? {}
            : { constraintFailureReason: input.constraints.reason };
        return evaluateRatchet({
            ratchet: manifest.ratchet,
            primaryMetric: referenceMetric,
            candidateMetrics: input.metrics,
            currentFrontier: input.currentFrontier,
            priorConsecutiveAccepts: input.priorConsecutiveAccepts,
            ...paretoOptions,
            ...constraintOptions,
        });
    }
    let outcome = "rejected";
    if (changeBudget.outcome === "needs_human") {
        outcome = "needs_human";
    }
    return {
        outcome,
        frontierChanged: false,
        metricId: referenceMetric,
        policyType: manifest.ratchet.type,
        reason: changeBudget.reason,
    };
}
|
|
536
|
+
/**
 * Builds the frontier record for an accepted candidate.
 *
 * @param {string} runId - Run identifier; also used to derive `frontierId`.
 * @param {string} candidateId - Identifier of the accepted candidate.
 * @param {() => Date} now - Clock function; its result is serialized with `toISOString()`.
 * @param {object} metrics - Metric results stored on the entry as-is.
 * @param {Array} artifacts - Artifact descriptors stored on the entry as-is.
 * @returns {object} Entry with `frontierId`, `runId`, `candidateId`, `acceptedAt`,
 *   `metrics` and `artifacts`.
 */
function buildFrontierEntry(runId, candidateId, now, metrics, artifacts) {
    const frontierId = `frontier-${runId}`;
    const acceptedAt = now().toISOString();
    return { frontierId, runId, candidateId, acceptedAt, metrics, artifacts };
}
|
|
546
|
+
/**
 * Builds the audit queue for a decision on a judge-backed metric.
 *
 * @param {string} metricId - Metric the decision was made on.
 * @param {object} decision - Reads `runId`, `decisionId`, `outcome`, `reason` and `createdAt`.
 * @param {object} manifest - Not used by this function; kept in the signature for callers.
 * @param {Map} packByMetricId - Judge pack per metric id.
 * @returns {Array|*} An empty array when the metric has no judge pack; otherwise the
 *   result of `sampleAuditQueue` for the single candidate built from the decision.
 */
function buildAuditQueue(metricId, decision, manifest, packByMetricId) {
    const pack = packByMetricId.get(metricId);
    if (pack) {
        const { runId, decisionId, outcome, reason } = decision;
        const candidate = { runId, decisionId, outcome, metricId, reason };
        return sampleAuditQueue([candidate], pack, decision.createdAt);
    }
    return [];
}
|
|
561
|
+
/**
 * Assembles the judge prompt text: the template first, then one labelled section per
 * extractor input, in the order the inputs are declared.
 *
 * Each input section has the form "[<key>]" on its own line followed by the resolved
 * value, and is separated from the preceding text by a blank line.
 *
 * @param {object} input - Reads `manifestDir`, `workspacePath`, `frontier` and
 *   `extractor` (`extractor.prompt`, `extractor.inputs`).
 * @returns {Promise<string>} The assembled prompt.
 */
async function buildJudgePrompt(input) {
    const { extractor, manifestDir, workspacePath, frontier } = input;
    const parts = [await readTemplateOrInline(manifestDir, extractor.prompt)];
    // Inputs are resolved one after another so sections keep declaration order.
    for (const [label, source] of Object.entries(extractor.inputs)) {
        const resolved = await resolvePromptInput(source, workspacePath, frontier);
        parts.push(`\n[${label}]\n${resolved}`);
    }
    return parts.join("\n");
}
|
|
570
|
+
/**
 * Resolves one prompt input source to the text that goes into the judge prompt.
 *
 * - "frontier.best:<reference>": looks at the first frontier entry and picks the first
 *   artifact whose `id` equals the reference or whose `path` ends with it, then returns
 *   that file's contents. Returns "" when the frontier is empty or no artifact matches.
 * - Anything else is tried as a file path relative to `workspacePath`; if the file
 *   cannot be read, the source string itself is returned (treated as a literal).
 *
 * @param {string} source - Input source from the extractor configuration.
 * @param {string} workspacePath - Base directory for workspace-relative sources.
 * @param {Array} frontier - Current frontier entries; only index 0 is consulted.
 * @returns {Promise<string>} The resolved text.
 */
async function resolvePromptInput(source, workspacePath, frontier) {
    const frontierPrefix = "frontier.best:";
    if (!source.startsWith(frontierPrefix)) {
        const workspaceFile = resolve(workspacePath, source);
        try {
            return await readFile(workspaceFile, "utf8");
        }
        catch {
            // Not a readable file: the source is used verbatim.
            return source;
        }
    }
    const reference = source.slice(frontierPrefix.length);
    const incumbent = frontier[0];
    if (!incumbent) {
        return "";
    }
    const matchesReference = (entry) => entry.id === reference || entry.path.endsWith(reference);
    const artifact = incumbent.artifacts.find(matchesReference);
    // A read failure on a matched artifact is not swallowed; it rejects the returned promise.
    return artifact ? readFile(resolve(artifact.path), "utf8") : "";
}
|
|
591
|
+
/**
 * Returns the contents of the file at `value` (resolved against `baseDir`), or `value`
 * itself when that file cannot be read.
 *
 * This lets a configuration field hold either a template path or the template text.
 *
 * @param {string} baseDir - Directory that relative template paths are resolved against.
 * @param {string} value - Template path or inline template text.
 * @returns {Promise<string>} File contents, or `value` on any read failure.
 */
async function readTemplateOrInline(baseDir, value) {
    // Path resolution stays outside the fallback: only the read itself is best-effort.
    const templatePath = resolve(baseDir, value);
    return readFile(templatePath, "utf8").catch(() => value);
}
|
|
600
|
+
/**
 * Copies each declared output from the workspace into `artifactRoot` and returns
 * descriptors for the copies.
 *
 * The copy is named "<output.id><ext>", where the extension is taken from the output's
 * path and falls back to ".txt" when the path has none. Outputs are copied one at a
 * time, in order.
 *
 * @param {Array<{id: string, path: string}>} outputs - Outputs to snapshot; `path` is
 *   resolved against `workspacePath`.
 * @param {string} workspacePath - Directory the outputs live in.
 * @param {string} artifactRoot - Directory the copies are written under.
 * @returns {Promise<Array<{id: string, path: string}>>} One descriptor per output, with
 *   `path` pointing at the copy.
 */
async function snapshotArtifacts(outputs, workspacePath, artifactRoot) {
    const snapshots = [];
    for (const { id, path: outputPath } of outputs) {
        const sourcePath = resolve(workspacePath, outputPath);
        const extension = extname(outputPath) || ".txt";
        const destinationPath = join(artifactRoot, `${id}${extension}`);
        // Created per file because the joined name determines the parent directory.
        await mkdir(dirname(destinationPath), { recursive: true });
        await copyFile(sourcePath, destinationPath);
        snapshots.push({ id, path: destinationPath });
    }
    return snapshots;
}
|
|
614
|
+
/**
 * Returns a copy of a constraint result limited to `metric`, `passed`, `actual`,
 * `expected` and `op`; any other property (such as a reason) is left out.
 *
 * @param {object} constraint - Constraint evaluation result.
 * @returns {{metric: *, passed: *, actual: *, expected: *, op: *}} The reduced copy.
 */
function stripConstraintReason(constraint) {
    const { metric, passed, actual, expected, op } = constraint;
    return { metric, passed, actual, expected, op };
}
|
|
623
|
+
/**
 * Writes `value` to `path` as UTF-8 text, creating the parent directory first.
 *
 * @param {string} path - Destination file path.
 * @param {string} value - Text to write.
 * @returns {Promise<string>} The same `path`, once the write has finished.
 */
async function persistText(path, value) {
    const parentDirectory = dirname(path);
    await mkdir(parentDirectory, { recursive: true });
    await writeFile(path, value, { encoding: "utf8" });
    return path;
}
|
|
628
|
+
/**
 * Writes `value` to `path` as pretty-printed JSON (2-space indent, trailing newline),
 * creating the parent directory first.
 *
 * `JSON.stringify` returns `undefined` for values it cannot represent at the top level
 * (`undefined`, functions, symbols). Interpolating that result would write the text
 * "undefined", which is not valid JSON, so such values are stored as `null` instead.
 *
 * @param {string} path - Destination file path.
 * @param {*} value - Value to serialize.
 * @returns {Promise<string>} The same `path`, once the write has finished.
 * @throws {TypeError} Propagated from `JSON.stringify` (for example on circular structures).
 */
async function persistJson(path, value) {
    const json = JSON.stringify(value, null, 2);
    // Keep the file parseable even when there is nothing serializable to store.
    const serialized = json === undefined ? "null" : json;
    await mkdir(dirname(path), { recursive: true });
    await writeFile(path, `${serialized}\n`, "utf8");
    return path;
}
|
|
633
|
+
/**
 * Looks up a judge pack by id in `manifest.judgePacks`.
 *
 * @param {object} manifest - Manifest whose `judgePacks` array is searched.
 * @param {string} judgePackId - Id of the pack to find.
 * @returns {object} The first pack whose `id` matches.
 * @throws {Error} When no pack has the requested id.
 */
function getJudgePack(manifest, judgePackId) {
    for (const candidate of manifest.judgePacks) {
        if (candidate.id === judgePackId) {
            return candidate;
        }
    }
    throw new Error(`unknown judge pack ${judgePackId}`);
}
|
|
640
|
+
/**
 * Returns the id of the metric that decisions are reported against.
 *
 * For the "single_best" strategy this is `frontier.primaryMetric`; for any other
 * strategy it is the metric of the first entry in `frontier.objectives`.
 *
 * @param {object} manifest - Manifest whose `frontier` section is read.
 * @returns {string} The reference metric id.
 * @throws {Error} When the strategy is not "single_best" and `frontier.objectives` is
 *   missing or empty (instead of failing with an opaque property-access TypeError).
 */
function getReferenceMetric(manifest) {
    const { frontier } = manifest;
    if (frontier.strategy === "single_best") {
        return frontier.primaryMetric;
    }
    const objectives = frontier.objectives;
    if (!Array.isArray(objectives) || objectives.length === 0) {
        throw new Error(`frontier strategy ${frontier.strategy} requires at least one objective`);
    }
    return objectives[0].metric;
}
|
|
646
|
+
/**
 * Applies a candidate entry to the frontier using the manifest's frontier strategy.
 *
 * "single_best" delegates to `updateSingleBestFrontier` with the primary metric; every
 * other strategy delegates to `updateParetoFrontier` with the configured objectives,
 * tie breaker and reference point.
 *
 * @param {object} manifest - Manifest whose `frontier` section selects the strategy.
 * @param {Array} currentFrontier - Frontier entries before the update.
 * @param {object} candidateEntry - Entry to apply.
 * @returns {*} Whatever the selected update helper returns.
 */
function updateFrontier(manifest, currentFrontier, candidateEntry) {
    const { frontier } = manifest;
    if (frontier.strategy !== "single_best") {
        const { objectives, tieBreaker, referencePoint } = frontier;
        return updateParetoFrontier(currentFrontier, candidateEntry, objectives, tieBreaker, referencePoint);
    }
    return updateSingleBestFrontier(currentFrontier, candidateEntry, frontier.primaryMetric);
}
|
|
652
|
+
//# sourceMappingURL=cycle-runner.js.map
|