@lnilluv/pi-ralph-loop 0.3.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release.yml +8 -39
- package/README.md +53 -160
- package/package.json +2 -2
- package/scripts/version-helper.ts +210 -0
- package/src/index.ts +1388 -187
- package/src/ralph-draft-context.ts +618 -0
- package/src/ralph-draft-llm.ts +297 -0
- package/src/ralph-draft.ts +33 -0
- package/src/ralph.ts +924 -102
- package/src/runner-rpc.ts +466 -0
- package/src/runner-state.ts +839 -0
- package/src/runner.ts +1042 -0
- package/src/secret-paths.ts +66 -0
- package/src/shims.d.ts +0 -3
- package/tests/fixtures/parity/migrate/OPEN_QUESTIONS.md +3 -0
- package/tests/fixtures/parity/migrate/RALPH.md +27 -0
- package/tests/fixtures/parity/migrate/golden/MIGRATED.md +15 -0
- package/tests/fixtures/parity/migrate/legacy/source.md +6 -0
- package/tests/fixtures/parity/migrate/legacy/source.yaml +3 -0
- package/tests/fixtures/parity/migrate/scripts/show-legacy.sh +10 -0
- package/tests/fixtures/parity/migrate/scripts/verify.sh +15 -0
- package/tests/fixtures/parity/research/OPEN_QUESTIONS.md +3 -0
- package/tests/fixtures/parity/research/RALPH.md +45 -0
- package/tests/fixtures/parity/research/claim-evidence-checklist.md +15 -0
- package/tests/fixtures/parity/research/expected-outputs.md +22 -0
- package/tests/fixtures/parity/research/scripts/show-snapshots.sh +13 -0
- package/tests/fixtures/parity/research/scripts/verify.sh +55 -0
- package/tests/fixtures/parity/research/snapshots/app-factory-ai-cli.md +11 -0
- package/tests/fixtures/parity/research/snapshots/docs-factory-ai-cli-features-missions.md +11 -0
- package/tests/fixtures/parity/research/snapshots/factory-ai-news-missions.md +11 -0
- package/tests/fixtures/parity/research/source-manifest.md +20 -0
- package/tests/index.test.ts +3801 -0
- package/tests/parity/README.md +9 -0
- package/tests/parity/harness.py +526 -0
- package/tests/parity-harness.test.ts +42 -0
- package/tests/parity-research-fixture.test.ts +34 -0
- package/tests/ralph-draft-context.test.ts +672 -0
- package/tests/ralph-draft-llm.test.ts +434 -0
- package/tests/ralph-draft.test.ts +168 -0
- package/tests/ralph.test.ts +1413 -19
- package/tests/runner-event-contract.test.ts +235 -0
- package/tests/runner-rpc.test.ts +446 -0
- package/tests/runner-state.test.ts +581 -0
- package/tests/runner.test.ts +1552 -0
- package/tests/secret-paths.test.ts +55 -0
- package/tests/version-helper.test.ts +75 -0
|
@@ -0,0 +1,1552 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { chmodSync, existsSync, mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import test from "node:test";
|
|
6
|
+
|
|
7
|
+
import { assessTaskDirectoryProgress, captureTaskDirectorySnapshot, runRalphLoop, validateCompletionReadiness } from "../src/runner.ts";
|
|
8
|
+
import { readStatusFile, readIterationRecords, readRunnerEvents, checkStopSignal, createCancelSignal, createStopSignal as createStopSignalFn, type RunnerEvent } from "../src/runner-state.ts";
|
|
9
|
+
import { generateDraft } from "../src/ralph.ts";
|
|
10
|
+
import type { DraftTarget, CommandOutput, CommandDef } from "../src/ralph.ts";
|
|
11
|
+
|
|
12
|
+
/** Creates a fresh scratch directory under the OS temp dir (prefix `pi-ralph-runner-`) and returns its path. */
function createTempDir(): string {
  const prefix = join(tmpdir(), "pi-ralph-runner-");
  return mkdtempSync(prefix);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Writes `content` as UTF-8 to `RALPH.md` inside `taskDir`.
 *
 * @returns the full path of the file that was written.
 */
function writeRalphMd(taskDir: string, content: string): string {
  const target = join(taskDir, "RALPH.md");
  writeFileSync(target, content, "utf8");
  return target;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Renders a small RALPH.md document: a `---` delimited frontmatter block followed by
 * a one-line task body.
 *
 * Each frontmatter entry is emitted as `key: <JSON.stringify(value)>`. `overrides`
 * is spread over the defaults, so an overridden key keeps its default position and
 * new keys are appended at the end.
 */
function minimalRalphMd(overrides: Record<string, unknown> = {}): string {
  const frontmatter: Record<string, unknown> = {
    commands: [],
    max_iterations: 2,
    timeout: 5,
    guardrails: { block_commands: [], protected_files: [] },
    ...overrides,
  };

  const fields: string[] = [];
  for (const [key, value] of Object.entries(frontmatter)) {
    fields.push(`${key}: ${JSON.stringify(value)}`);
  }

  // The trailing empty element yields the final newline after the task line.
  return ["---", fields.join("\n"), "---", "Task: Do something", ""].join("\n");
}
|
|
34
|
+
|
|
35
|
+
/**
 * Builds a stand-in `pi` object for tests: every hook is a no-op returning
 * `undefined`, and `exec` resolves to an empty, non-killed command result.
 */
function makeMockPi() {
  const pi = {
    on: () => undefined,
    registerCommand: () => undefined,
    appendEntry: () => undefined,
    sendUserMessage: () => undefined,
    exec: async () => {
      return { killed: false, stdout: "", stderr: "" };
    },
  };
  return pi;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Returns a type-guard predicate that accepts only runner events whose `type`
 * equals the given `type`, narrowing them to the matching union member.
 */
function isRunnerEventType<T extends RunnerEvent["type"]>(type: T) {
  function matchesType(event: RunnerEvent): event is Extract<RunnerEvent, { type: T }> {
    return event.type === type;
  }
  return matchesType;
}
|
|
48
|
+
|
|
49
|
+
/** Type guard: true when the event object has an `iteration` property. */
function hasIteration(event: RunnerEvent): event is Extract<RunnerEvent, { iteration: number }> {
  const carriesIteration = "iteration" in event;
  return carriesIteration;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Builds the text of a bash script that imitates the RPC subprocess used by the
 * tests in this file: it reads one line from stdin, acknowledges the prompt, and
 * then prints one `agent_end` JSONL event per entry in `outputs`.
 *
 * The previous implementation embedded the literal characters `' + text + '`
 * in the generated script instead of the output text, and only escaped double
 * quotes. The event is now serialized with `JSON.stringify` (so quotes,
 * backslashes and newlines are valid JSON) and wrapped in a single-quoted shell
 * word with embedded single quotes escaped.
 *
 * @param cwd      Unused; kept so existing call sites keep compiling.
 * @param outputs  Assistant messages to emit, in order. `promise` is accepted but
 *                 not used by this helper.
 * @returns the script source, lines joined with `\n` (no trailing newline).
 */
function makeMockSpawnScript(cwd: string, outputs: Array<{ text: string; promise?: string }>): string {
  // Close the quote, emit an escaped quote, reopen: the POSIX way to put ' inside '...'.
  const shellQuote = (value: string): string => "'" + value.replace(/'/g, "'\\''") + "'";

  const lines = [
    "#!/bin/bash",
    "read line",
    `echo '{"type":"response","command":"prompt","success":true}'`,
  ];
  for (const output of outputs) {
    const event = JSON.stringify({
      type: "agent_end",
      messages: [{ role: "assistant", content: [{ type: "text", text: output.text }] }],
    });
    // printf '%s\n' prints the payload verbatim; JSON escapes such as \n stay untouched.
    lines.push(`printf '%s\\n' ${shellQuote(event)}`);
  }
  return lines.join("\n");
}
|
|
65
|
+
|
|
66
|
+
// Smoke test: spawns `echo mock`, which never speaks the RPC JSONL protocol, and
// checks that the loop still returns a terminal status with at least one iteration
// recorded and at least one status change reported.
test("runRalphLoop completes a single iteration with mock subprocess", async () => {
  const workDir = createTempDir();
  try {
    const seenNotifications: Array<{ message: string; level: string }> = [];
    const seenStatuses: string[] = [];
    const ralphPath = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 1 }));

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "echo",
      spawnArgs: ["mock"],
      onNotify: (message, level) => {
        seenNotifications.push({ message, level });
      },
      onStatusChange: (status) => {
        seenStatuses.push(status);
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    // The subprocess exits without ever emitting agent_end, so an error or
    // no-progress style outcome is the expected result here.
    const acceptableStatuses: string[] = ["error", "no-progress-exhaustion", "max-iterations"];
    assert.ok(acceptableStatuses.includes(outcome.status));
    assert.ok(outcome.iterations.length >= 1);
    assert.ok(seenStatuses.length > 0);
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
101
|
+
|
|
102
|
+
// Runs one iteration against a bash stub that writes to stderr and emits a valid
// response/agent_end pair, then checks the telemetry fields stored on the first
// iteration record.
test("runRalphLoop persists RPC telemetry in iteration records", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1 }));

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
printf 'research stderr\n' >&2
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const records = readIterationRecords(taskDir);
    const telemetry = records[0].rpcTelemetry;
    assert.ok(telemetry);
    assert.ok(telemetry?.spawnedAt.length > 0);
    assert.ok(telemetry?.promptSentAt);
    assert.ok(telemetry?.firstStdoutEventAt);
    assert.ok(telemetry?.lastEventAt);
    assert.equal(telemetry?.lastEventType, "agent_end");
    assert.ok(telemetry?.exitedAt);
    assert.match(telemetry?.stderrText ?? "", /research stderr/);
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
144
|
+
|
|
145
|
+
// After a run (even one whose subprocess is just `echo mock`), a status file must
// be readable from the task directory and carry a loop token and the task dir.
test("runRalphLoop writes durable status files", async () => {
  const workDir = createTempDir();
  try {
    await runRalphLoop({
      ralphPath: writeRalphMd(workDir, minimalRalphMd({ max_iterations: 1 })),
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "echo",
      spawnArgs: ["mock"],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    // Verify status file was written
    const persisted = readStatusFile(workDir);
    assert.ok(persisted !== undefined);
    assert.ok(persisted.loopToken.length > 0);
    // Accept either the exact path or one that ends with the same last path segment.
    const lastSegment = workDir.split("/").pop()!;
    assert.ok(persisted.taskDir === workDir || persisted.taskDir.endsWith(lastSegment));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
171
|
+
|
|
172
|
+
// The bash stub creates notes/findings.md inside the task dir before emitting
// agent_end; the single resulting iteration must report progress (true or
// "unknown") and, when changed files are listed, include that file.
test("runRalphLoop detects task-dir file progress from subprocess writes", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1 }));

    // Script that writes a file then sends agent_end
    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
mkdir -p "${taskDir}/notes"
echo "findings" > "${taskDir}/notes/findings.md"
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    const received: Array<{ message: string; level: string }> = [];
    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      onNotify: (message, level) => {
        received.push({ message, level });
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    // Should detect progress from file changes
    assert.equal(result.iterations.length, 1);
    const onlyIteration = result.iterations[0];
    assert.ok(
      onlyIteration.progress === true || onlyIteration.progress === "unknown",
      `unexpected progress: ${onlyIteration.progress}`,
    );
    if (onlyIteration.changedFiles.length > 0) {
      assert.ok(onlyIteration.changedFiles.includes("notes/findings.md"));
    }
    assert.ok(received.some((entry) => entry.message.includes("Iteration 1")));
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
218
|
+
|
|
219
|
+
// Allows two iterations but writes a stop signal from onIterationComplete, then
// expects the loop to finish with status "stopped".
test("runRalphLoop respects stop signal from durable state", async () => {
  const taskDir = createTempDir();
  try {
    // Use max_iterations: 2 but stop after first iteration
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 2 }));

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    // Create stop signal before second iteration
    let finishedIterations = 0;
    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 2,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      onIterationComplete: () => {
        finishedIterations += 1;
        if (finishedIterations >= 1) {
          createStopSignalFn(taskDir);
        }
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(result.status, "stopped");
    assert.ok(result.iterations.length <= 2);
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
262
|
+
|
|
263
|
+
// The bash stub saves the prompt line it receives on stdin to prompt.json; the
// test then checks that the prompt message mentions RALPH_PROGRESS.md, contains
// the file's content, and carries the keep-short / overwrite-in-place guidance.
test("runRalphLoop injects RALPH_PROGRESS.md into every iteration prompt", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1 }));
    const progressFile = join(taskDir, "RALPH_PROGRESS.md");
    writeFileSync(progressFile, "Keep this short.\nOverwrite in place.\n", "utf8");

    const promptPath = join(taskDir, "prompt.json");
    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
printf '%s' "$line" > "${promptPath}"
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const captured = JSON.parse(readFileSync(promptPath, "utf8")) as { message: string };
    const expectedPatterns = [/RALPH_PROGRESS\.md/, /Keep this short\./, /Keep it short/i, /overwrite in place/i];
    for (const pattern of expectedPatterns) {
      assert.match(captured.message, pattern);
    }
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
303
|
+
|
|
304
|
+
// Same prompt-capturing stub as the injection test, but without a
// RALPH_PROGRESS.md file: the captured prompt must not mention it or its guidance.
test("runRalphLoop ignores missing RALPH_PROGRESS.md", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1 }));

    const promptPath = join(taskDir, "prompt.json");
    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
printf '%s' "$line" > "${promptPath}"
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const captured = JSON.parse(readFileSync(promptPath, "utf8")) as { message: string };
    const mentionsProgressFile = captured.message.includes("RALPH_PROGRESS.md");
    const mentionsGuidance = captured.message.toLowerCase().includes("keep it short and overwrite in place");
    assert.equal(mentionsProgressFile, false);
    assert.equal(mentionsGuidance, false);
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
341
|
+
|
|
342
|
+
// The bash stub acknowledges the prompt and then sleeps for 10 seconds; a timer
// writes the cancel signal after 1 second, and the loop is expected to end with
// status "cancelled" on both the run and its last iteration.
test("runRalphLoop cancels mid-iteration when cancel flag is written", async () => {
  const taskDir = createTempDir();
  // Kept outside the try block so the finally clause can always cancel it.
  let cancelTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 3 }));

    const scriptPath = join(taskDir, "slow-pi.sh");
    writeFileSync(
      scriptPath,
      `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
sleep 10
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`,
      { mode: 0o755 },
    );

    cancelTimer = setTimeout(() => createCancelSignal(taskDir), 1000);

    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 30,
      maxIterations: 3,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(result.status, "cancelled");
    assert.ok(result.iterations.length >= 1);
    assert.equal(result.iterations[result.iterations.length - 1].status, "cancelled");
  } finally {
    // If the loop returned or threw before the timer fired, a pending timer would
    // call createCancelSignal against a directory that is removed just below.
    clearTimeout(cancelTimer);
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
380
|
+
|
|
381
|
+
// Writes the cancel signal from onIterationComplete (i.e. between iterations) and
// expects the run to end with status "cancelled".
test("runRalphLoop checks cancel flag at iteration boundary", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 3 }));

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    let finishedIterations = 0;
    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 3,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      onIterationComplete: () => {
        finishedIterations += 1;
        if (finishedIterations >= 1) {
          createCancelSignal(taskDir);
        }
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(result.status, "cancelled");
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
421
|
+
|
|
422
|
+
// With inter_iteration_delay: 1 in the frontmatter and two iterations allowed,
// the two onIterationStart timestamps must be at least 900 ms apart.
test("runRalphLoop waits between iterations when inter_iteration_delay is set", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 2, inter_iteration_delay: 1 }));

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    const startTimes: number[] = [];
    await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 2,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      onIterationStart: () => {
        startTimes.push(Date.now());
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(startTimes.length, 2);
    const gapMs = startTimes[1] - startTimes[0];
    assert.ok(gapMs >= 900, `expected delay between iterations, got ${gapMs}ms`);
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
460
|
+
|
|
461
|
+
// With a 5 second inter_iteration_delay but only one iteration allowed, the whole
// run must finish in under 2 seconds, i.e. no delay is applied after the last one.
test("runRalphLoop does not delay after the final allowed iteration", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1, inter_iteration_delay: 5 }));

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    const beganAt = Date.now();
    await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const elapsedMs = Date.now() - beganAt;
    assert.ok(elapsedMs < 2000, "unexpected inter-iteration delay after the final iteration");
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
495
|
+
|
|
496
|
+
// A stop signal is written when the first iteration completes while a 5 second
// inter-iteration delay is configured; the run must end as "stopped" after one
// iteration and in under 2.5 seconds overall.
test("runRalphLoop stops promptly during inter-iteration delay when /ralph-stop is requested", async () => {
  const taskDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 2, inter_iteration_delay: 5 }));

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    const beganAt = Date.now();
    let finishedIterations = 0;
    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 2,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      onIterationComplete: () => {
        finishedIterations += 1;
        // Only the first completion requests the stop.
        if (finishedIterations === 1) {
          createStopSignalFn(taskDir);
        }
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(result.status, "stopped");
    assert.equal(result.iterations.length, 1);
    const elapsedMs = Date.now() - beganAt;
    assert.ok(elapsedMs < 2500, "expected stop during the inter-iteration delay");
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
539
|
+
|
|
540
|
+
// The bash stub rewrites notes/findings.md and answers with
// "<promise>DONE</promise>"; the first iteration must be flagged as having matched
// the completion promise and a notification mentioning it must be emitted.
test("runRalphLoop detects completion promise in subprocess output", async () => {
  const taskDir = createTempDir();
  try {
    // Write a file so progress is detected
    const notesDir = join(taskDir, "notes");
    mkdirSync(notesDir, { recursive: true });
    writeFileSync(join(taskDir, "notes", "findings.md"), "initial\n");

    const frontmatter = minimalRalphMd({ max_iterations: 3, completion_promise: "DONE" });
    const ralphPath = writeRalphMd(taskDir, frontmatter);

    const scriptPath = join(taskDir, "mock-pi.sh");
    const scriptBody = `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
echo "updated findings" > "${taskDir}/notes/findings.md"
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"<promise>DONE</promise> All done!"}]}]}'
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    const received: Array<{ message: string; level: string }> = [];
    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 10,
      maxIterations: 3,
      completionPromise: "DONE",
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      onNotify: (message, level) => {
        received.push({ message, level });
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.ok(result.iterations.length >= 1);
    // Should have matched the completion promise
    const openingIteration = result.iterations[0];
    assert.equal(openingIteration.completionPromiseMatched, true);
    assert.ok(received.some((entry) => entry.message.includes("completion promise")));
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
590
|
+
|
|
591
|
+
// The bash stub counts its invocations in a file outside the task dir and saves
// each prompt as prompt-<n>.json. Its first answer claims the promise without
// touching the task dir; the second prompt must then carry the completion gate
// rejection text.
test("runRalphLoop keeps prompting after a premature completion promise until durable progress exists", async () => {
  const taskDir = createTempDir();
  // Tracked outside the try block so the finally clause can clean it up once created.
  let captureDir: string | undefined;
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 2, completion_promise: "DONE" }));
    const captureRoot = mkdtempSync(join(tmpdir(), "pi-ralph-loop-capture-"));
    captureDir = captureRoot;
    const promptCounterPath = join(captureRoot, "prompt-counter.txt");
    const promptPathPrefix = join(captureRoot, "prompt-");
    const scriptPath = join(taskDir, "mock-pi-recovery.sh");
    const scriptBody = `#!/bin/bash
count=0
if [ -f "${promptCounterPath}" ]; then
count=$(cat "${promptCounterPath}")
fi
count=$((count + 1))
printf '%s' "$count" > "${promptCounterPath}"
read line
printf '%s' "$line" > "${promptPathPrefix}$count.json"
echo '{"type":"response","command":"prompt","success":true}'
if [ "$count" -eq 1 ]; then
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"<promise>DONE</promise> premature"}]}]}'
else
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"still working"}]}]}'
fi
`;
    writeFileSync(scriptPath, scriptBody, { mode: 0o755 });

    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 5,
      maxIterations: 2,
      completionPromise: "DONE",
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(result.iterations.length, 2);
    const secondPromptFile = join(captureRoot, "prompt-2.json");
    const secondPrompt = JSON.parse(readFileSync(secondPromptFile, "utf8")) as { message: string };
    assert.match(secondPrompt.message, /\[completion gate rejection\]/);
    assert.match(secondPrompt.message, /Still missing: durable progress/);
  } finally {
    if (captureDir) {
      rmSync(captureDir, { recursive: true, force: true });
    }
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
645
|
+
|
|
646
|
+
// Required output present and OPEN_QUESTIONS.md without P0/P1 sections: ready, no reasons.
test("validateCompletionReadiness reports ready when required outputs exist and OPEN_QUESTIONS.md is clear", (t) => {
  const taskDir = createTempDir();
  t.after(() => {
    rmSync(taskDir, { recursive: true, force: true });
  });

  const architecturePath = join(taskDir, "ARCHITECTURE.md");
  const openQuestionsPath = join(taskDir, "OPEN_QUESTIONS.md");
  writeFileSync(architecturePath, "done\n", "utf8");
  writeFileSync(openQuestionsPath, "# Open questions\n\nAll clear.\n", "utf8");

  const readiness = validateCompletionReadiness(taskDir, ["ARCHITECTURE.md"]);
  assert.deepEqual(readiness, { ready: true, reasons: [] });
});
|
|
655
|
+
|
|
656
|
+
// Missing required output plus unchecked items under "## P0" and "## P1": not
// ready, with one reason per problem.
test("validateCompletionReadiness reports blocking reasons for missing outputs and unresolved questions", (t) => {
  const taskDir = createTempDir();
  t.after(() => {
    rmSync(taskDir, { recursive: true, force: true });
  });

  const openQuestions = "## P0\n- [ ] Decide the migration order\n\n## P1\n- [ ] Confirm the test plan\n";
  writeFileSync(join(taskDir, "OPEN_QUESTIONS.md"), openQuestions, "utf8");

  const readiness = validateCompletionReadiness(taskDir, ["ARCHITECTURE.md"]);
  assert.equal(readiness.ready, false);
  const expectedReasons = [
    "Missing required output: ARCHITECTURE.md",
    "OPEN_QUESTIONS.md still has P0 items",
    "OPEN_QUESTIONS.md still has P1 items",
  ];
  for (const reason of expectedReasons) {
    assert.ok(readiness.reasons.includes(reason));
  }
});
|
|
672
|
+
|
|
673
|
+
// Table-driven: unchecked items must block readiness whether the P0/P1 heading is
// written with one, two or three '#', and also when the item sits under a nested
// subheading of a P0 section.
test("validateCompletionReadiness blocks on any markdown heading level used for P0 and P1 sections", (t) => {
  const scenarios = [
    {
      label: "# P0",
      content: "# P0\n- [ ] Decide the migration order\n",
      expectedReason: "OPEN_QUESTIONS.md still has P0 items",
    },
    {
      label: "### P1",
      content: "### P1\n- [ ] Confirm the test plan\n",
      expectedReason: "OPEN_QUESTIONS.md still has P1 items",
    },
    {
      label: "nested subheading under ## P0",
      content: "## P0\n### Notes\n- [ ] Decide the migration order\n",
      expectedReason: "OPEN_QUESTIONS.md still has P0 items",
    },
  ] as const;

  for (const scenario of scenarios) {
    // Each scenario gets its own directory, removed when the test finishes.
    const taskDir = createTempDir();
    t.after(() => {
      rmSync(taskDir, { recursive: true, force: true });
    });
    writeFileSync(join(taskDir, "OPEN_QUESTIONS.md"), scenario.content, "utf8");

    const readiness = validateCompletionReadiness(taskDir, []);
    assert.equal(readiness.ready, false, scenario.label);
    assert.ok(readiness.reasons.includes(scenario.expectedReason), scenario.label);
  }
});
|
|
702
|
+
|
|
703
|
+
// Checked items must not block completion; the fixture uses both "[x]" and "[X]"
// and both a bullet and an ordered-list marker.
test("validateCompletionReadiness ignores checked items inside P0 and P1 sections", (t) => {
  const workDir = createTempDir();
  t.after(() => rmSync(workDir, { recursive: true, force: true }));

  const resolvedQuestions = "# P0\n- [x] Decide the migration order\n\n### P1\n1. [X] Confirm the test plan\n";
  writeFileSync(join(workDir, "OPEN_QUESTIONS.md"), resolvedQuestions, "utf8");

  const outcome = validateCompletionReadiness(workDir, []);
  assert.deepEqual(outcome, { ready: true, reasons: [] });
});
|
|
715
|
+
|
|
716
|
+
// The indented note bullet carries no checkbox and sits beneath a checked item,
// so the gate is expected to report ready with no reasons.
test("validateCompletionReadiness ignores nested note bullets under checked items", (t) => {
  const workDir = createTempDir();
  t.after(() => rmSync(workDir, { recursive: true, force: true }));

  const questionsPath = join(workDir, "OPEN_QUESTIONS.md");
  writeFileSync(questionsPath, "# P0\n- [x] Decide the migration order\n - note: revisit after merge\n", "utf8");

  const outcome = validateCompletionReadiness(workDir, []);
  assert.deepEqual(outcome, { ready: true, reasons: [] });
});
|
|
728
|
+
|
|
729
|
+
/**
 * The mock agent prints `<promise>DONE</promise>` and writes notes/findings.md on
 * every run, but never creates the required ARCHITECTURE.md. The test expects the
 * loop to use both configured iterations and to record, on the first one, that the
 * promise matched while the completion gate reported the missing output.
 */
test("runRalphLoop does not stop on completion promise when required outputs are missing", async () => {
  const taskDir = createTempDir();
  try {
    writeFileSync(join(taskDir, "OPEN_QUESTIONS.md"), "# Open questions\n\nAll clear.\n", "utf8");
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 2, completion_promise: "DONE", required_outputs: ["ARCHITECTURE.md"] }));

    // Stand-in for the agent process: read the prompt line, acknowledge it,
    // touch a notes file, then emit an agent_end message carrying the promise.
    const scriptPath = join(taskDir, "mock-pi.sh");
    writeFileSync(
      scriptPath,
      `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
mkdir -p "${taskDir}/notes"
echo "updated findings" > "${taskDir}/notes/findings.md"
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"<promise>DONE</promise> All done!"}]}]}'
`,
      { mode: 0o755 },
    );

    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 10,
      maxIterations: 2,
      completionPromise: "DONE",
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    // notEqual (instead of comparing a boolean to false) prints the actual status on failure.
    assert.notEqual(result.status, "complete");
    assert.equal(result.iterations.length, 2);
    assert.equal(result.iterations[0].completionPromiseMatched, true);
    assert.equal(result.iterations[0].completionGate?.ready, false);
    assert.ok(result.iterations[0].completionGate?.reasons.includes("Missing required output: ARCHITECTURE.md"));
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
770
|
+
|
|
771
|
+
// The required output already exists, so the unchecked P0 item is the only thing
// left that can keep the completion gate closed.
test("runRalphLoop does not stop on completion promise when OPEN_QUESTIONS.md still has P0 items", async () => {
  const workDir = createTempDir();
  try {
    writeFileSync(join(workDir, "ARCHITECTURE.md"), "done\n", "utf8");
    writeFileSync(join(workDir, "OPEN_QUESTIONS.md"), "## P0\n- [ ] Decide the architecture\n", "utf8");
    const ralphSource = minimalRalphMd({ max_iterations: 2, completion_promise: "DONE", required_outputs: ["ARCHITECTURE.md"] });
    const ralphPath = writeRalphMd(workDir, ralphSource);

    // Stand-in agent: acknowledge the prompt, touch a notes file, claim completion.
    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `mkdir -p "${workDir}/notes"`,
      `echo "updated findings" > "${workDir}/notes/findings.md"`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"<promise>DONE</promise> All done!"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 10,
      maxIterations: 2,
      completionPromise: "DONE",
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.notEqual(outcome.status, "complete");
    assert.equal(outcome.iterations.length, 2);

    const firstGate = outcome.iterations[0].completionGate;
    assert.equal(firstGate?.ready, false);
    assert.ok(firstGate?.reasons.includes("OPEN_QUESTIONS.md still has P0 items"));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
812
|
+
|
|
813
|
+
// Happy path: the required output exists and OPEN_QUESTIONS.md has no P0/P1 items,
// so the first matched promise is expected to end the loop after one iteration.
test("runRalphLoop stops when the completion gate passes", async () => {
  const workDir = createTempDir();
  try {
    writeFileSync(join(workDir, "ARCHITECTURE.md"), "done\n", "utf8");
    writeFileSync(join(workDir, "OPEN_QUESTIONS.md"), "# Open questions\n\nNothing open.\n", "utf8");
    const ralphSource = minimalRalphMd({ max_iterations: 3, completion_promise: "DONE", required_outputs: ["ARCHITECTURE.md"] });
    const ralphPath = writeRalphMd(workDir, ralphSource);

    // Stand-in agent: acknowledge the prompt, touch a notes file, claim completion.
    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `mkdir -p "${workDir}/notes"`,
      `echo "updated findings" > "${workDir}/notes/findings.md"`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"<promise>DONE</promise> All done!"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 10,
      maxIterations: 3,
      completionPromise: "DONE",
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.status, "complete");
    assert.equal(outcome.iterations.length, 1);

    const onlyIteration = outcome.iterations[0];
    assert.equal(onlyIteration.completionPromiseMatched, true);
    assert.deepEqual(onlyIteration.completionGate, { ready: true, reasons: [] });
    assert.deepEqual(onlyIteration.completion, {
      promiseSeen: true,
      durableProgressObserved: true,
      gateChecked: true,
      gatePassed: true,
      gateBlocked: false,
      blockingReasons: [],
    });
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
862
|
+
|
|
863
|
+
/**
 * Runs two iterations with a mock agent that always claims completion while the
 * required ARCHITECTURE.md is never created, then checks what was persisted:
 *  - the first iteration record's `completion` summary,
 *  - the ordered event types emitted for iteration 1,
 *  - the payload (minus `timestamp`) of each completion-related event.
 * `loopToken` is generated at runtime, so each expected payload echoes the
 * observed token instead of pinning a value.
 */
test("runRalphLoop records completion observability events when the completion gate is blocked", async () => {
  const taskDir = createTempDir();
  try {
    writeFileSync(join(taskDir, "OPEN_QUESTIONS.md"), "# Open questions\n\nNothing open.\n", "utf8");
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 2, completion_promise: "DONE", required_outputs: ["ARCHITECTURE.md"] }));

    // Stand-in agent: acknowledge the prompt, touch a notes file, claim completion.
    const scriptPath = join(taskDir, "mock-pi.sh");
    writeFileSync(
      scriptPath,
      `#!/bin/bash
read line
echo '{"type":"response","command":"prompt","success":true}'
mkdir -p "${taskDir}/notes"
echo "updated findings" > "${taskDir}/notes/findings.md"
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"<promise>DONE</promise> All done!"}]}]}'
`,
      { mode: 0o755 },
    );

    const result = await runRalphLoop({
      ralphPath,
      cwd: taskDir,
      timeout: 10,
      maxIterations: 2,
      completionPromise: "DONE",
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [scriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const records = readIterationRecords(taskDir);
    // assert.ok with a message reports the observed count instead of "false !== true".
    assert.ok(records.length >= 1, `expected at least one iteration record, got ${records.length}`);
    assert.deepEqual(records[0].completion, {
      promiseSeen: true,
      durableProgressObserved: true,
      gateChecked: true,
      gatePassed: false,
      gateBlocked: true,
      blockingReasons: ["Missing required output: ARCHITECTURE.md"],
    });

    const events = readRunnerEvents(taskDir);
    // Only events tagged with iteration 1 are compared, in emission order.
    assert.deepEqual(
      events.filter(hasIteration).filter((event) => event.iteration === 1).map((event) => event.type),
      [
        "iteration.started",
        "durable.progress.observed",
        "completion_promise_seen",
        "completion.gate.checked",
        "completion_gate_blocked",
        "iteration.completed",
      ],
    );

    // For each event below, `find` returns the first occurrence of that type in the log.
    const completionPromiseEvent = events.find(isRunnerEventType("completion_promise_seen")) as Extract<RunnerEvent, { type: "completion_promise_seen" }> | undefined;
    assert.ok(completionPromiseEvent);
    const { timestamp: _completionPromiseTimestamp, ...completionPromisePayload } = completionPromiseEvent!;
    assert.deepEqual(completionPromisePayload, {
      type: "completion_promise_seen",
      iteration: 1,
      loopToken: completionPromiseEvent!.loopToken,
      completionPromise: "DONE",
    });

    const gateCheckedEvent = events.find(isRunnerEventType("completion.gate.checked")) as Extract<RunnerEvent, { type: "completion.gate.checked" }> | undefined;
    assert.ok(gateCheckedEvent);
    const { timestamp: _gateCheckedTimestamp, ...gateCheckedPayload } = gateCheckedEvent!;
    assert.deepEqual(gateCheckedPayload, {
      type: "completion.gate.checked",
      iteration: 1,
      loopToken: gateCheckedEvent!.loopToken,
      ready: false,
      reasons: ["Missing required output: ARCHITECTURE.md"],
    });

    const blockedEvent = events.find(isRunnerEventType("completion_gate_blocked")) as Extract<RunnerEvent, { type: "completion_gate_blocked" }> | undefined;
    assert.ok(blockedEvent);
    const { timestamp: _blockedTimestamp, ...blockedPayload } = blockedEvent!;
    assert.deepEqual(blockedPayload, {
      type: "completion_gate_blocked",
      iteration: 1,
      loopToken: blockedEvent!.loopToken,
      ready: false,
      reasons: ["Missing required output: ARCHITECTURE.md"],
    });

    const iterationCompletedEvent = events.find(isRunnerEventType("iteration.completed")) as Extract<RunnerEvent, { type: "iteration.completed" }> | undefined;
    assert.ok(iterationCompletedEvent);
    const { timestamp: _iterationCompletedTimestamp, ...iterationCompletedPayload } = iterationCompletedEvent!;
    // The per-iteration status is "complete" even though the gate blocked;
    // the loop-level result is asserted separately at the end.
    assert.deepEqual(iterationCompletedPayload, {
      type: "iteration.completed",
      iteration: 1,
      loopToken: iterationCompletedEvent!.loopToken,
      status: "complete",
      progress: true,
      changedFiles: ["notes/findings.md"],
      noProgressStreak: 0,
      completionPromiseMatched: true,
      completionGate: {
        ready: false,
        reasons: ["Missing required output: ARCHITECTURE.md"],
      },
      completion: {
        promiseSeen: true,
        durableProgressObserved: true,
        gateChecked: true,
        gatePassed: false,
        gateBlocked: true,
        blockingReasons: ["Missing required output: ARCHITECTURE.md"],
      },
      snapshotTruncated: false,
      snapshotErrorCount: 0,
    });

    assert.notEqual(result.status, "complete");
  } finally {
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
984
|
+
|
|
985
|
+
// After a single iteration, the persisted iteration records must contain a first
// entry with its iteration number, status, duration and start/finish timestamps.
test("runRalphLoop records iteration results to JSONL", async () => {
  const workDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 1 }));

    // Stand-in agent: acknowledge the prompt and finish with a short message.
    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const records = readIterationRecords(workDir);
    assert.ok(records.length >= 1);

    const firstRecord = records[0];
    assert.equal(firstRecord.iteration, 1);
    assert.equal(firstRecord.status, "complete");
    assert.ok(firstRecord.durationMs !== undefined && firstRecord.durationMs >= 0);
    assert.ok(firstRecord.startedAt.length > 0);
    assert.ok(firstRecord.completedAt !== undefined && firstRecord.completedAt.length > 0);
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1024
|
+
|
|
1025
|
+
// The mock agent replies without touching the task directory, so the single
// iteration must be recorded as having made no progress.
test("runRalphLoop reports no-progress-exhaustion when no files are written", async () => {
  const workDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 1, timeout: 5 }));

    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"I thought about it but wrote nothing"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.iterations.length, 1);
    assert.equal(outcome.iterations[0].progress, false);

    // One iteration without progress should exhaust the loop.
    // NOTE(review): "error" is accepted as well; confirm whether that is intended.
    const acceptedStatuses = ["no-progress-exhaustion", "error"];
    assert.ok(acceptedStatuses.includes(outcome.status));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1061
|
+
|
|
1062
|
+
/**
 * Takes a baseline snapshot, then, while assessTaskDirectoryProgress is pending,
 * a 50 ms timer creates a directory and strips all of its permission bits. The
 * assessment is expected to come back as "unknown" with no changed files, no
 * truncation, and at least one snapshot error.
 *
 * NOTE(review): a mode-000 directory is presumably still readable when the suite
 * runs as root, which would make this test environment-dependent — confirm.
 */
test("assessTaskDirectoryProgress returns unknown when a late snapshot hits an unreadable directory", async () => {
  const taskDir = createTempDir();
  const lateDir = join(taskDir, "zz-late-dir");
  let lateDirTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1, timeout: 5 }));
    writeFileSync(join(taskDir, "a.txt"), "seed\n", "utf8");

    const before = captureTaskDirectorySnapshot(ralphPath);
    lateDirTimer = setTimeout(() => {
      mkdirSync(lateDir, { recursive: true });
      chmodSync(lateDir, 0o000);
    }, 50);

    const result = await assessTaskDirectoryProgress(ralphPath, before);

    assert.equal(result.progress, "unknown");
    assert.equal(result.changedFiles.length, 0);
    assert.equal(result.snapshotTruncated, false);
    assert.ok(result.snapshotErrorCount > 0);
  } finally {
    // Cancel the timer in case it has not fired yet; otherwise it would recreate
    // the temp directory (mode 000) after the cleanup below has removed it.
    clearTimeout(lateDirTimer);
    // Restore permissions so the recursive removal can delete the directory.
    if (existsSync(lateDir)) {
      chmodSync(lateDir, 0o700);
    }
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
1089
|
+
|
|
1090
|
+
/**
 * Seeds the task directory with a 2,000,000-byte file, takes a baseline snapshot,
 * then, while assessTaskDirectoryProgress is pending, a 50 ms timer adds a further
 * 300,000-byte file. The assessment is expected to come back as "unknown" with
 * `snapshotTruncated` set, no changed files, and no snapshot errors.
 */
test("assessTaskDirectoryProgress returns unknown when a late snapshot is truncated", async () => {
  const taskDir = createTempDir();
  let lateFileTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    const ralphPath = writeRalphMd(taskDir, minimalRalphMd({ max_iterations: 1, timeout: 5 }));
    writeFileSync(join(taskDir, "a.txt"), Buffer.alloc(2_000_000, 1));

    const before = captureTaskDirectorySnapshot(ralphPath);
    const lateFile = join(taskDir, "zz-late.bin");
    lateFileTimer = setTimeout(() => {
      writeFileSync(lateFile, Buffer.alloc(300_000, 2));
    }, 50);

    const result = await assessTaskDirectoryProgress(ralphPath, before);

    assert.equal(result.progress, "unknown");
    assert.equal(result.changedFiles.length, 0);
    assert.equal(result.snapshotTruncated, true);
    assert.equal(result.snapshotErrorCount, 0);
  } finally {
    // Cancel the timer in case it has not fired yet; a late write into the removed
    // directory would throw from the timer callback, outside this test.
    clearTimeout(lateFileTimer);
    rmSync(taskDir, { recursive: true, force: true });
  }
});
|
|
1112
|
+
|
|
1113
|
+
// The mock agent overwrites an existing notes file, so the single permitted
// iteration changes a file before the iteration budget runs out.
test("runRalphLoop reports max-iterations when progress was made", async () => {
  const workDir = createTempDir();
  try {
    const notesDir = join(workDir, "notes");
    mkdirSync(notesDir, { recursive: true });
    writeFileSync(join(workDir, "notes", "findings.md"), "initial\n");

    const ralphPath = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 1, timeout: 5 }));

    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo "progress!" > "${workDir}/notes/findings.md"`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"updated file"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.iterations.length, 1);

    // Progress was made and the iteration budget is spent; several terminal statuses are accepted.
    // NOTE(review): this list is broader than the test title suggests; confirm intent.
    const acceptedStatuses = ["max-iterations", "no-progress-exhaustion", "complete", "error"];
    assert.ok(acceptedStatuses.includes(outcome.status));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1152
|
+
|
|
1153
|
+
// After the first iteration completes, RALPH.md is rewritten so that
// required_outputs is a bare scalar rather than a YAML sequence. The loop is
// expected to stop with an error before a second iteration and to notify why.
test("runRalphLoop fails closed when live RALPH.md reparse sees malformed raw required_outputs", async () => {
  const workDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 3 }));

    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    // Replacement RALPH.md whose required_outputs value is a scalar.
    const malformedRalph = [
      "---",
      "commands: []",
      "max_iterations: 3",
      "timeout: 5",
      "required_outputs: ARCHITECTURE.md",
      "guardrails:",
      " block_commands: []",
      " protected_files: []",
      "---",
      "Task: Do something",
      "",
    ].join("\n");

    const seenNotifications: Array<{ message: string; level: string }> = [];
    let finishedIterations = 0;
    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 3,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      onIterationComplete() {
        finishedIterations += 1;
        if (finishedIterations !== 1) {
          return;
        }
        // Swap the file only once, right after the first iteration.
        writeFileSync(ralphPath, malformedRalph, "utf8");
      },
      onNotify(message, level) {
        seenNotifications.push({ message, level });
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.status, "error");
    assert.equal(outcome.iterations.length, 1);

    const expectedNotice = "Invalid RALPH.md on iteration 2: Invalid RALPH frontmatter: required_outputs must be a YAML sequence";
    assert.ok(seenNotifications.some((entry) => entry.message.includes(expectedNotice)));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1205
|
+
|
|
1206
|
+
// RALPH.md is overwritten with non-frontmatter text once the first iteration has
// finished; the loop must end in one of the accepted terminal statuses.
test("runRalphLoop stops with error when RALPH.md becomes invalid during loop", async () => {
  const workDir = createTempDir();
  try {
    const ralphPath = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 3 }));

    const agentScriptPath = join(workDir, "mock-pi.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    let finishedIterations = 0;
    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 3,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      onIterationComplete() {
        finishedIterations += 1;
        // Corrupt the file exactly once, after the first iteration.
        if (finishedIterations === 1) {
          writeFileSync(ralphPath, "not valid yaml at all", "utf8");
        }
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    // The loop is expected to have stopped because of the invalid RALPH.md seen on iteration 2.
    assert.ok(outcome.iterations.length >= 1);
    const acceptedStatuses = ["error", "stopped", "no-progress-exhaustion", "max-iterations"];
    assert.ok(acceptedStatuses.includes(outcome.status));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1249
|
+
|
|
1250
|
+
// The RALPH.md path points into a fresh temp directory but the file is never
// written; the loop must report an error without running any iteration.
test("runRalphLoop reports error when RALPH.md is missing", async () => {
  const workDir = createTempDir();
  try {
    // Path only; the file is deliberately left absent.
    const missingRalphPath = join(workDir, "RALPH.md");

    const outcome = await runRalphLoop({
      ralphPath: missingRalphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "echo",
      spawnArgs: ["mock"],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.status, "error");
    assert.equal(outcome.iterations.length, 0);
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1274
|
+
|
|
1275
|
+
// One successful iteration must leave exactly one markdown transcript under
// .ralph-runner/transcripts containing the status line, the task text, the
// command name and output, and the agent's final message.
test("runRalphLoop writes a transcript for a successful iteration", async () => {
  const workDir = createTempDir();
  try {
    const ralphSource = minimalRalphMd({ max_iterations: 1 }).replace("Task: Do something", "Task: Successful transcript case");
    const ralphPath = writeRalphMd(workDir, ralphSource);
    mkdirSync(join(workDir, "notes"), { recursive: true });
    writeFileSync(join(workDir, "notes", "findings.md"), "initial\n", "utf8");

    // Stand-in agent: acknowledge the prompt, update the notes file, finish.
    const agentScriptPath = join(workDir, "mock-pi-success.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo "updated findings" > "${workDir}/notes/findings.md"`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"all done"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [{ name: "tests", output: "command output" }],
      pi: makeMockPi(),
    });

    assert.ok(["complete", "max-iterations"].includes(outcome.status));

    const transcriptsDir = join(workDir, ".ralph-runner", "transcripts");
    const markdownTranscripts = readdirSync(transcriptsDir).filter((entry) => entry.endsWith(".md"));
    assert.equal(markdownTranscripts.length, 1);

    const transcriptText = readFileSync(join(transcriptsDir, markdownTranscripts[0]), "utf8");
    assert.ok(transcriptText.includes("Status: complete"));
    assert.ok(transcriptText.includes("Task: Successful transcript case"));
    assert.ok(transcriptText.includes("tests"));
    assert.ok(transcriptText.includes("command output"));
    assert.ok(transcriptText.includes("all done"));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1323
|
+
|
|
1324
|
+
// The mock agent acknowledges the prompt and then sleeps for 5 seconds against a
// timeout of 1, so the run must end as "timeout" and still leave one transcript.
test("runRalphLoop writes a transcript for a timed out iteration", async () => {
  const workDir = createTempDir();
  try {
    const ralphSource = minimalRalphMd({ max_iterations: 1, timeout: 1 }).replace("Task: Do something", "Task: Timeout transcript case");
    const ralphPath = writeRalphMd(workDir, ralphSource);

    const agentScriptPath = join(workDir, "mock-pi-timeout.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      "sleep 5",
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 1,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [{ name: "tests", output: "command output" }],
      pi: makeMockPi(),
    });

    assert.equal(outcome.status, "timeout");

    const transcriptsDir = join(workDir, ".ralph-runner", "transcripts");
    const markdownTranscripts = readdirSync(transcriptsDir).filter((entry) => entry.endsWith(".md"));
    assert.equal(markdownTranscripts.length, 1);

    const transcriptText = readFileSync(join(transcriptsDir, markdownTranscripts[0]), "utf8");
    assert.ok(transcriptText.includes("Status: timeout"));
    // Case-insensitive match on the timeout wording.
    assert.ok(transcriptText.toLowerCase().includes("timed out"));
    assert.ok(transcriptText.includes("Task: Timeout transcript case"));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1364
|
+
|
|
1365
|
+
// Two back-to-back runs in the same task directory must use different loop tokens
// and leave two transcripts, each findable by its run's token in the file name.
test("runRalphLoop preserves transcript files across reruns in the same task dir", async () => {
  const workDir = createTempDir();
  try {
    const ralphSource = minimalRalphMd({ max_iterations: 1 }).replace("Task: Do something", "Task: Rerun transcript case");
    const ralphPath = writeRalphMd(workDir, ralphSource);

    const agentScriptPath = join(workDir, "mock-pi-rerun.sh");
    const agentScriptLines = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"all done"}]}]}'`,
      "",
    ];
    writeFileSync(agentScriptPath, agentScriptLines.join("\n"), { mode: 0o755 });

    // The two runs use identical options, one after the other.
    const firstRun = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [{ name: "tests", output: "command output" }],
      pi: makeMockPi(),
    });

    const secondRun = await runRalphLoop({
      ralphPath,
      cwd: workDir,
      timeout: 5,
      maxIterations: 1,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [agentScriptPath],
      runCommandsFn: async () => [{ name: "tests", output: "command output" }],
      pi: makeMockPi(),
    });

    const firstToken = firstRun.iterations[0]?.loopToken;
    const secondToken = secondRun.iterations[0]?.loopToken;
    assert.ok(firstToken);
    assert.ok(secondToken);
    assert.notEqual(firstToken, secondToken);

    const transcriptsDir = join(workDir, ".ralph-runner", "transcripts");
    const markdownTranscripts = readdirSync(transcriptsDir).filter((entry) => entry.endsWith(".md"));
    assert.equal(markdownTranscripts.length, 2);

    // Each transcript is located by the loop token embedded in its file name.
    const firstTranscript = markdownTranscripts.find((entry) => entry.includes(firstRun.iterations[0]!.loopToken!));
    const secondTranscript = markdownTranscripts.find((entry) => entry.includes(secondRun.iterations[0]!.loopToken!));
    assert.ok(firstTranscript);
    assert.ok(secondTranscript);

    const firstRaw = readFileSync(join(transcriptsDir, firstTranscript!), "utf8");
    const secondRaw = readFileSync(join(transcriptsDir, secondTranscript!), "utf8");
    assert.ok(firstRaw.includes("Task: Rerun transcript case"));
    assert.ok(secondRaw.includes("Task: Rerun transcript case"));
    assert.ok(firstRaw.includes("all done"));
    assert.ok(secondRaw.includes("all done"));
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1428
|
+
|
|
1429
|
+
// Checks that a failing first iteration ends the loop when stopOnError is set:
// the run reports "error" and only a single iteration is recorded.
test("runRalphLoop stops on error when stopOnError is true (default)", async () => {
  const workDir = createTempDir();
  try {
    const ralphFile = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 3 }));

    // Stand-in for the pi process: consumes one line from stdin, prints a
    // successful prompt response, then exits with status 1.
    const failingScript = join(workDir, "failing-pi.sh");
    const failingScriptBody = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      "exit 1",
      "", // keeps the trailing newline at the end of the script
    ].join("\n");
    writeFileSync(failingScript, failingScriptBody, { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath: ralphFile,
      cwd: workDir,
      timeout: 5,
      maxIterations: 3,
      stopOnError: true,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [failingScript],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.status, "error");
    assert.equal(outcome.iterations.length, 1);
  } finally {
    // Always remove the temporary task directory, even when an assertion fails.
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1464
|
+
|
|
1465
|
+
// Checks that with stopOnError disabled the loop keeps going after a failed
// iteration: more than one iteration is recorded and the first one is "error".
test("runRalphLoop continues past error when stopOnError is false", async () => {
  const workDir = createTempDir();
  try {
    const ralphFile = writeRalphMd(workDir, minimalRalphMd({ max_iterations: 3 }));

    // Stand-in for the pi process. It keeps an invocation count in a file inside
    // the task directory: the first invocation exits with status 1 after the
    // prompt response, later invocations also print an agent_end message.
    const flakyScript = join(workDir, "maybe-fail-pi.sh");
    const flakyScriptBody = [
      "#!/bin/bash",
      "read line",
      `COUNTER_FILE="${workDir}/.call-counter"`,
      "COUNT=0",
      'if [ -f "$COUNTER_FILE" ]; then',
      'COUNT=$(cat "$COUNTER_FILE")',
      "fi",
      "COUNT=$((COUNT + 1))",
      'echo "$COUNT" > "$COUNTER_FILE"',
      `echo '{"type":"response","command":"prompt","success":true}'`,
      'if [ "$COUNT" -le 1 ]; then',
      "exit 1",
      "fi",
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'`,
      "", // keeps the trailing newline at the end of the script
    ].join("\n");
    writeFileSync(flakyScript, flakyScriptBody, { mode: 0o755 });

    const outcome = await runRalphLoop({
      ralphPath: ralphFile,
      cwd: workDir,
      timeout: 5,
      maxIterations: 3,
      stopOnError: false,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [flakyScript],
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    const iterationTotal = outcome.iterations.length;
    assert.ok(iterationTotal > 1, `Expected >1 iteration, got ${iterationTotal}`);

    // The length check above guarantees a first entry exists.
    const [firstIteration] = outcome.iterations;
    assert.equal(firstIteration?.status, "error");
  } finally {
    // Always remove the temporary task directory, even when an assertion fails.
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1510
|
+
|
|
1511
|
+
// Checks that the run ends with status "error" when the RALPH file is removed
// after an iteration completes, even though stopOnError is false.
// NOTE(review): presumably the missing file is what the loop treats as a
// "structural failure" — confirm against runRalphLoop.
test("runRalphLoop breaks on structural failure even with stopOnError false", async () => {
  const workDir = createTempDir();
  try {
    const ralphFile = writeRalphMd(
      workDir,
      minimalRalphMd({ max_iterations: 3, stop_on_error: false }),
    );

    // Stand-in for the pi process: consumes one line from stdin, then prints a
    // successful prompt response followed by an agent_end message.
    const mockScript = join(workDir, "mock-pi.sh");
    const mockScriptBody = [
      "#!/bin/bash",
      "read line",
      `echo '{"type":"response","command":"prompt","success":true}'`,
      `echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"delete ralph"}]}]}'`,
      "", // keeps the trailing newline at the end of the script
    ].join("\n");
    writeFileSync(mockScript, mockScriptBody, { mode: 0o755 });

    let completedIterations = 0;
    const outcome = await runRalphLoop({
      ralphPath: ralphFile,
      cwd: workDir,
      timeout: 5,
      maxIterations: 3,
      stopOnError: false,
      guardrails: { blockCommands: [], protectedFiles: [] },
      spawnCommand: "bash",
      spawnArgs: [mockScript],
      onIterationComplete() {
        completedIterations += 1;
        if (completedIterations < 1) {
          return;
        }
        // Remove the RALPH file out from under the running loop.
        rmSync(ralphFile, { force: true });
      },
      runCommandsFn: async () => [],
      pi: makeMockPi(),
    });

    assert.equal(outcome.status, "error");
  } finally {
    // Always remove the temporary task directory, even when an assertion fails.
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
1552
|
+
|