@lnilluv/pi-ralph-loop 0.2.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +5 -2
- package/.github/workflows/release.yml +15 -43
- package/README.md +51 -113
- package/package.json +13 -5
- package/scripts/version-helper.ts +210 -0
- package/src/index.ts +1360 -275
- package/src/ralph-draft-context.ts +618 -0
- package/src/ralph-draft-llm.ts +297 -0
- package/src/ralph-draft.ts +33 -0
- package/src/ralph.ts +1457 -0
- package/src/runner-rpc.ts +434 -0
- package/src/runner-state.ts +822 -0
- package/src/runner.ts +957 -0
- package/src/secret-paths.ts +66 -0
- package/src/shims.d.ts +0 -3
- package/tests/fixtures/parity/migrate/OPEN_QUESTIONS.md +3 -0
- package/tests/fixtures/parity/migrate/RALPH.md +27 -0
- package/tests/fixtures/parity/migrate/golden/MIGRATED.md +15 -0
- package/tests/fixtures/parity/migrate/legacy/source.md +6 -0
- package/tests/fixtures/parity/migrate/legacy/source.yaml +3 -0
- package/tests/fixtures/parity/migrate/scripts/show-legacy.sh +10 -0
- package/tests/fixtures/parity/migrate/scripts/verify.sh +15 -0
- package/tests/fixtures/parity/research/OPEN_QUESTIONS.md +3 -0
- package/tests/fixtures/parity/research/RALPH.md +45 -0
- package/tests/fixtures/parity/research/claim-evidence-checklist.md +15 -0
- package/tests/fixtures/parity/research/expected-outputs.md +22 -0
- package/tests/fixtures/parity/research/scripts/show-snapshots.sh +13 -0
- package/tests/fixtures/parity/research/scripts/verify.sh +55 -0
- package/tests/fixtures/parity/research/snapshots/app-factory-ai-cli.md +11 -0
- package/tests/fixtures/parity/research/snapshots/docs-factory-ai-cli-features-missions.md +11 -0
- package/tests/fixtures/parity/research/snapshots/factory-ai-news-missions.md +11 -0
- package/tests/fixtures/parity/research/source-manifest.md +20 -0
- package/tests/index.test.ts +3529 -0
- package/tests/parity/README.md +9 -0
- package/tests/parity/harness.py +526 -0
- package/tests/parity-harness.test.ts +42 -0
- package/tests/parity-research-fixture.test.ts +34 -0
- package/tests/ralph-draft-context.test.ts +672 -0
- package/tests/ralph-draft-llm.test.ts +434 -0
- package/tests/ralph-draft.test.ts +168 -0
- package/tests/ralph.test.ts +1840 -0
- package/tests/runner-event-contract.test.ts +235 -0
- package/tests/runner-rpc.test.ts +358 -0
- package/tests/runner-state.test.ts +553 -0
- package/tests/runner.test.ts +1347 -0
- package/tests/secret-paths.test.ts +55 -0
- package/tests/version-helper.test.ts +75 -0
- package/tsconfig.json +3 -2
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import test from "node:test";
|
|
6
|
+
|
|
7
|
+
import { ensureRunnerDir, readRunnerEvents, type CompletionRecord, type ProgressState, type RunnerEvent, type RunnerStatus, type RunnerStatusFile } from "../src/runner-state.ts";
|
|
8
|
+
|
|
9
|
+
type Guardrails = RunnerStatusFile["guardrails"];
|
|
10
|
+
|
|
11
|
+
type ExpectedRunnerEvent =
|
|
12
|
+
| {
|
|
13
|
+
type: "runner.started";
|
|
14
|
+
timestamp: string;
|
|
15
|
+
loopToken: string;
|
|
16
|
+
cwd: string;
|
|
17
|
+
taskDir: string;
|
|
18
|
+
status: "initializing";
|
|
19
|
+
maxIterations: number;
|
|
20
|
+
timeout: number;
|
|
21
|
+
completionPromise?: string;
|
|
22
|
+
guardrails: Guardrails;
|
|
23
|
+
}
|
|
24
|
+
| {
|
|
25
|
+
type: "iteration.started";
|
|
26
|
+
timestamp: string;
|
|
27
|
+
iteration: number;
|
|
28
|
+
loopToken: string;
|
|
29
|
+
status: "running";
|
|
30
|
+
maxIterations: number;
|
|
31
|
+
timeout: number;
|
|
32
|
+
completionPromise?: string;
|
|
33
|
+
}
|
|
34
|
+
| {
|
|
35
|
+
type: "iteration.completed";
|
|
36
|
+
timestamp: string;
|
|
37
|
+
iteration: number;
|
|
38
|
+
loopToken: string;
|
|
39
|
+
status: "complete" | "timeout" | "error";
|
|
40
|
+
progress: ProgressState;
|
|
41
|
+
changedFiles: string[];
|
|
42
|
+
noProgressStreak: number;
|
|
43
|
+
completionPromiseMatched?: boolean;
|
|
44
|
+
completionGate?: { ready: boolean; reasons: string[] };
|
|
45
|
+
completion?: CompletionRecord;
|
|
46
|
+
snapshotTruncated?: boolean;
|
|
47
|
+
snapshotErrorCount?: number;
|
|
48
|
+
reason?: string;
|
|
49
|
+
}
|
|
50
|
+
| {
|
|
51
|
+
type: "durable.progress.observed";
|
|
52
|
+
timestamp: string;
|
|
53
|
+
iteration: number;
|
|
54
|
+
loopToken: string;
|
|
55
|
+
progress: true;
|
|
56
|
+
changedFiles: string[];
|
|
57
|
+
snapshotTruncated?: boolean;
|
|
58
|
+
snapshotErrorCount?: number;
|
|
59
|
+
}
|
|
60
|
+
| {
|
|
61
|
+
type: "durable.progress.missing";
|
|
62
|
+
timestamp: string;
|
|
63
|
+
iteration: number;
|
|
64
|
+
loopToken: string;
|
|
65
|
+
progress: false;
|
|
66
|
+
changedFiles: string[];
|
|
67
|
+
snapshotTruncated?: boolean;
|
|
68
|
+
snapshotErrorCount?: number;
|
|
69
|
+
}
|
|
70
|
+
| {
|
|
71
|
+
type: "durable.progress.unknown";
|
|
72
|
+
timestamp: string;
|
|
73
|
+
iteration: number;
|
|
74
|
+
loopToken: string;
|
|
75
|
+
progress: "unknown";
|
|
76
|
+
changedFiles: string[];
|
|
77
|
+
snapshotTruncated?: boolean;
|
|
78
|
+
snapshotErrorCount?: number;
|
|
79
|
+
}
|
|
80
|
+
| {
|
|
81
|
+
type: "completion_promise_seen";
|
|
82
|
+
timestamp: string;
|
|
83
|
+
iteration: number;
|
|
84
|
+
loopToken: string;
|
|
85
|
+
completionPromise: string;
|
|
86
|
+
}
|
|
87
|
+
| {
|
|
88
|
+
type: "completion.gate.checked";
|
|
89
|
+
timestamp: string;
|
|
90
|
+
iteration: number;
|
|
91
|
+
loopToken: string;
|
|
92
|
+
ready: boolean;
|
|
93
|
+
reasons: string[];
|
|
94
|
+
}
|
|
95
|
+
| {
|
|
96
|
+
type: "completion_gate_passed";
|
|
97
|
+
timestamp: string;
|
|
98
|
+
iteration: number;
|
|
99
|
+
loopToken: string;
|
|
100
|
+
ready: true;
|
|
101
|
+
reasons: string[];
|
|
102
|
+
}
|
|
103
|
+
| {
|
|
104
|
+
type: "completion_gate_blocked";
|
|
105
|
+
timestamp: string;
|
|
106
|
+
iteration: number;
|
|
107
|
+
loopToken: string;
|
|
108
|
+
ready: false;
|
|
109
|
+
reasons: string[];
|
|
110
|
+
}
|
|
111
|
+
| {
|
|
112
|
+
type: "runner.finished";
|
|
113
|
+
timestamp: string;
|
|
114
|
+
loopToken: string;
|
|
115
|
+
status: RunnerStatus;
|
|
116
|
+
iterations: number;
|
|
117
|
+
totalDurationMs: number;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
/**
 * Compile-time comparison of two types.
 *
 * Resolves to `true` only when the deferred conditional function type built
 * from `Left` extends the one built from `Right` and vice versa; otherwise
 * resolves to `false`. Used in this file as a strict type-equality check.
 */
type Equal<Left, Right> =
  (<Probe>() => Probe extends Left ? 1 : 2) extends (<Probe>() => Probe extends Right ? 1 : 2)
    ? (<Probe>() => Probe extends Right ? 1 : 2) extends (<Probe>() => Probe extends Left ? 1 : 2)
      ? true
      : false
    : false;

/** Accepts only the literal type `true`; any other type argument is a compile error. */
type Assert<T extends true> = T;
|
|
127
|
+
|
|
128
|
+
type _runnerEventContract = Assert<Equal<RunnerEvent, ExpectedRunnerEvent>>;
|
|
129
|
+
|
|
130
|
+
// Compile-time contract checks: these contradictory payloads must be rejected.
|
|
131
|
+
const invalidDurableProgressObservedEvent: Extract<ExpectedRunnerEvent, { type: "durable.progress.observed" }> = {
|
|
132
|
+
type: "durable.progress.observed",
|
|
133
|
+
timestamp: new Date("2026-04-13T12:00:01.000Z").toISOString(),
|
|
134
|
+
iteration: 1,
|
|
135
|
+
loopToken: "test-loop-token",
|
|
136
|
+
// @ts-expect-error durable.progress.observed requires progress: true
|
|
137
|
+
progress: false,
|
|
138
|
+
changedFiles: ["src/loop.ts"],
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
const invalidCompletionGatePassedEvent: Extract<ExpectedRunnerEvent, { type: "completion_gate_passed" }> = {
|
|
142
|
+
type: "completion_gate_passed",
|
|
143
|
+
timestamp: new Date("2026-04-13T12:00:02.000Z").toISOString(),
|
|
144
|
+
iteration: 1,
|
|
145
|
+
loopToken: "test-loop-token",
|
|
146
|
+
// @ts-expect-error completion_gate_passed requires ready: true
|
|
147
|
+
ready: false,
|
|
148
|
+
reasons: ["ready=false is contradictory"],
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
const invalidCompletionGateBlockedEvent: Extract<ExpectedRunnerEvent, { type: "completion_gate_blocked" }> = {
|
|
152
|
+
type: "completion_gate_blocked",
|
|
153
|
+
timestamp: new Date("2026-04-13T12:00:03.000Z").toISOString(),
|
|
154
|
+
iteration: 1,
|
|
155
|
+
loopToken: "test-loop-token",
|
|
156
|
+
// @ts-expect-error completion_gate_blocked requires ready: false
|
|
157
|
+
ready: true,
|
|
158
|
+
reasons: ["ready=true is contradictory"],
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
/**
 * Creates a uniquely named scratch directory under the OS temp directory.
 *
 * @returns Path of the newly created directory. This function does not
 *   remove it; cleanup is left to the caller.
 */
function createTempDir(): string {
  const prefix = join(tmpdir(), "pi-ralph-runner-event-contract-");
  const scratchDir = mkdtempSync(prefix);
  return scratchDir;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Serializes `events` as JSON Lines into `events.jsonl` inside the directory
 * returned by `ensureRunnerDir(taskDir)`.
 *
 * Each event is written with `JSON.stringify` on its own line and the file
 * ends with a trailing newline. An empty `events` array produces an empty
 * file instead of a single blank line, so the file never contains a line
 * that is not a serialized record.
 *
 * @param taskDir - Task directory passed to `ensureRunnerDir`.
 * @param events - Values to serialize; typed `unknown` so callers can pass
 *   payloads that do not satisfy the runner event types.
 */
function writeEventsFile(taskDir: string, events: unknown[]): void {
  const eventsFile = join(ensureRunnerDir(taskDir), "events.jsonl");
  const lines = events.map((event) => JSON.stringify(event));
  // No records means no content: avoid emitting a lone "\n" (a blank JSONL line).
  const payload = lines.length === 0 ? "" : `${lines.join("\n")}\n`;
  writeFileSync(eventsFile, payload, "utf8");
}
|
|
170
|
+
|
|
171
|
+
/**
 * Builds a `runner.started` event fixture populated with fixed values.
 *
 * @param taskDir - Used for both the `cwd` and `taskDir` fields of the event.
 * @returns A freshly created event object on every call.
 */
function makeValidStartedEvent(taskDir: string): Extract<ExpectedRunnerEvent, { type: "runner.started" }> {
  const startedAt = new Date("2026-04-13T12:00:00.000Z");
  // Key order is kept stable because the fixture is serialized to events.jsonl.
  const startedEvent: Extract<ExpectedRunnerEvent, { type: "runner.started" }> = {
    type: "runner.started",
    timestamp: startedAt.toISOString(),
    loopToken: "test-loop-token",
    cwd: taskDir,
    taskDir,
    status: "initializing",
    maxIterations: 3,
    timeout: 10,
    completionPromise: "DONE",
    guardrails: { blockCommands: [], protectedFiles: [] },
  };
  return startedEvent;
}
|
|
185
|
+
|
|
186
|
+
const malformedRunnerEventCases = [
|
|
187
|
+
{
|
|
188
|
+
name: "durable.progress.observed with progress false",
|
|
189
|
+
event: {
|
|
190
|
+
type: "durable.progress.observed",
|
|
191
|
+
timestamp: new Date("2026-04-13T12:00:01.000Z").toISOString(),
|
|
192
|
+
iteration: 1,
|
|
193
|
+
loopToken: "test-loop-token",
|
|
194
|
+
progress: false,
|
|
195
|
+
changedFiles: ["src/loop.ts"],
|
|
196
|
+
},
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
name: "completion_gate_passed with ready false",
|
|
200
|
+
event: {
|
|
201
|
+
type: "completion_gate_passed",
|
|
202
|
+
timestamp: new Date("2026-04-13T12:00:02.000Z").toISOString(),
|
|
203
|
+
iteration: 1,
|
|
204
|
+
loopToken: "test-loop-token",
|
|
205
|
+
ready: false,
|
|
206
|
+
reasons: ["ready=false is contradictory"],
|
|
207
|
+
},
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
name: "completion_gate_blocked with ready true",
|
|
211
|
+
event: {
|
|
212
|
+
type: "completion_gate_blocked",
|
|
213
|
+
timestamp: new Date("2026-04-13T12:00:03.000Z").toISOString(),
|
|
214
|
+
iteration: 1,
|
|
215
|
+
loopToken: "test-loop-token",
|
|
216
|
+
ready: true,
|
|
217
|
+
reasons: ["ready=true is contradictory"],
|
|
218
|
+
},
|
|
219
|
+
},
|
|
220
|
+
] as const;
|
|
221
|
+
|
|
222
|
+
// Registers one test per malformed payload: the events file holds a valid
// `runner.started` event followed by the malformed one, and reading it back
// must yield only the valid event.
for (const malformedCase of malformedRunnerEventCases) {
  test(`readRunnerEvents rejects ${malformedCase.name}`, () => {
    const taskDir = createTempDir();
    try {
      const startedEvent = makeValidStartedEvent(taskDir);
      writeEventsFile(taskDir, [startedEvent, malformedCase.event]);

      const parsedEvents = readRunnerEvents(taskDir);
      assert.deepEqual(parsedEvents, [startedEvent]);
    } finally {
      // Always remove the scratch directory, even when the assertion fails.
      rmSync(taskDir, { recursive: true, force: true });
    }
  });
}
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
6
|
+
import test from "node:test";
|
|
7
|
+
|
|
8
|
+
import { parseRpcEvent, runRpcIteration } from "../src/runner-rpc.ts";
|
|
9
|
+
|
|
10
|
+
// --- parseRpcEvent ---
|
|
11
|
+
|
|
12
|
+
test("parseRpcEvent parses agent_end events", () => {
|
|
13
|
+
const event = parseRpcEvent('{"type":"agent_end","messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"text","text":"done"}]}]}');
|
|
14
|
+
assert.equal(event.type, "agent_end");
|
|
15
|
+
assert.ok(Array.isArray(event.messages));
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
test("parseRpcEvent returns unknown for unrecognized lines", () => {
|
|
19
|
+
const event = parseRpcEvent("not json at all");
|
|
20
|
+
assert.equal(event.type, "unknown");
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
test("parseRpcEvent returns unknown for lines without type", () => {
|
|
24
|
+
const event = parseRpcEvent('{"foo":"bar"}');
|
|
25
|
+
assert.equal(event.type, "unknown");
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
test("parseRpcEvent handles response events", () => {
|
|
29
|
+
const event = parseRpcEvent('{"type":"response","command":"prompt","success":true,"id":"req-1"}');
|
|
30
|
+
assert.equal(event.type, "response");
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
test("parseRpcEvent handles message_update events with text deltas", () => {
|
|
34
|
+
const event = parseRpcEvent('{"type":"message_update","message":{"role":"assistant"},"assistantMessageEvent":{"type":"text_delta","delta":"Hello"}}');
|
|
35
|
+
assert.equal(event.type, "message_update");
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test("parseRpcEvent handles extension_ui_request events", () => {
|
|
39
|
+
const event = parseRpcEvent('{"type":"extension_ui_request","id":"ui-1","method":"notify","message":"test"}');
|
|
40
|
+
assert.equal(event.type, "extension_ui_request");
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
// --- runRpcIteration with mock subprocess ---
|
|
44
|
+
|
|
45
|
+
/**
 * Writes `script` to the file `name` inside `cwd`, requesting mode 0o755 so
 * the file can be executed, and resolves with the file's path.
 *
 * @param cwd - Directory that receives the script.
 * @param name - File name of the script within `cwd`.
 * @param script - Full text of the script.
 * @returns Promise resolving to the joined path of the written file.
 */
async function writeMockScript(cwd: string, name: string, script: string): Promise<string> {
  const executable = { mode: 0o755 };
  const target = join(cwd, name);
  writeFileSync(target, script, executable);
  return target;
}
|
|
50
|
+
|
|
51
|
+
test("runRpcIteration returns success when subprocess completes", async () => {
|
|
52
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
53
|
+
try {
|
|
54
|
+
const mockScript = await writeMockScript(cwd, "mock-pi.sh", `#!/bin/bash
|
|
55
|
+
read line
|
|
56
|
+
printf 'mock stderr\n' >&2
|
|
57
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
58
|
+
echo '{"type":"agent_start"}'
|
|
59
|
+
echo '{"type":"message_update","message":{"role":"assistant"},"assistantMessageEvent":{"type":"text_delta","delta":"done"}}'
|
|
60
|
+
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
|
|
61
|
+
`);
|
|
62
|
+
|
|
63
|
+
const result = await runRpcIteration({
|
|
64
|
+
prompt: "test prompt",
|
|
65
|
+
cwd,
|
|
66
|
+
timeoutMs: 5000,
|
|
67
|
+
spawnCommand: "bash",
|
|
68
|
+
spawnArgs: [mockScript],
|
|
69
|
+
});
|
|
70
|
+
assert.equal(result.success, true);
|
|
71
|
+
assert.equal(result.timedOut, false);
|
|
72
|
+
assert.equal(result.lastAssistantText, "done");
|
|
73
|
+
assert.equal(result.agentEndMessages.length, 1);
|
|
74
|
+
assert.equal(result.error, undefined);
|
|
75
|
+
assert.ok(result.telemetry.spawnedAt.length > 0);
|
|
76
|
+
assert.ok(result.telemetry.promptSentAt);
|
|
77
|
+
assert.ok(result.telemetry.firstStdoutEventAt);
|
|
78
|
+
assert.ok(result.telemetry.lastEventAt);
|
|
79
|
+
assert.equal(result.telemetry.lastEventType, "agent_end");
|
|
80
|
+
assert.ok(result.telemetry.exitedAt);
|
|
81
|
+
assert.equal(result.telemetry.timedOutAt, undefined);
|
|
82
|
+
assert.match(result.telemetry.stderrText ?? "", /mock stderr/);
|
|
83
|
+
} finally {
|
|
84
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
test("runRpcIteration captures close telemetry after agent_end", async () => {
|
|
89
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
90
|
+
try {
|
|
91
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-close.sh", `#!/bin/bash
|
|
92
|
+
read line
|
|
93
|
+
printf 'mock stderr\n' >&2
|
|
94
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
95
|
+
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
|
|
96
|
+
sleep 0.2
|
|
97
|
+
`);
|
|
98
|
+
|
|
99
|
+
const result = await runRpcIteration({
|
|
100
|
+
prompt: "test prompt",
|
|
101
|
+
cwd,
|
|
102
|
+
timeoutMs: 5000,
|
|
103
|
+
spawnCommand: "bash",
|
|
104
|
+
spawnArgs: [mockScript],
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
assert.equal(result.success, true);
|
|
108
|
+
assert.equal(result.timedOut, false);
|
|
109
|
+
assert.equal(result.lastAssistantText, "done");
|
|
110
|
+
assert.ok(result.telemetry.exitedAt);
|
|
111
|
+
assert.equal(result.telemetry.exitCode, 0);
|
|
112
|
+
assert.equal(result.telemetry.exitSignal, null);
|
|
113
|
+
assert.equal(result.telemetry.error, undefined);
|
|
114
|
+
} finally {
|
|
115
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
116
|
+
}
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
test("runRpcIteration records close-derived failure telemetry errors", async () => {
|
|
120
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
121
|
+
try {
|
|
122
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-close-failure.sh", `#!/bin/bash
|
|
123
|
+
read line
|
|
124
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
125
|
+
exit 7
|
|
126
|
+
`);
|
|
127
|
+
|
|
128
|
+
const result = await runRpcIteration({
|
|
129
|
+
prompt: "test prompt",
|
|
130
|
+
cwd,
|
|
131
|
+
timeoutMs: 5000,
|
|
132
|
+
spawnCommand: "bash",
|
|
133
|
+
spawnArgs: [mockScript],
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
assert.equal(result.success, false);
|
|
137
|
+
assert.equal(result.timedOut, false);
|
|
138
|
+
assert.ok(result.telemetry.exitedAt);
|
|
139
|
+
assert.equal(result.telemetry.exitCode, 7);
|
|
140
|
+
assert.equal(result.telemetry.exitSignal, null);
|
|
141
|
+
assert.match(result.telemetry.error ?? "", /Subprocess exited with code 7/);
|
|
142
|
+
} finally {
|
|
143
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
144
|
+
}
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
test("runRpcIteration closes stdin after agent_end so the subprocess can exit", async () => {
|
|
148
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
149
|
+
try {
|
|
150
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-wait-for-stdin-close.sh", `#!/bin/bash
|
|
151
|
+
read line
|
|
152
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
153
|
+
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
|
|
154
|
+
cat >/dev/null
|
|
155
|
+
`);
|
|
156
|
+
|
|
157
|
+
const result = await runRpcIteration({
|
|
158
|
+
prompt: "test prompt",
|
|
159
|
+
cwd,
|
|
160
|
+
timeoutMs: 5000,
|
|
161
|
+
spawnCommand: "bash",
|
|
162
|
+
spawnArgs: [mockScript],
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
assert.equal(result.success, true);
|
|
166
|
+
assert.equal(result.timedOut, false);
|
|
167
|
+
assert.equal(result.lastAssistantText, "done");
|
|
168
|
+
assert.equal(result.telemetry.exitCode, 0);
|
|
169
|
+
assert.equal(result.telemetry.exitSignal, null);
|
|
170
|
+
assert.ok(result.telemetry.exitedAt);
|
|
171
|
+
} finally {
|
|
172
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
173
|
+
}
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
test("runRpcIteration records timeout telemetry when subprocess takes too long", async () => {
|
|
177
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
178
|
+
try {
|
|
179
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-slow.sh", `#!/bin/bash
|
|
180
|
+
read line
|
|
181
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
182
|
+
sleep 30
|
|
183
|
+
`);
|
|
184
|
+
|
|
185
|
+
const result = await runRpcIteration({
|
|
186
|
+
prompt: "test prompt",
|
|
187
|
+
cwd,
|
|
188
|
+
timeoutMs: 500,
|
|
189
|
+
spawnCommand: "bash",
|
|
190
|
+
spawnArgs: [mockScript],
|
|
191
|
+
});
|
|
192
|
+
assert.equal(result.success, false);
|
|
193
|
+
assert.equal(result.timedOut, true);
|
|
194
|
+
assert.ok(result.telemetry.spawnedAt.length > 0);
|
|
195
|
+
assert.ok(result.telemetry.promptSentAt);
|
|
196
|
+
assert.ok(result.telemetry.firstStdoutEventAt);
|
|
197
|
+
assert.ok(result.telemetry.lastEventAt);
|
|
198
|
+
assert.equal(result.telemetry.lastEventType, "response");
|
|
199
|
+
assert.ok(result.telemetry.timedOutAt);
|
|
200
|
+
assert.equal(result.telemetry.exitedAt, undefined);
|
|
201
|
+
} finally {
|
|
202
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
203
|
+
}
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
test("runRpcIteration returns error when subprocess fails to start", async () => {
|
|
207
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
208
|
+
try {
|
|
209
|
+
const result = await runRpcIteration({
|
|
210
|
+
prompt: "test prompt",
|
|
211
|
+
cwd,
|
|
212
|
+
timeoutMs: 5000,
|
|
213
|
+
spawnCommand: "/nonexistent/command/that/does/not/exist",
|
|
214
|
+
spawnArgs: [],
|
|
215
|
+
});
|
|
216
|
+
assert.equal(result.success, false);
|
|
217
|
+
assert.equal(result.timedOut, false);
|
|
218
|
+
assert.ok(result.error);
|
|
219
|
+
assert.ok(result.error.length > 0);
|
|
220
|
+
} finally {
|
|
221
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
222
|
+
}
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
test("runRpcIteration collects completion promise text from agent_end", async () => {
|
|
226
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
227
|
+
try {
|
|
228
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-promise.sh", `#!/bin/bash
|
|
229
|
+
read line
|
|
230
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
231
|
+
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"I am done. <promise>DONE</promise> Please review."}]}]}'
|
|
232
|
+
`);
|
|
233
|
+
|
|
234
|
+
const result = await runRpcIteration({
|
|
235
|
+
prompt: "test prompt",
|
|
236
|
+
cwd,
|
|
237
|
+
timeoutMs: 5000,
|
|
238
|
+
spawnCommand: "bash",
|
|
239
|
+
spawnArgs: [mockScript],
|
|
240
|
+
});
|
|
241
|
+
assert.equal(result.success, true);
|
|
242
|
+
assert.equal(result.lastAssistantText, "I am done. <promise>DONE</promise> Please review.");
|
|
243
|
+
} finally {
|
|
244
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
245
|
+
}
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
test("runRpcIteration handles empty agent_end messages gracefully", async () => {
|
|
249
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
250
|
+
try {
|
|
251
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-empty.sh", `#!/bin/bash
|
|
252
|
+
read line
|
|
253
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
254
|
+
echo '{"type":"agent_end","messages":[]}'
|
|
255
|
+
`);
|
|
256
|
+
|
|
257
|
+
const result = await runRpcIteration({
|
|
258
|
+
prompt: "test prompt",
|
|
259
|
+
cwd,
|
|
260
|
+
timeoutMs: 5000,
|
|
261
|
+
spawnCommand: "bash",
|
|
262
|
+
spawnArgs: [mockScript],
|
|
263
|
+
});
|
|
264
|
+
assert.equal(result.success, true);
|
|
265
|
+
assert.equal(result.lastAssistantText, "");
|
|
266
|
+
assert.equal(result.agentEndMessages.length, 0);
|
|
267
|
+
} finally {
|
|
268
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
269
|
+
}
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
test("runRpcIteration passes explicit extension loading and task-dir env into the subprocess", async () => {
|
|
273
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
274
|
+
try {
|
|
275
|
+
const taskDir = join(cwd, "task-dir");
|
|
276
|
+
const argsFile = join(cwd, "args.txt");
|
|
277
|
+
const envFile = join(cwd, "env.txt");
|
|
278
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-capture.sh", `#!/bin/bash
|
|
279
|
+
printf '%s\n' "$@" > "${argsFile}"
|
|
280
|
+
printf 'taskDir=%s\n' "\${RALPH_RUNNER_TASK_DIR}" > "${envFile}"
|
|
281
|
+
printf 'cwd=%s\n' "\${RALPH_RUNNER_CWD}" >> "${envFile}"
|
|
282
|
+
printf 'loopToken=%s\n' "\${RALPH_RUNNER_LOOP_TOKEN}" >> "${envFile}"
|
|
283
|
+
printf 'currentIteration=%s\n' "\${RALPH_RUNNER_CURRENT_ITERATION}" >> "${envFile}"
|
|
284
|
+
printf 'maxIterations=%s\n' "\${RALPH_RUNNER_MAX_ITERATIONS}" >> "${envFile}"
|
|
285
|
+
printf 'noProgressStreak=%s\n' "\${RALPH_RUNNER_NO_PROGRESS_STREAK}" >> "${envFile}"
|
|
286
|
+
printf 'guardrails=%s\n' "\${RALPH_RUNNER_GUARDRAILS}" >> "${envFile}"
|
|
287
|
+
read line
|
|
288
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
289
|
+
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"done"}]}]}'
|
|
290
|
+
`);
|
|
291
|
+
|
|
292
|
+
const guardrails = { blockCommands: ["git\\s+push"], protectedFiles: ["src/generated/**"] };
|
|
293
|
+
const result = await runRpcIteration({
|
|
294
|
+
prompt: "test prompt",
|
|
295
|
+
cwd,
|
|
296
|
+
timeoutMs: 5000,
|
|
297
|
+
spawnCommand: mockScript,
|
|
298
|
+
env: {
|
|
299
|
+
RALPH_RUNNER_TASK_DIR: taskDir,
|
|
300
|
+
RALPH_RUNNER_CWD: cwd,
|
|
301
|
+
RALPH_RUNNER_LOOP_TOKEN: "test-loop-token",
|
|
302
|
+
RALPH_RUNNER_CURRENT_ITERATION: "2",
|
|
303
|
+
RALPH_RUNNER_MAX_ITERATIONS: "5",
|
|
304
|
+
RALPH_RUNNER_NO_PROGRESS_STREAK: "1",
|
|
305
|
+
RALPH_RUNNER_GUARDRAILS: JSON.stringify(guardrails),
|
|
306
|
+
},
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
assert.equal(result.success, true);
|
|
310
|
+
assert.deepEqual(readFileSync(argsFile, "utf8").trim().split("\n"), [
|
|
311
|
+
"--mode",
|
|
312
|
+
"rpc",
|
|
313
|
+
"--no-session",
|
|
314
|
+
"-e",
|
|
315
|
+
fileURLToPath(new URL("../src/index.ts", import.meta.url)),
|
|
316
|
+
]);
|
|
317
|
+
assert.deepEqual(readFileSync(envFile, "utf8").trim().split("\n"), [
|
|
318
|
+
`taskDir=${taskDir}`,
|
|
319
|
+
`cwd=${cwd}`,
|
|
320
|
+
`loopToken=test-loop-token`,
|
|
321
|
+
`currentIteration=2`,
|
|
322
|
+
`maxIterations=5`,
|
|
323
|
+
`noProgressStreak=1`,
|
|
324
|
+
`guardrails=${JSON.stringify(guardrails)}`,
|
|
325
|
+
]);
|
|
326
|
+
} finally {
|
|
327
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
328
|
+
}
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
test("runRpcIteration calls onEvent callback for streamed events", async () => {
|
|
332
|
+
const cwd = mkdtempSync(join(tmpdir(), "pi-ralph-rpc-"));
|
|
333
|
+
try {
|
|
334
|
+
const mockScript = await writeMockScript(cwd, "mock-pi-events.sh", `#!/bin/bash
|
|
335
|
+
read line
|
|
336
|
+
echo '{"type":"response","command":"prompt","success":true}'
|
|
337
|
+
echo '{"type":"agent_start"}'
|
|
338
|
+
echo '{"type":"agent_end","messages":[{"role":"assistant","content":[{"type":"text","text":"hello"}]}]}'
|
|
339
|
+
`);
|
|
340
|
+
|
|
341
|
+
const events: string[] = [];
|
|
342
|
+
const result = await runRpcIteration({
|
|
343
|
+
prompt: "test prompt",
|
|
344
|
+
cwd,
|
|
345
|
+
timeoutMs: 5000,
|
|
346
|
+
spawnCommand: "bash",
|
|
347
|
+
spawnArgs: [mockScript],
|
|
348
|
+
onEvent(event) {
|
|
349
|
+
events.push(event.type);
|
|
350
|
+
},
|
|
351
|
+
});
|
|
352
|
+
assert.equal(result.success, true);
|
|
353
|
+
assert.ok(events.includes("agent_start"));
|
|
354
|
+
assert.ok(events.includes("agent_end"));
|
|
355
|
+
} finally {
|
|
356
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
357
|
+
}
|
|
358
|
+
});
|