@lnilluv/pi-ralph-loop 0.1.3 → 0.1.4-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. package/.github/workflows/ci.yml +5 -2
  2. package/.github/workflows/release.yml +7 -4
  3. package/README.md +151 -15
  4. package/package.json +13 -4
  5. package/src/index.ts +1419 -176
  6. package/src/ralph-draft-context.ts +618 -0
  7. package/src/ralph-draft-llm.ts +297 -0
  8. package/src/ralph-draft.ts +33 -0
  9. package/src/ralph.ts +1457 -0
  10. package/src/runner-rpc.ts +434 -0
  11. package/src/runner-state.ts +822 -0
  12. package/src/runner.ts +957 -0
  13. package/src/secret-paths.ts +66 -0
  14. package/src/shims.d.ts +23 -0
  15. package/tests/fixtures/parity/migrate/OPEN_QUESTIONS.md +3 -0
  16. package/tests/fixtures/parity/migrate/RALPH.md +27 -0
  17. package/tests/fixtures/parity/migrate/golden/MIGRATED.md +15 -0
  18. package/tests/fixtures/parity/migrate/legacy/source.md +6 -0
  19. package/tests/fixtures/parity/migrate/legacy/source.yaml +3 -0
  20. package/tests/fixtures/parity/migrate/scripts/show-legacy.sh +10 -0
  21. package/tests/fixtures/parity/migrate/scripts/verify.sh +15 -0
  22. package/tests/fixtures/parity/research/OPEN_QUESTIONS.md +3 -0
  23. package/tests/fixtures/parity/research/RALPH.md +45 -0
  24. package/tests/fixtures/parity/research/claim-evidence-checklist.md +15 -0
  25. package/tests/fixtures/parity/research/expected-outputs.md +22 -0
  26. package/tests/fixtures/parity/research/scripts/show-snapshots.sh +13 -0
  27. package/tests/fixtures/parity/research/scripts/verify.sh +55 -0
  28. package/tests/fixtures/parity/research/snapshots/app-factory-ai-cli.md +11 -0
  29. package/tests/fixtures/parity/research/snapshots/docs-factory-ai-cli-features-missions.md +11 -0
  30. package/tests/fixtures/parity/research/snapshots/factory-ai-news-missions.md +11 -0
  31. package/tests/fixtures/parity/research/source-manifest.md +20 -0
  32. package/tests/index.test.ts +3529 -0
  33. package/tests/parity/README.md +9 -0
  34. package/tests/parity/harness.py +526 -0
  35. package/tests/parity-harness.test.ts +42 -0
  36. package/tests/parity-research-fixture.test.ts +34 -0
  37. package/tests/ralph-draft-context.test.ts +672 -0
  38. package/tests/ralph-draft-llm.test.ts +434 -0
  39. package/tests/ralph-draft.test.ts +168 -0
  40. package/tests/ralph.test.ts +1840 -0
  41. package/tests/runner-event-contract.test.ts +235 -0
  42. package/tests/runner-rpc.test.ts +358 -0
  43. package/tests/runner-state.test.ts +553 -0
  44. package/tests/runner.test.ts +1347 -0
  45. package/tests/secret-paths.test.ts +55 -0
  46. package/tsconfig.json +3 -2
@@ -0,0 +1,3529 @@
1
+ import assert from "node:assert/strict";
2
+ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { dirname, join } from "node:path";
5
+ import test from "node:test";
6
+ import registerRalphCommands, { runCommands } from "../src/index.ts";
7
+ import { SECRET_PATH_POLICY_TOKEN } from "../src/secret-paths.ts";
8
+ import { generateDraft, parseRalphMarkdown, slugifyTask, validateFrontmatter, type DraftPlan, type DraftTarget } from "../src/ralph.ts";
9
+ import type { StrengthenDraftRuntime } from "../src/ralph-draft-llm.ts";
10
+ import type { RunnerConfig, RunnerResult } from "../src/runner.ts";
11
+ import { runRalphLoop as realRunRalphLoop, captureTaskDirectorySnapshot, assessTaskDirectoryProgress, summarizeChangedFiles } from "../src/runner.ts";
12
+ import {
13
+ appendIterationRecord,
14
+ listActiveLoopRegistryEntries,
15
+ readActiveLoopRegistry,
16
+ recordActiveLoopStopRequest,
17
+ writeActiveLoopRegistryEntry,
18
+ writeStatusFile,
19
+ type ActiveLoopRegistryEntry,
20
+ type IterationRecord,
21
+ type RunnerStatusFile,
22
+ } from "../src/runner-state.ts";
23
+
24
/**
 * Creates a fresh, uniquely named scratch directory inside the OS temp directory.
 *
 * @returns The absolute path of the new directory; its name starts with `pi-ralph-loop-index-`.
 */
function createTempDir(): string {
  const prefix = join(tmpdir(), "pi-ralph-loop-index-");
  return mkdtempSync(prefix);
}
27
+
28
/**
 * Applies the given environment variables to `process.env` and remembers what was there before.
 *
 * @param values Variable names mapped to the values to set.
 * @returns A restore function: variables that were previously unset are deleted again, all
 *   others are put back to their earlier value.
 */
function setRunnerEnv(values: Record<string, string>): () => void {
  const saved: Array<[string, string | undefined]> = [];
  Object.entries(values).forEach(([name, next]) => {
    // Capture the prior value before overwriting it.
    saved.push([name, process.env[name]]);
    process.env[name] = next;
  });

  return () => {
    saved.forEach(([name, prior]) => {
      if (prior !== undefined) {
        process.env[name] = prior;
        return;
      }
      delete process.env[name];
    });
  };
}
44
+
45
/**
 * Derives the draft target for a task: the slug from `slugifyTask`, the task directory under
 * `cwd`, and the `RALPH.md` path inside that directory.
 */
function createTarget(cwd: string, task: string): DraftTarget {
  const slug = slugifyTask(task);
  const dirPath = join(cwd, slug);
  const ralphPath = join(cwd, slug, "RALPH.md");
  return { slug, dirPath, ralphPath };
}
53
+
54
/**
 * Produces a draft plan via `generateDraft` using fixed repo signals (npm, `npm test`,
 * `npm run lint`, git present, `src`/`tests` dirs, `package.json`), then stamps the requested
 * `source` and `target` on the result.
 *
 * Note: `cwd` is accepted for call-site symmetry but is not read by this helper.
 */
function makeDraftPlan(task: string, target: DraftTarget, source: DraftPlan["source"], cwd: string): DraftPlan {
  const generated = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });

  const plan: DraftPlan = {
    ...generated,
    source,
    target,
    content: generated.content,
  };
  return plan;
}
71
+
72
/**
 * Builds a fake extension API (`pi`), registers the Ralph commands against it, and returns
 * accessors the tests use to invoke the registered command and event handlers.
 *
 * @param options Optional overrides. `createDraftPlan` and `runRalphLoopFn` are forwarded to
 *   `registerRalphCommands` (a built-in mock loop is used when `runRalphLoopFn` is omitted);
 *   `exec`, `sendUserMessage` and `appendEntry` hook into the fake `pi` API.
 * @returns `appendedEntries` (entries recorded when the active context exposes no
 *   `appendSessionEntry`), plus `handler(name)` and `event(name)` wrappers. Both wrappers
 *   assert that the named handler was registered.
 */
function createHarness(options?: {
  createDraftPlan?: (...args: Array<any>) => Promise<DraftPlan>;
  exec?: (...args: Array<any>) => Promise<any>;
  sendUserMessage?: (...args: Array<any>) => any;
  appendEntry?: (customType: string, data: unknown) => void;
  runRalphLoopFn?: (config: RunnerConfig) => Promise<RunnerResult>;
}) {
  const handlers = new Map<string, (args: string, ctx: any) => Promise<string | undefined>>();
  const eventHandlers = new Map<string, (...args: Array<any>) => Promise<any> | any>();
  const appendedEntries: Array<any> = [];
  const observedTaskDirPaths = new Set<string>();
  // Context of the command currently being handled; set by handler() and cleared afterwards.
  let activeCtx: any;
  const resolveRuntimeCtx = () => activeCtx?.getRuntimeCtx?.() ?? activeCtx;

  // True when `candidate` is `dir` itself or nested below it. Walking up with dirname() keeps a
  // sibling such as "<dir>-other" from being mistaken for a child, which a bare startsWith()
  // prefix comparison would do.
  const isInsideDir = (candidate: string, dir: string): boolean => {
    let current = candidate;
    for (;;) {
      if (current === dir) return true;
      const parent = dirname(current);
      if (parent === current) return false;
      current = parent;
    }
  };

  // Prefer the runtime context's own sink; otherwise record into the harness-level list.
  const appendSessionEntry = (entry: any) => {
    const currentCtx = resolveRuntimeCtx();
    if (typeof currentCtx?.appendSessionEntry === "function") {
      currentCtx.appendSessionEntry(entry);
      return;
    }
    appendedEntries.push(entry);
  };

  // After forwarding the message, waits for idle and fires the registered "agent_end" handler
  // unless the context sets suppressAutoAgentEnd.
  const sendUserMessage = async (message: string, sendOptions?: { deliverAs?: string }) => {
    const currentCtx = resolveRuntimeCtx();
    const entriesBefore = currentCtx?.sessionManager?.getEntries?.().length ?? 0;
    await options?.sendUserMessage?.(message, sendOptions);
    if (currentCtx?.suppressAutoAgentEnd) return;
    await currentCtx?.waitForIdle?.();
    const agentEnd = eventHandlers.get("agent_end");
    if (!agentEnd || !currentCtx) return;
    // Explicit agentEndMessages win; otherwise use the entries appended since the send started.
    const messages = Array.isArray(currentCtx.agentEndMessages)
      ? currentCtx.agentEndMessages
      : currentCtx.sessionManager?.getEntries?.().slice(entriesBefore) ?? [];
    await agentEnd({ messages }, currentCtx);
  };

  const pi = {
    on: (eventName: string, handler: (...args: Array<any>) => Promise<any> | any) => {
      eventHandlers.set(eventName, handler);
    },
    registerCommand: (name: string, spec: { handler: (args: string, ctx: any) => Promise<string | undefined> }) => {
      handlers.set(name, spec.handler);
    },
    appendEntry: (customType: string, data: unknown) => {
      appendSessionEntry({ type: "custom", customType, data });
      options?.appendEntry?.(customType, data);
    },
    sendUserMessage,
    exec:
      options?.exec ??
      (async () => ({
        killed: false,
        stdout: "",
        stderr: "",
      })),
  } as any;

  // Default mock runner that simulates iterations using the test context's
  // waitForIdle and directory snapshot detection
  const defaultRunLoopFn = async (config: RunnerConfig): Promise<RunnerResult> => {
    const { ralphPath, cwd, maxIterations, onIterationStart, onIterationComplete, onStatusChange, onNotify, runCommandsFn, pi } = config;
    const taskDir = dirname(ralphPath);
    const iterations: IterationRecord[] = [];
    let noProgressStreak = 0;
    let finalStatus: RunnerResult["status"] = "max-iterations";

    onStatusChange?.("running");

    for (let i = 1; i <= maxIterations; i++) {
      const iterStart = Date.now();
      onIterationStart?.(i, maxIterations);

      // RALPH.md is re-read and re-validated on every iteration.
      const raw = readFileSync(ralphPath, "utf8");
      const parsed = parseRalphMarkdown(raw);
      const draftError = validateFrontmatter(parsed.frontmatter);
      if (draftError) {
        onNotify?.(`Invalid RALPH.md on iteration ${i}: ${draftError}`, "error");
        finalStatus = "error";
        break;
      }

      const fm = parsed.frontmatter;
      const currentTimeout = fm.timeout;
      const currentCompletionPromise = fm.completionPromise;
      const currentGuardrails = {
        blockCommands: fm.guardrails.blockCommands,
        protectedFiles: fm.guardrails.protectedFiles,
      };

      const { cancelled } = await (activeCtx?.newSession?.() ?? { cancelled: false });
      if (cancelled) {
        const record: IterationRecord = {
          iteration: i,
          status: "error",
          startedAt: new Date(iterStart).toISOString(),
          completedAt: new Date().toISOString(),
          durationMs: Date.now() - iterStart,
          progress: "unknown",
          changedFiles: [],
          noProgressStreak,
        };
        iterations.push(record);
        onIterationComplete?.(record);
        finalStatus = "stopped";
        break;
      }

      const runtimeCtx = resolveRuntimeCtx();

      if (runCommandsFn && pi) {
        await runCommandsFn(fm.commands, currentGuardrails.blockCommands, pi, cwd, taskDir);
      }

      const snapshotBefore = captureTaskDirectorySnapshot(ralphPath);
      observedTaskDirPaths.clear();
      const entriesBefore = runtimeCtx?.sessionManager?.getEntries?.().length ?? 0;
      const waitForIdlePromise = Promise.resolve(runtimeCtx?.waitForIdle?.());
      const timeoutMs = currentTimeout * 1000;
      let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
      let waitResult: "done" | "timeout";
      try {
        waitResult =
          timeoutMs > 0
            ? await Promise.race([
                waitForIdlePromise.then(() => "done" as const),
                new Promise<"timeout">((resolve) => {
                  timeoutHandle = setTimeout(() => resolve("timeout"), timeoutMs);
                }),
              ])
            : await waitForIdlePromise.then(() => "done" as const);
      } finally {
        // Clear the timer even when waitForIdle rejects, so a failing iteration does not leave
        // a pending timeout keeping the event loop alive.
        if (timeoutHandle) clearTimeout(timeoutHandle);
      }

      if (waitResult === "timeout") {
        const elapsed = Date.now() - iterStart;
        const record: IterationRecord = {
          iteration: i,
          status: "timeout",
          startedAt: new Date(iterStart).toISOString(),
          completedAt: new Date().toISOString(),
          durationMs: elapsed,
          progress: false,
          changedFiles: [],
          noProgressStreak: noProgressStreak + 1,
        };
        iterations.push(record);
        onIterationComplete?.(record);
        onNotify?.(`Iteration ${i} timed out after ${currentTimeout}s, stopping loop`, "warning");
        finalStatus = "timeout";
        break;
      }

      const { progress: assessedProgress, changedFiles: assessedChangedFiles, snapshotTruncated, snapshotErrorCount } =
        await assessTaskDirectoryProgress(ralphPath, snapshotBefore);
      let progress = assessedProgress;
      let changedFiles = assessedChangedFiles;
      const iterationEntries = runtimeCtx?.sessionManager?.getEntries?.().slice(entriesBefore) ?? [];
      // A write/edit tool call observed inside the task directory downgrades a "no progress"
      // snapshot verdict to "unknown".
      if (
        progress === false &&
        [...observedTaskDirPaths].some((observedPath) => isInsideDir(observedPath, taskDir))
      ) {
        progress = "unknown";
        changedFiles = [];
      }

      if (progress === true) {
        noProgressStreak = 0;
      } else if (progress === false) {
        noProgressStreak += 1;
      }

      let completionPromiseMatched = false;
      if (currentCompletionPromise) {
        const completionMessages = Array.isArray(runtimeCtx?.agentEndMessages) && runtimeCtx.agentEndMessages.length > 0
          ? runtimeCtx.agentEndMessages
          : iterationEntries;
        // Assistant messages contribute their text blocks; anything else is serialised.
        const completionText = completionMessages
          .map((entry: any) => {
            if (entry?.type === "message" && entry?.message?.role === "assistant") {
              const text = entry.message.content?.filter((b: any) => b.type === "text")?.map((b: any) => b.text)?.join("") ?? "";
              return text;
            }
            try {
              return JSON.stringify(entry);
            } catch {
              return String(entry);
            }
          })
          .join("\n");
        completionPromiseMatched = completionText.includes(currentCompletionPromise);
      }

      const elapsed = Date.now() - iterStart;
      const record: IterationRecord = {
        iteration: i,
        status: "complete",
        startedAt: new Date(iterStart).toISOString(),
        completedAt: new Date().toISOString(),
        durationMs: elapsed,
        progress,
        changedFiles,
        noProgressStreak,
        completionPromiseMatched: completionPromiseMatched || undefined,
        snapshotTruncated,
        snapshotErrorCount,
      };
      iterations.push(record);
      onIterationComplete?.(record);

      if (progress === true) {
        onNotify?.(`Iteration ${i} durable progress: ${summarizeChangedFiles(changedFiles)}`, "info");
      } else if (progress === false) {
        onNotify?.(`Iteration ${i} made no durable progress. No-progress streak: ${noProgressStreak}.`, "warning");
      } else {
        onNotify?.(
          `Iteration ${i} durable progress could not be verified${snapshotTruncated ? " (snapshot truncated)" : ""}. No-progress streak remains ${noProgressStreak}.`,
          "warning",
        );
      }
      onNotify?.(`Iteration ${i} complete (${Math.round(elapsed / 1000)}s)`, "info");

      if (completionPromiseMatched) {
        if (progress === false) {
          // A matched promise without durable progress does not end the loop.
          onNotify?.(`Completion promise matched on iteration ${i}, but no durable progress was detected. Continuing.`, "warning");
        } else {
          if (progress === "unknown") {
            onNotify?.(`Completion promise matched on iteration ${i}, and durable progress could not be verified. Stopping.`, "info");
          } else {
            onNotify?.(`Completion promise matched on iteration ${i} after durable progress`, "info");
          }
          finalStatus = "complete";
          break;
        }
      }
    }

    // Any status other than complete/stopped/timeout is resolved by whether some iteration
    // confirmed progress.
    const hadConfirmedProgress = iterations.some((r) => r.progress === true);
    if (finalStatus !== "complete" && finalStatus !== "stopped" && finalStatus !== "timeout") {
      finalStatus = hadConfirmedProgress ? "max-iterations" : "no-progress-exhaustion";
    }

    return {
      status: finalStatus,
      iterations,
      totalDurationMs: iterations.reduce((a, r) => a + (r.durationMs ?? 0), 0),
    };
  };

  registerRalphCommands(pi, {
    createDraftPlan: options?.createDraftPlan,
    runRalphLoopFn: options?.runRalphLoopFn ?? defaultRunLoopFn,
  } as any);

  return {
    appendedEntries,
    handler(name: string) {
      const handler = handlers.get(name);
      assert.ok(handler, `missing handler for ${name}`);
      return async (args: string, ctx: any) => {
        // Contexts without getRuntimeCtx are wrapped so their session entries are backed by the
        // harness-level appendedEntries list.
        const effectiveCtx =
          typeof ctx?.getRuntimeCtx === "function"
            ? ctx
            : {
                ...ctx,
                appendSessionEntry: (entry: any) => appendedEntries.push(entry),
                sessionManager: {
                  ...ctx.sessionManager,
                  getEntries: () => appendedEntries,
                },
              };
        activeCtx = effectiveCtx;
        try {
          return await handler(args, effectiveCtx);
        } finally {
          activeCtx = undefined;
        }
      };
    },
    event(name: string) {
      const handler = eventHandlers.get(name);
      assert.ok(handler, `missing event handler for ${name}`);
      return async (event: any, ctx: any) => {
        // Remember write/edit targets so the mock loop can account for them when assessing progress.
        if (name === "tool_call" && (event?.toolName === "write" || event?.toolName === "edit") && typeof event?.input?.path === "string") {
          observedTaskDirPaths.add(event.input.path);
        }
        return await handler(event, ctx);
      };
    },
  };
}
355
+
356
/**
 * Scans session entries from newest to oldest and returns the `data` of the most recent
 * custom entry whose `customType` is `ralph-loop-state`.
 *
 * @returns The matching entry's data, or `undefined` when no such entry exists.
 */
function latestLoopState(entries: Array<any>): any {
  let index = entries.length;
  while (index-- > 0) {
    const candidate = entries[index];
    if (candidate?.type !== "custom") continue;
    if (candidate.customType === "ralph-loop-state") {
      return candidate.data;
    }
  }
  return undefined;
}
365
+
366
/**
 * Minimal session-manager stub: `getEntries` returns the supplied array itself (not a copy)
 * and `getSessionFile` returns the supplied session file name.
 */
function createSessionManager(entries: Array<any>, sessionFile: string) {
  const getEntries = () => entries;
  const getSessionFile = () => sessionFile;
  return { getEntries, getSessionFile };
}
372
+
373
/**
 * Assembles a runtime-session stub around a shared entries array.
 *
 * The result exposes a session manager over `entries`, an `appendSessionEntry` that pushes
 * onto that same array, and an async `waitForIdle` that awaits the supplied callback.
 */
function createRuntimeSession(entries: Array<any>, sessionFile: string, waitForIdle: () => Promise<void> | void) {
  const sessionManager = createSessionManager(entries, sessionFile);
  const appendSessionEntry = (entry: any) => entries.push(entry);
  const awaitIdle = async () => {
    await waitForIdle();
  };
  return {
    sessionManager,
    appendSessionEntry,
    waitForIdle: awaitIdle,
  };
}
382
+
383
// Registering twice on the same API object must leave exactly one set of commands and events.
test("registerRalphCommands is idempotent for the same extension API instance", () => {
  const commandNames: string[] = [];
  const eventNames: string[] = [];
  const pi = {
    on: (eventName: string) => {
      eventNames.push(eventName);
    },
    registerCommand: (name: string) => {
      commandNames.push(name);
    },
    appendEntry: () => undefined,
    sendUserMessage: () => undefined,
    exec: async () => ({ killed: false, stdout: "", stderr: "" }),
  } as any;

  for (let attempt = 0; attempt < 2; attempt += 1) {
    registerRalphCommands(pi, {} as any);
  }

  const expectedCommands = ["ralph", "ralph-draft", "ralph-stop"];
  const expectedEvents = [
    "tool_call",
    "tool_execution_start",
    "tool_execution_end",
    "agent_end",
    "before_agent_start",
    "tool_result",
  ];
  assert.deepEqual(commandNames, expectedCommands);
  assert.deepEqual(eventNames, expectedEvents);
});
411
+
412
// Commands without a ./ prefix are executed with the repo cwd, and process.cwd() is untouched.
test("runCommands keeps plain frontmatter commands in the repo cwd", async () => {
  const repoCwd = createTempDir();
  const taskDir = join(repoCwd, "task");
  mkdirSync(taskDir, { recursive: true });
  try {
    const cwdBefore = process.cwd();
    type ExecCall = { tool: string; args: string[]; options?: { cwd?: string }; cwdAtExec: string };
    const calls: ExecCall[] = [];
    const pi = {
      exec: async (tool: string, args: string[], options?: { cwd?: string }) => {
        calls.push({ tool, args, options, cwdAtExec: process.cwd() });
        return { killed: false, stdout: "", stderr: "" };
      },
    } as any;
    const commands = [
      { name: "npm test", run: "npm test", timeout: 1 },
      { name: "git log", run: "git log --oneline", timeout: 1 },
    ];

    await runCommands(commands, [], pi, {}, repoCwd, taskDir);

    assert.equal(calls.length, 2);
    const execCwds = calls.map((call) => call.options?.cwd);
    assert.deepEqual(execCwds, [repoCwd, repoCwd]);
    assert.equal(calls[0].cwdAtExec, cwdBefore);
    assert.equal(process.cwd(), cwdBefore);
  } finally {
    rmSync(repoCwd, { recursive: true, force: true });
  }
});
446
+
447
// A ./-prefixed command (even with leading whitespace) is executed through bash in the task directory.
test("runCommands runs ./-prefixed frontmatter commands from the task directory", async () => {
  const repoCwd = createTempDir();
  const taskDir = join(repoCwd, "task");
  mkdirSync(taskDir, { recursive: true });
  try {
    const cwdBefore = process.cwd();
    type ExecCall = { tool: string; args: string[]; options?: { cwd?: string }; cwdAtExec: string };
    const calls: ExecCall[] = [];
    const pi = {
      exec: async (tool: string, args: string[], options?: { cwd?: string }) => {
        calls.push({ tool, args, options, cwdAtExec: process.cwd() });
        return { killed: false, stdout: "", stderr: "" };
      },
    } as any;
    const buildCommand = { name: "build", run: " ./scripts/build", timeout: 1 };

    await runCommands([buildCommand], [], pi, {}, repoCwd, taskDir);

    assert.equal(calls.length, 1);
    const [only] = calls;
    assert.equal(only.tool, "bash");
    assert.equal(only.options?.cwd, taskDir);
    assert.equal(only.cwdAtExec, cwdBefore);
    assert.equal(process.cwd(), cwdBefore);
  } finally {
    rmSync(repoCwd, { recursive: true, force: true });
  }
});
472
+
473
// When a template argument expands to a ./ path, the command still runs from the task directory
// and the substituted value is single-quoted in the bash invocation.
test("runCommands uses the semantic command form to choose taskDir for templated ./-prefixed args", async () => {
  const repoCwd = createTempDir();
  const taskDir = join(repoCwd, "task");
  mkdirSync(taskDir, { recursive: true });
  try {
    const cwdBefore = process.cwd();
    type ExecCall = { tool: string; args: string[]; options?: { cwd?: string }; cwdAtExec: string };
    const calls: ExecCall[] = [];
    const pi = {
      exec: async (tool: string, args: string[], options?: { cwd?: string }) => {
        calls.push({ tool, args, options, cwdAtExec: process.cwd() });
        return { killed: false, stdout: "", stderr: "" };
      },
    } as any;
    const checkCommand = { name: "check", run: "{{ args.tool }} --flag", timeout: 1 };
    const templateArgs = { tool: "./scripts/check.sh" };

    await runCommands([checkCommand], [], pi, templateArgs, repoCwd, taskDir);

    assert.equal(calls.length, 1);
    const [only] = calls;
    assert.equal(only.tool, "bash");
    assert.equal(only.args[1], "'./scripts/check.sh' --flag");
    assert.equal(only.options?.cwd, taskDir);
    assert.equal(only.cwdAtExec, cwdBefore);
    assert.equal(process.cwd(), cwdBefore);
  } finally {
    rmSync(repoCwd, { recursive: true, force: true });
  }
});
506
+
507
// If recording a blocked command throws, runCommands must reject with that error.
test("runCommands surfaces blocked-command appendEntry failures", async () => {
  const repoCwd = createTempDir();
  const taskDir = join(repoCwd, "task");
  mkdirSync(taskDir, { recursive: true });
  try {
    const pi = {
      appendEntry: () => {
        throw new Error("append failed");
      },
      exec: async () => ({ killed: false, stdout: "", stderr: "" }),
    } as any;
    const blockedCommand = { name: "blocked", run: "git push origin main", timeout: 1 };
    const blockPatterns = ["git\\s+push"];

    const pending = runCommands([blockedCommand], blockPatterns, pi, {}, repoCwd, taskDir);
    await assert.rejects(pending, /append failed/);
  } finally {
    rmSync(repoCwd, { recursive: true, force: true });
  }
});
527
+
528
// With only a persisted "ralph-loop-state" session entry available, /ralph-stop must still
// create the stop flag under the task's .ralph-runner directory and report that it is stopping.
test("/ralph-stop writes the durable stop flag from persisted active loop state after reload", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "persisted-loop-task");
  mkdirSync(taskDir, { recursive: true });

  const persistedState = {
    active: true,
    loopToken: "persisted-loop-token",
    cwd,
    taskDir,
    iteration: 3,
    maxIterations: 5,
    noProgressStreak: 0,
    iterationSummaries: [],
    guardrails: { blockCommands: [], protectedFiles: [] },
    stopRequested: false,
  };
  const persistedEntries = [
    {
      type: "custom",
      customType: "ralph-loop-state",
      data: persistedState,
    },
  ];

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const handler = harness.handler("ralph-stop");

  // The context hands itself out as the runtime context, so it is used without wrapping.
  const ctx: any = {
    cwd,
    hasUI: true,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => undefined,
      input: async () => undefined,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: createSessionManager(persistedEntries, "session-a"),
    getRuntimeCtx: () => ctx,
  };

  await handler("", ctx);

  const stopFlagPath = join(taskDir, ".ralph-runner", "stop.flag");
  const sawMessage = (fragment: string) => notifications.some(({ message }) => message.includes(fragment));
  assert.equal(existsSync(stopFlagPath), true);
  assert.ok(sawMessage("Ralph loop stopping after current iteration"));
  assert.equal(sawMessage("No active ralph loop"), false);
});
576
+
577
// The review dialog must appear before RALPH.md is written; choosing "Start" writes the draft
// and starts a session, which this test cancels immediately.
test("/ralph reverse engineer this app with an injected llm-strengthened draft still shows review before start", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const draftPlan = makeDraftPlan(task, target, "llm-strengthened", cwd);
  const draftCalls: Array<{ task: string; target: DraftTarget; cwd: string }> = [];
  const harness = createHarness({
    createDraftPlan: async (taskArg: string, targetArg: DraftTarget, cwdArg: string) => {
      draftCalls.push({ task: taskArg, target: targetArg, cwd: cwdArg });
      return draftPlan;
    },
  });

  const notifications: Array<{ message: string; level: string }> = [];
  const review = { title: "", options: [] as string[] };
  let newSessionCalls = 0;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: true,
    ui: {
      select: async (title: string, options: string[]) => {
        review.title = title;
        review.options = options;
        assert.deepEqual(draftCalls, [{ task, target, cwd }]);
        assert.equal(existsSync(target.ralphPath), false, "draft file should not exist before review acceptance");
        return "Start";
      },
      input: async () => undefined,
      editor: async () => undefined,
      notify: (message: string, level: string) => notifications.push({ message, level }),
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      assert.equal(existsSync(target.ralphPath), true, "draft file should be written before the loop starts");
      return { cancelled: true };
    },
    waitForIdle: async () => {
      throw new Error("loop should not continue after cancelled session start");
    },
  };

  await handler(task, ctx);

  assert.equal(draftCalls.length, 1);
  assert.equal(newSessionCalls, 1);
  assert.equal(existsSync(target.ralphPath), true);
  assert.match(review.title, /Mission Brief/);
  assert.deepEqual(review.options, ["Start", "Open RALPH.md", "Cancel"]);
  const reportedInvalid = notifications.some(({ message }) => message.includes("Invalid RALPH.md"));
  assert.equal(reportedInvalid, false);
});
633
+
634
// A fallback-sourced draft goes through the same review dialog, is written on "Save draft",
// and the dialog title must not mention where the draft came from.
test("/ralph-draft with an injected fallback draft reviews and writes without surfacing model failure details", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const draftPlan = makeDraftPlan(task, target, "fallback", cwd);
  const draftCalls: Array<{ task: string; target: DraftTarget; cwd: string }> = [];
  const harness = createHarness({
    createDraftPlan: async (taskArg: string, targetArg: DraftTarget, cwdArg: string) => {
      draftCalls.push({ task: taskArg, target: targetArg, cwd: cwdArg });
      return draftPlan;
    },
  });

  const review = { title: "", options: [] as string[] };
  const handler = harness.handler("ralph-draft");
  const ctx = {
    cwd,
    hasUI: true,
    ui: {
      select: async (title: string, options: string[]) => {
        review.title = title;
        review.options = options;
        assert.deepEqual(draftCalls, [{ task, target, cwd }]);
        assert.equal(existsSync(target.ralphPath), false, "draft file should not exist before Save draft");
        return "Save draft";
      },
      input: async () => undefined,
      editor: async () => undefined,
      notify: () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    // Drafting must never reach the loop: both hooks fail loudly if touched.
    newSession: async () => {
      throw new Error("/ralph-draft should not start the loop");
    },
    waitForIdle: async () => {
      throw new Error("/ralph-draft should not wait for idle");
    },
  };

  await handler(task, ctx);

  assert.equal(draftCalls.length, 1);
  assert.equal(existsSync(target.ralphPath), true);
  assert.match(review.title, /Mission Brief/);
  assert.match(review.title, /Task\s+reverse engineer this app/);
  assert.doesNotMatch(review.title, /fallback|source|provenance|model failure/i);
  assert.deepEqual(review.options, ["Save draft", "Open RALPH.md", "Cancel"]);
});
686
+
687
// The brief shown in the review dialog lists only its fixed sections and reflects the edited
// frontmatter values (8 iterations, 45s timeout, "ready" completion promise).
test("Mission Brief surface stays limited to the visible fields", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const draftPlan = makeDraftPlan(task, target, "llm-strengthened", cwd);
  const withIterations = draftPlan.content.replace("max_iterations: 12", "max_iterations: 8");
  draftPlan.content = withIterations.replace("timeout: 300\n", "timeout: 45\ncompletion_promise: ready\n");
  const harness = createHarness({
    createDraftPlan: async () => draftPlan,
  });

  let brief = "";
  const handler = harness.handler("ralph-draft");
  const ctx = {
    cwd,
    hasUI: true,
    ui: {
      // Capture the dialog title, then back out without saving.
      select: async (title: string) => {
        brief = title;
        return "Cancel";
      },
      input: async () => undefined,
      editor: async () => undefined,
      notify: () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler(task, ctx);

  const expectedPatterns = [
    /^Mission Brief/m,
    /^Task$/m,
    /^File$/m,
    /^Suggested checks$/m,
    /^Finish behavior$/m,
    /- Stop after 8 iterations or \/ralph-stop/,
    /- Stop if an iteration exceeds 45s/,
    /- Stop early on <promise>ready<\/promise>/,
    /^Safety$/m,
  ];
  for (const pattern of expectedPatterns) {
    assert.match(brief, pattern);
  }
  assert.doesNotMatch(brief, /source|fallback|provenance|model failure/i);
  assert.doesNotMatch(brief, /Draft status/);
});
735
+
736
// Without an interactive UI, a natural-language task produces a single warning and nothing else:
// no draft plan is requested and no RALPH.md is written.
test("natural-language drafting without UI warns and exits without creating a draft", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const draftCalls: Array<{ task: string; target: DraftTarget; cwd: string }> = [];
  const harness = createHarness({
    createDraftPlan: async (taskArg: string, targetArg: DraftTarget, cwdArg: string) => {
      draftCalls.push({ task: taskArg, target: targetArg, cwd: cwdArg });
      return makeDraftPlan(task, target, "llm-strengthened", cwd);
    },
  });

  const notifications: Array<{ message: string; level: string }> = [];
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not open review UI");
      },
      input: async () => undefined,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler(task, ctx);

  const expectedWarning = {
    level: "warning",
    message: "Draft review requires an interactive session. Use /ralph with a task folder or RALPH.md path instead.",
  };
  assert.equal(draftCalls.length, 0);
  assert.equal(existsSync(target.ralphPath), false);
  assert.deepEqual(notifications, [expectedWarning]);
});
780
+
781
// Pointing /ralph at an existing RALPH.md via --path must run it as-is:
// the draft-plan factory injected into the harness is never invoked.
test("/ralph --path existing-task/RALPH.md bypasses the drafting pipeline", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const draftCalls: Array<{ task: string; target: DraftTarget; cwd: string }> = [];
  const draftPlan = makeDraftPlan(task, target, "llm-strengthened", cwd);
  const harness = createHarness({
    createDraftPlan: async (taskArg: string, targetArg: DraftTarget, cwdArg: string) => {
      draftCalls.push({ task: taskArg, target: targetArg, cwd: cwdArg });
      return draftPlan;
    },
  });

  // Seed an on-disk RALPH.md with the draft content. The fs helpers are used
  // directly, as the sibling tests in this file do, instead of going through
  // a no-op "setup" subtest and a dynamic import of node:fs.
  const existingDir = join(cwd, "existing-task");
  const existingRalphPath = join(existingDir, "RALPH.md");
  mkdirSync(existingDir, { recursive: true });
  writeFileSync(existingRalphPath, draftPlan.content, "utf8");

  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: () => undefined,
      select: async () => {
        throw new Error("should not show review UI for existing RALPH.md");
      },
      input: async () => undefined,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
    // Cancelling the session is enough here: only the drafting hook is asserted.
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${existingRalphPath}`, ctx);

  assert.equal(draftCalls.length, 0);
});
826
+
827
// A RALPH.md declaring `args` receives --arg values at runtime: the runner
// config carries a prototype-less args record, and the command template is
// expanded into the exec call asserted at the end of the test.
test("/ralph --path existing-task/RALPH.md with args resolves them safely at runtime", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "arg-task");
  const ralphPath = join(taskDir, "RALPH.md");
  const ralphSource = [
    "---",
    "args:",
    " - owner",
    "commands:",
    " - name: greet",
    " run: echo {{ args.owner }}",
    " timeout: 1",
    "max_iterations: 1",
    "timeout: 1",
    "guardrails:",
    " block_commands: []",
    " protected_files: []",
    "---",
    "Hello {{ args.owner }}",
  ].join("\n");
  mkdirSync(taskDir, { recursive: true });
  writeFileSync(ralphPath, ralphSource, "utf8");

  const execCalls: string[] = [];
  let observedRuntimeArgs: Record<string, string> | undefined;
  const harness = createHarness({
    // Capture the argv handed to the exec hook, joined for easy comparison.
    exec: async (_tool: string, args: string[]) => {
      execCalls.push(args.join(" "));
      return { killed: false, stdout: "hello Ada", stderr: "" };
    },
    // Fake loop: remember the runtime args, run the command once through the
    // config's command runner (if provided), then report one completed iteration.
    runRalphLoopFn: async (config: RunnerConfig) => {
      observedRuntimeArgs = config.runtimeArgs;
      const greetCommand = { name: "greet", run: "echo {{ args.owner }}", timeout: 1 };
      await config.runCommandsFn?.([greetCommand], [], config.pi, config.cwd, dirname(config.ralphPath));

      const startedAt = new Date().toISOString();
      const completedAt = new Date().toISOString();
      return {
        status: "complete",
        iterations: [
          {
            iteration: 1,
            status: "complete",
            startedAt,
            completedAt,
            durationMs: 0,
            progress: false,
            changedFiles: [],
            noProgressStreak: 0,
          },
        ],
        totalDurationMs: 0,
      };
    },
  });

  const handler = harness.handler("ralph");
  const notifications: Array<{ message: string; level: string }> = [];
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => undefined,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${ralphPath} --arg owner=Ada`, ctx);

  // The args record must have a null prototype and contain only the declared arg.
  assert.equal(Object.getPrototypeOf(observedRuntimeArgs), null);
  assert.deepEqual({ ...observedRuntimeArgs }, { owner: "Ada" });
  // The substituted value appears single-quoted in the command passed to exec.
  assert.deepEqual(execCalls, ["-c echo 'Ada'"]);
  const sawInvalidRalph = notifications.some(({ message }) => message.includes("Invalid RALPH.md"));
  assert.equal(sawInvalidRalph, false);
});
916
+
917
// Argument validation for --path runs: a declared arg that is not supplied and
// a supplied arg that is not declared each produce one error notification,
// and the loop function is never started.
test("/ralph --path existing-task/RALPH.md rejects missing and extra args", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "arg-task");
  const ralphPath = join(taskDir, "RALPH.md");
  const ralphSource = [
    "---",
    "args:",
    " - owner",
    "commands:",
    " - name: greet",
    " run: echo {{ args.owner }}",
    " timeout: 1",
    "max_iterations: 1",
    "timeout: 1",
    "guardrails:",
    " block_commands: []",
    " protected_files: []",
    "---",
    "Hello {{ args.owner }}",
  ].join("\n");
  mkdirSync(taskDir, { recursive: true });
  writeFileSync(ralphPath, ralphSource, "utf8");

  const harness = createHarness({
    runRalphLoopFn: async () => {
      throw new Error("loop should not start when args are invalid");
    },
  });
  const handler = harness.handler("ralph");

  const notifications: Array<{ message: string; level: string }> = [];
  const ui = {
    notify: (message: string, level: string) => notifications.push({ message, level }),
    select: async () => {
      throw new Error("should not prompt");
    },
    input: async () => undefined,
    editor: async () => undefined,
    setStatus: () => undefined,
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui,
    sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  // First invocation omits the required arg; the second passes an undeclared one.
  await handler(`--path ${ralphPath}`, ctx);
  await handler(`--path ${ralphPath} --arg extra=value`, ctx);

  const expected = [
    { level: "error", message: "Missing required arg: owner" },
    { level: "error", message: "Undeclared arg: extra" },
  ];
  assert.deepEqual(notifications, expected);
});
977
+
978
// Combining --task with --arg yields a single error notification and the
// loop function is never called.
test("/ralph --task ... --arg ... is rejected", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const harness = createHarness({
    runRalphLoopFn: async () => {
      throw new Error("loop should not start");
    },
  });
  const handler = harness.handler("ralph");

  const notifications: Array<{ message: string; level: string }> = [];
  const ui = {
    notify: (message: string, level: string) => notifications.push({ message, level }),
    select: async () => {
      throw new Error("should not prompt");
    },
    input: async () => undefined,
    editor: async () => undefined,
    setStatus: () => undefined,
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui,
    sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler("--task reverse engineer auth --arg owner=Ada", ctx);

  const expected = [{ level: "error", message: "--arg is only supported with /ralph --path" }];
  assert.deepEqual(notifications, expected);
});
1012
+
1013
// The /ralph-draft command does not accept --arg: it reports the same error
// as /ralph --task does and never starts a loop.
test("/ralph-draft rejects runtime args for now", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const harness = createHarness({
    runRalphLoopFn: async () => {
      throw new Error("loop should not start");
    },
  });
  const handler = harness.handler("ralph-draft");

  const notifications: Array<{ message: string; level: string }> = [];
  const ui = {
    notify: (message: string, level: string) => notifications.push({ message, level }),
    select: async () => {
      throw new Error("should not prompt");
    },
    input: async () => undefined,
    editor: async () => undefined,
    setStatus: () => undefined,
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui,
    sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler("--path task-folder --arg owner=Ada", ctx);

  const expected = [{ level: "error", message: "--arg is only supported with /ralph --path" }];
  assert.deepEqual(notifications, expected);
});
1047
+
1048
// A block-scalar completion_promise in the raw frontmatter is rejected up
// front: one error notification, no new session and no command execution.
test("/ralph rejects raw invalid completion_promise values before parsing loop state", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const targetDir = join(cwd, "raw-invalid-completion-promise");
  const ralphPath = join(targetDir, "RALPH.md");
  const ralphSource = [
    "---",
    "commands:",
    " - name: tests",
    " run: npm test",
    " timeout: 20",
    "max_iterations: 2",
    "timeout: 300",
    "completion_promise: |",
    " DONE",
    "guardrails:",
    " block_commands: []",
    " protected_files: []",
    "---",
    "Task: Fix flaky auth tests",
    "",
    "Keep the change small.",
  ].join("\n");
  mkdirSync(targetDir, { recursive: true });
  writeFileSync(ralphPath, ralphSource, "utf8");

  // Counters prove that neither a session nor a command was started.
  const notifications: Array<{ message: string; level: string }> = [];
  let newSessionCalls = 0;
  let execCalls = 0;
  const harness = createHarness({
    exec: async () => {
      execCalls += 1;
      return { killed: false, stdout: "ok", stderr: "" };
    },
  });
  const handler = harness.handler("ralph");

  const ui = {
    notify: (message: string, level: string) => notifications.push({ message, level }),
    select: async () => {
      throw new Error("should not prompt");
    },
    input: async () => {
      throw new Error("should not prompt");
    },
    editor: async () => undefined,
    setStatus: () => undefined,
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui,
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      return { cancelled: true };
    },
    waitForIdle: async () => {
      throw new Error("should not reach the loop");
    },
  };

  await handler(`--path ${ralphPath}`, ctx);

  assert.equal(newSessionCalls, 0);
  assert.equal(execCalls, 0);
  assert.equal(notifications.length, 1);
  const [first] = notifications;
  assert.equal(first?.level, "error");
  assert.match(first?.message ?? "", /Invalid completion_promise/);
});
1120
+
1121
// Table-driven check: when guardrails.block_commands or
// guardrails.protected_files is a scalar or null instead of a YAML sequence,
// the handler reports one error and starts neither a session nor a command.
test("/ralph rejects raw malformed guardrails shapes before starting the loop", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const targetDir = join(cwd, "raw-invalid-guardrails");
  const ralphPath = join(targetDir, "RALPH.md");
  mkdirSync(targetDir, { recursive: true });

  let newSessionCalls = 0;
  let execCalls = 0;
  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness({
    exec: async () => {
      execCalls += 1;
      return { killed: false, stdout: "", stderr: "" };
    },
  });
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      return { cancelled: false };
    },
    waitForIdle: async () => {
      throw new Error("should not reach the loop");
    },
  };

  // All fixtures share the same document; only the two guardrails lines vary.
  const renderRalph = (blockCommandsLine: string, protectedFilesLine: string): string =>
    [
      "---",
      "commands:",
      " - name: tests",
      " run: npm test",
      " timeout: 20",
      "max_iterations: 2",
      "timeout: 300",
      "guardrails:",
      blockCommandsLine,
      protectedFilesLine,
      "---",
      "Task: Fix flaky auth tests",
      "",
      "Keep the change small.",
    ].join("\n");

  const cases: ReadonlyArray<readonly [string, string]> = [
    ["block_commands scalar", renderRalph(" block_commands: 'git\\s+push'", " protected_files: []")],
    ["block_commands null", renderRalph(" block_commands: null", " protected_files: []")],
    ["protected_files scalar", renderRalph(" block_commands: []", " protected_files: 'src/generated/**'")],
    ["protected_files null", renderRalph(" block_commands: []", " protected_files: null")],
  ];

  for (const [label, raw] of cases) {
    // Reset shared state so each case is judged on its own.
    writeFileSync(ralphPath, raw, "utf8");
    notifications.length = 0;
    newSessionCalls = 0;
    execCalls = 0;

    await handler(`--path ${ralphPath}`, ctx);

    assert.equal(newSessionCalls, 0, label);
    assert.equal(execCalls, 0, label);
    assert.equal(notifications.length, 1, label);
    assert.equal(notifications[0]?.level, "error", label);
    assert.match(
      notifications[0]?.message ?? "",
      /Invalid RALPH\.md: Invalid RALPH frontmatter: guardrails\.(block_commands|protected_files) must be a YAML sequence/,
      label,
    );
  }
});
1255
+
1256
// max_iterations written as a YAML sequence is rejected with a frontmatter
// error before any session is created or any command is executed.
test("/ralph rejects raw malformed max_iterations arrays before starting the loop", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const targetDir = join(cwd, "raw-invalid-max-iterations");
  const ralphPath = join(targetDir, "RALPH.md");
  const ralphSource = [
    "---",
    "commands: []",
    "max_iterations:",
    " - 2",
    "timeout: 300",
    "guardrails:",
    " block_commands: []",
    " protected_files: []",
    "---",
    "Task: Fix flaky auth tests",
    "",
    "Keep the change small.",
  ].join("\n");
  mkdirSync(targetDir, { recursive: true });
  writeFileSync(ralphPath, ralphSource, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  let newSessionCalls = 0;
  let execCalls = 0;
  const harness = createHarness({
    exec: async () => {
      execCalls += 1;
      return { killed: false, stdout: "ok", stderr: "" };
    },
  });
  const handler = harness.handler("ralph");

  const ui = {
    notify: (message: string, level: string) => notifications.push({ message, level }),
    select: async () => {
      throw new Error("should not prompt");
    },
    input: async () => {
      throw new Error("should not prompt");
    },
    editor: async () => undefined,
    setStatus: () => undefined,
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui,
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      return { cancelled: true };
    },
    waitForIdle: async () => {
      throw new Error("should not reach the loop");
    },
  };

  await handler(`--path ${ralphPath}`, ctx);

  assert.equal(newSessionCalls, 0);
  assert.equal(execCalls, 0);
  assert.equal(notifications.length, 1);
  const [first] = notifications;
  assert.equal(first?.level, "error");
  assert.match(
    first?.message ?? "",
    /Invalid RALPH\.md: Invalid RALPH frontmatter: max_iterations must be a YAML number/,
  );
});
1324
+
1325
// The RALPH.md file is corrupted during the first waitForIdle call. The test
// expects an error naming iteration 2, and expects the exec hook to have run
// exactly as many times as the valid file has commands.
test("/ralph re-validates raw draft content before each loop iteration", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const targetDir = target.dirPath;
  mkdirSync(targetDir, { recursive: true });

  const repoSignals = {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  };
  const draft = generateDraft(task, target, repoSignals);
  const validContent = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  writeFileSync(target.ralphPath, validContent, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  let newSessionCalls = 0;
  let mutated = false;
  const expectedExecCalls = parseRalphMarkdown(validContent).frontmatter.commands.length;
  let execCalls = 0;
  const harness = createHarness({
    exec: async () => {
      execCalls += 1;
      return { killed: false, stdout: "ok", stderr: "" };
    },
  });
  const handler = harness.handler("ralph");

  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      return { cancelled: false };
    },
    // One-shot mutation: only the first idle wait rewrites the file.
    waitForIdle: async () => {
      if (mutated) {
        return;
      }
      mutated = true;
      const invalidContent = validContent.replace("max_iterations: 2", "max_iterations: two");
      writeFileSync(target.ralphPath, invalidContent, "utf8");
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  assert.equal(execCalls, expectedExecCalls);
  const sawIterationError = notifications.some(
    ({ level, message }) => level === "error" && message.includes("Invalid RALPH.md on iteration 2"),
  );
  assert.ok(sawIterationError);
});
1393
+
1394
// Runs a two-iteration loop against a stub session. The test expects one
// newSession call per iteration, a terminal notification reporting either
// "max iterations" or "exhausted without verified progress", and no
// "Ralph loop failed" error.
test("/ralph uses follow-up delivery for later iterations that resume a busy session", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const repoSignals = {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  };
  const draft = generateDraft(task, target, repoSignals);
  const twoIterationContent = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  mkdirSync(target.dirPath, { recursive: true });
  writeFileSync(target.ralphPath, twoIterationContent, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  let newSessionCalls = 0;
  const harness = createHarness();

  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      return { cancelled: false };
    },
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  assert.equal(newSessionCalls, 2);

  // Either terminal message is acceptable.
  const reachedTerminalState = notifications.some(
    ({ message }) =>
      message.includes("Ralph loop reached max iterations: 2 iterations") ||
      message.includes("Ralph loop exhausted without verified progress: 2 iterations"),
  );
  assert.ok(reachedTerminalState);

  const loopFailed = notifications.some(
    ({ level, message }) => level === "error" && message.includes("Ralph loop failed"),
  );
  assert.equal(loopFailed, false);
});
1448
+
1449
// newSession() swaps the runtime context from session-a to session-b while
// the command ctx itself stays stale. The test expects the iteration to run
// through the rebound runtime and record the file it writes as progress.
test("/ralph completes iterations when runtime session rebinds after newSession", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const repoSignals = {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  };
  const draft = generateDraft(task, target, repoSignals);

  // Shrink every timeout to 1 and cap the loop at a single iteration.
  const fastContent = draft.content
    .replaceAll("timeout: 120", "timeout: 1")
    .replaceAll("timeout: 90", "timeout: 1")
    .replaceAll("timeout: 20", "timeout: 1")
    .replace("max_iterations: 25", "max_iterations: 1")
    .replace("timeout: 300\n", "timeout: 1\n");
  mkdirSync(target.dirPath, { recursive: true });
  writeFileSync(target.ralphPath, fastContent, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const oldEntries: Array<any> = [];
  const newEntries: Array<any> = [];
  const handler = harness.handler("ralph");

  // The pre-rebind runtime must never be asked to run the agent.
  const oldRuntimeCtx = createRuntimeSession(oldEntries, "session-a", async () => {
    throw new Error("runtime should rebind before the agent runs");
  });
  // The post-rebind runtime writes a file inside the task directory.
  const newRuntimeCtx = createRuntimeSession(newEntries, "session-b", async () => {
    const notesDir = join(target.dirPath, "notes");
    mkdirSync(notesDir, { recursive: true });
    writeFileSync(join(target.dirPath, "notes", "findings.md"), "persisted change\n", "utf8");
  });

  let runtimeCtx = oldRuntimeCtx;
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    getRuntimeCtx: () => runtimeCtx,
    sessionManager: createSessionManager(oldEntries, "session-a"),
    newSession: async () => {
      runtimeCtx = newRuntimeCtx;
      return { cancelled: false };
    },
    waitForIdle: async () => {
      throw new Error("command ctx should stay stale after newSession");
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const finalState = latestLoopState(newEntries);
  const firstSummary = finalState?.iterationSummaries?.[0];
  assert.equal(firstSummary?.progress, true);
  assert.deepEqual(firstSummary?.changedFiles, ["notes/findings.md"]);

  const reportedProgress = notifications.some(({ message }) =>
    message.includes("Iteration 1 durable progress: notes/findings.md"),
  );
  assert.ok(reportedProgress);
  const reportedTimeout = notifications.some(({ message }) => message.includes("timed out"));
  assert.equal(reportedTimeout, false);
});
1521
+
1522
// Two sessions persist the same loop token and guardrails but differ in the
// `active` flag. The write to a protected path is blocked only for the
// session whose persisted state is active.
test("tool_call scopes guardrails to the session with the active persisted Ralph token", { concurrency: false }, async () => {
  const harness = createHarness();
  const toolCall = harness.event("tool_call");
  const loopToken = "loop-rebind-token";
  const protectedPath = "src/generated/output.ts";

  // Builds a ctx whose session holds one persisted loop-state entry.
  const sessionCtx = (active: boolean, sessionFile: string) => ({
    sessionManager: {
      getEntries: () => [
        {
          type: "custom",
          customType: "ralph-loop-state",
          data: {
            active,
            loopToken,
            iteration: 1,
            guardrails: { blockCommands: [], protectedFiles: ["src/generated/**"] },
          },
        },
      ],
      getSessionFile: () => sessionFile,
    },
  });
  const oldCtx = sessionCtx(false, "session-a");
  const activeCtx = sessionCtx(true, "session-b");

  const inactiveResult = await toolCall({ toolName: "write", input: { path: protectedPath } }, oldCtx);
  const activeResult = await toolCall({ toolName: "write", input: { path: protectedPath } }, activeCtx);

  assert.equal(inactiveResult, undefined);
  assert.deepEqual(activeResult, { block: true, reason: `ralph: ${protectedPath} is protected` });
});
1568
+
1569
// The status file written to the task directory lists protected files, while
// the RALPH_RUNNER_GUARDRAILS environment value lists none. The write to a
// protected path is still expected to be blocked.
test("tool_call blocks when durable status is restrictive even if env contract is permissive", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "task");
  mkdirSync(taskDir, { recursive: true });

  const startedAt = new Date().toISOString();
  const durableStatus: RunnerStatusFile = {
    loopToken: "loop-status-token",
    ralphPath: join(taskDir, "RALPH.md"),
    taskDir,
    cwd: taskDir,
    status: "running",
    currentIteration: 2,
    maxIterations: 4,
    timeout: 300,
    startedAt,
    guardrails: { blockCommands: ["git\\s+push"], protectedFiles: ["src/generated/**"] },
  };
  writeStatusFile(taskDir, durableStatus);

  const harness = createHarness();
  const toolCall = harness.event("tool_call");

  // Environment contract with empty guardrails; restored in the finally block.
  const permissiveGuardrails = JSON.stringify({ blockCommands: [], protectedFiles: [] });
  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: taskDir,
    RALPH_RUNNER_LOOP_TOKEN: "loop-status-token",
    RALPH_RUNNER_CURRENT_ITERATION: "2",
    RALPH_RUNNER_MAX_ITERATIONS: "4",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    RALPH_RUNNER_GUARDRAILS: permissiveGuardrails,
  });

  try {
    const sessionCtx = {
      sessionManager: {
        getEntries: () => [],
        getSessionFile: () => "session-a",
      },
    };
    const result = await toolCall({ toolName: "write", input: { path: "src/generated/output.ts" } }, sessionCtx);

    assert.equal(result?.block, true);
  } finally {
    restoreEnv();
  }
});
1614
+
1615
+
1616
// Like the rebinding test, but the RALPH.md also declares
// `completion_promise: done` and the rebound runtime appends an assistant
// message containing <promise>done</promise>. The test expects exactly one
// iteration summary, marked as progress, even though max_iterations is 2.
test("/ralph still resolves completion_promise after runtime session rebinding", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const repoSignals = {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  };
  const draft = generateDraft(task, target, repoSignals);

  // Shrink timeouts, allow two iterations, and add the completion promise.
  const promiseContent = draft.content
    .replaceAll("timeout: 120", "timeout: 1")
    .replaceAll("timeout: 90", "timeout: 1")
    .replaceAll("timeout: 20", "timeout: 1")
    .replace("max_iterations: 25", "max_iterations: 2")
    .replace("timeout: 300\n", "timeout: 1\ncompletion_promise: done\n");
  mkdirSync(target.dirPath, { recursive: true });
  writeFileSync(target.ralphPath, promiseContent, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const oldEntries: Array<any> = [];
  const newEntries: Array<any> = [];
  const handler = harness.handler("ralph");

  const oldRuntimeCtx = createRuntimeSession(oldEntries, "session-a", async () => {
    throw new Error("runtime should rebind before the agent runs");
  });
  // The rebound runtime writes a file and then emits the promise marker.
  const newRuntimeCtx = createRuntimeSession(newEntries, "session-b", async () => {
    mkdirSync(join(target.dirPath, "notes"), { recursive: true });
    writeFileSync(join(target.dirPath, "notes", "findings.md"), "persisted change\n", "utf8");
    const promiseMessage = {
      type: "message",
      message: { role: "assistant", content: [{ type: "text", text: "<promise>done</promise>" }] },
    };
    newEntries.push(promiseMessage);
  });

  let runtimeCtx = oldRuntimeCtx;
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    getRuntimeCtx: () => runtimeCtx,
    sessionManager: createSessionManager(oldEntries, "session-a"),
    newSession: async () => {
      runtimeCtx = newRuntimeCtx;
      return { cancelled: false };
    },
    waitForIdle: async () => {
      throw new Error("command ctx should stay stale after newSession");
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const finalState = latestLoopState(newEntries);
  const summaries = finalState?.iterationSummaries;
  assert.equal(summaries?.length, 1);
  assert.equal(summaries?.[0]?.progress, true);
});
1690
+
1691
// A file written inside the task directory during waitForIdle is expected to
// appear in the persisted loop state as changed, count as progress, reset the
// no-progress streak, and be announced in a notification.
test("/ralph records durable progress from task-directory file mutations", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const repoSignals = {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  };
  const draft = generateDraft(task, target, repoSignals);
  const singleIterationContent = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  mkdirSync(target.dirPath, { recursive: true });
  writeFileSync(target.ralphPath, singleIterationContent, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  // The session manager below exposes the entries the harness has appended.
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");

  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    // Stands in for the agent's work: creates notes/findings.md in the task dir.
    waitForIdle: async () => {
      mkdirSync(join(target.dirPath, "notes"), { recursive: true });
      writeFileSync(join(target.dirPath, "notes", "findings.md"), "persisted change\n", "utf8");
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const finalState = latestLoopState(entries);
  const firstSummary = finalState?.iterationSummaries?.[0];
  assert.equal(firstSummary?.progress, true);
  assert.deepEqual(firstSummary?.changedFiles, ["notes/findings.md"]);
  assert.equal(firstSummary?.noProgressStreak, 0);

  const reportedProgress = notifications.some(({ message }) =>
    message.includes("Iteration 1 durable progress: notes/findings.md"),
  );
  assert.ok(reportedProgress);
});
1742
+
1743
// Schedules the task-directory write 40 ms after waitForIdle resolves, so the
// file does not exist yet when the idle wait returns. The test expects the
// loop to still record notes/findings.md as durable progress (per the test
// name, via a bounded snapshot poll) even though no write/edit tool events
// are emitted.
test("/ralph confirms late task-dir writes after agent_end with a bounded snapshot poll even without observed write/edit tool results", async (t) => {
  const cwd = createTempDir();
  // Handle of the pending late write. It doubles as the "already scheduled" flag.
  let lateWriteTimer: ReturnType<typeof setTimeout> | undefined;
  t.after(() => {
    // Cancel a still-pending late write before cleanup. Otherwise, if the
    // handler returned early (for example on a regression), the timer would
    // fire after rmSync and recreate the temp directory, leaking it.
    if (lateWriteTimer !== undefined) {
      clearTimeout(lateWriteTimer);
    }
    rmSync(cwd, { recursive: true, force: true });
  });

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  writeFileSync(target.ralphPath, draft.content.replace("max_iterations: 25", "max_iterations: 1"), "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => {
        throw new Error("should not prompt");
      },
      input: async () => {
        throw new Error("should not prompt");
      },
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => {
      // Schedule the write only once, even if waitForIdle is called again.
      if (lateWriteTimer === undefined) {
        lateWriteTimer = setTimeout(() => {
          mkdirSync(join(target.dirPath, "notes"), { recursive: true });
          writeFileSync(join(target.dirPath, "notes", "findings.md"), "persisted change\n", "utf8");
        }, 40);
      }
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const finalState = latestLoopState(entries);
  assert.equal(finalState?.iterationSummaries?.[0]?.progress, true);
  assert.deepEqual(finalState?.iterationSummaries?.[0]?.changedFiles, ["notes/findings.md"]);
  assert.equal(finalState?.iterationSummaries?.[0]?.noProgressStreak, 0);
  assert.ok(notifications.some(({ message }) => message.includes("Iteration 1 durable progress: notes/findings.md")));
});
1800
+
1801
// Emits a successful "edit" tool_call / tool_execution_end pair that targets
// notes/findings.md in the task directory without writing the file. The test
// expects progress "unknown", no changed files, a streak of 0, and the
// "could not be verified" notification instead of the "no durable progress" one.
test("/ralph downgrades observed task-dir edits without a final diff to unknown progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const wasNotified = (fragment: string) => notifications.some((entry) => entry.message.includes(fragment));
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const toolCall = harness.event("tool_call");
  const toolExecutionEnd = harness.event("tool_execution_end");
  const handler = harness.handler("ralph");

  // Identity shared by the tool_call and its matching tool_execution_end.
  const editCall = { toolName: "edit", toolCallId: "edit-call-1" };
  const editedPath = join(target.dirPath, "notes", "findings.md");

  // ctx is referenced from inside waitForIdle; the closure only runs after
  // the initializer has completed, so a single const declaration is enough.
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => {
      await toolCall({ ...editCall, input: { path: editedPath } }, ctx);
      await toolExecutionEnd({ ...editCall, isError: false }, ctx);
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(summary?.progress, "unknown");
  assert.deepEqual(summary?.changedFiles, []);
  assert.equal(summary?.noProgressStreak, 0);
  assert.ok(wasNotified("Iteration 1 durable progress could not be verified"));
  assert.equal(wasNotified("Iteration 1 made no durable progress"), false);
});
1870
+
1871
// Baseline case: the idle wait does nothing and no tool events are emitted.
// The test expects progress false, no changed files, a no-progress streak of
// 1, and the "made no durable progress" notification.
test("/ralph still reports no progress when no task-dir write or edit activity was observed", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ui = {
    notify: (message: string, level: string) => notifications.push({ message, level }),
    select: rejectPrompt,
    input: rejectPrompt,
    editor: async () => undefined,
    setStatus: () => undefined,
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui,
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const firstSummary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(firstSummary?.progress, false);
  assert.deepEqual(firstSummary?.changedFiles, []);
  assert.equal(firstSummary?.noProgressStreak, 1);
  const sawNoProgressNotice = notifications.some((entry) =>
    entry.message.includes("Iteration 1 made no durable progress"),
  );
  assert.ok(sawNoProgressNotice);
});
1919
+
1920
// Emits a successful "write" tool event pair for <cwd>/outside.txt and then
// actually writes that file. The file sits outside the task directory, so the
// test expects the iteration to still count as no progress (progress false,
// streak 1) even though the file exists afterwards.
test("/ralph ignores observed write activity outside the Ralph task directory", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const toolCall = harness.event("tool_call");
  const toolExecutionEnd = harness.event("tool_execution_end");
  const handler = harness.handler("ralph");
  const outsidePath = join(cwd, "outside.txt");

  // Identity shared by the tool_call and its matching tool_execution_end.
  const writeCall = { toolName: "write", toolCallId: "write-call-1" };

  // ctx is only dereferenced when waitForIdle runs, after initialization.
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => {
      await toolCall({ ...writeCall, input: { path: outsidePath } }, ctx);
      await toolExecutionEnd({ ...writeCall, isError: false }, ctx);
      writeFileSync(outsidePath, "outside\n", "utf8");
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(existsSync(outsidePath), true);
  assert.equal(summary?.progress, false);
  assert.deepEqual(summary?.changedFiles, []);
  assert.equal(summary?.noProgressStreak, 1);
  assert.ok(notifications.some((entry) => entry.message.includes("Iteration 1 made no durable progress")));
});
1991
+
1992
// The harness exec stub writes command-log.txt into the task directory each
// time it is invoked (per the test name, this models commands run before the
// agent). The test expects the file to exist afterwards yet not be reported
// as durable progress.
test("/ralph does not count pre-agent command mutations as durable progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const commandLogPath = join(target.dirPath, "command-log.txt");
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness({
    // Command execution mutates the task directory as a side effect.
    exec: async () => {
      writeFileSync(commandLogPath, "from command\n", "utf8");
      return { killed: false, stdout: "ok", stderr: "" };
    },
  });
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(existsSync(commandLogPath), true);
  assert.equal(summary?.progress, false);
  assert.deepEqual(summary?.changedFiles, []);
  assert.equal(summary?.noProgressStreak, 1);
  assert.ok(notifications.some((entry) => entry.message.includes("Iteration 1 made no durable progress")));
});
2046
+
2047
// The harness exec stub rewrites RALPH_PROGRESS.md in the task directory.
// The test expects that file to be excluded from progress detection:
// progress false, no changed files, and a no-progress streak of 1.
test("/ralph does not count RALPH_PROGRESS.md churn as durable progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const progressNotePath = join(target.dirPath, "RALPH_PROGRESS.md");
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness({
    exec: async () => {
      writeFileSync(progressNotePath, "rolling note\n", "utf8");
      return { killed: false, stdout: "ok", stderr: "" };
    },
  });
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(summary?.progress, false);
  assert.deepEqual(summary?.changedFiles, []);
  assert.equal(summary?.noProgressStreak, 1);
  assert.ok(notifications.some((entry) => entry.message.includes("Iteration 1 made no durable progress")));
});
2100
+
2101
// Pre-seeds the task directory with 205 small files before a one-iteration
// run. NOTE(review): presumably 205 exceeds the snapshot's file-count cap so
// the diff is inconclusive — confirm against the snapshot limits.
// The test expects progress "unknown", a streak of 0, and the
// "exhausted without verified progress" notification instead of
// "reached max iterations".
test("/ralph reports non-success when all iterations exhaust with unknown durable progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");
  // Seed note-000.txt through note-204.txt in ascending order.
  Array.from({ length: 205 }, (_, index) => index).forEach((index) => {
    const seedName = `note-${String(index).padStart(3, "0")}.txt`;
    writeFileSync(join(target.dirPath, seedName), `seed ${index}\n`, "utf8");
  });

  const notifications: Array<{ message: string; level: string }> = [];
  const wasNotified = (fragment: string) => notifications.some((entry) => entry.message.includes(fragment));
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(summary?.progress, "unknown");
  assert.deepEqual(summary?.changedFiles, []);
  assert.equal(summary?.noProgressStreak, 0);
  assert.ok(wasNotified("Iteration 1 durable progress could not be verified"));
  assert.equal(wasNotified("Iteration 1 made no durable progress"), false);
  assert.ok(wasNotified("Ralph loop exhausted without verified progress: 1 iterations"));
  assert.equal(wasNotified("Ralph loop reached max iterations"), false);
});
2155
+
2156
// Two-iteration run. The exec stub seeds 205 files the first time it runs
// after the second newSession call, so iteration 1 is expected to report
// progress false and iteration 2 progress "unknown".
// NOTE(review): presumably 205 files pushes the snapshot past its file cap — confirm.
// Both summaries are expected to carry a no-progress streak of 1, and the loop
// should end with the "exhausted without verified progress" notification.
test("/ralph reports non-success when false and unknown progress exhaust without any verified progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const twoIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  writeFileSync(target.ralphPath, twoIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const wasNotified = (fragment: string) => notifications.some((entry) => entry.message.includes(fragment));
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  let newSessionCalls = 0;
  let seededUnknownState = false;
  const harness = createHarness({
    exec: async () => {
      // Seed exactly once, on the first exec after the second session starts.
      const shouldSeed = newSessionCalls === 2 && !seededUnknownState;
      if (shouldSeed) {
        seededUnknownState = true;
        Array.from({ length: 205 }, (_, index) => index).forEach((index) => {
          const seedName = `note-${String(index).padStart(3, "0")}.txt`;
          writeFileSync(join(target.dirPath, seedName), `seed ${index}\n`, "utf8");
        });
      }
      return { killed: false, stdout: "ok", stderr: "" };
    },
  });
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => {
      newSessionCalls += 1;
      return { cancelled: false };
    },
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summaries = latestLoopState(entries)?.iterationSummaries;
  assert.equal(summaries?.length, 2);
  assert.equal(summaries?.[0]?.progress, false);
  assert.equal(summaries?.[0]?.noProgressStreak, 1);
  assert.equal(summaries?.[1]?.progress, "unknown");
  assert.equal(summaries?.[1]?.noProgressStreak, 1);
  assert.ok(wasNotified("Iteration 1 made no durable progress"));
  assert.ok(wasNotified("Iteration 2 durable progress could not be verified"));
  assert.ok(wasNotified("Ralph loop exhausted without verified progress: 2 iterations"));
  assert.equal(wasNotified("Ralph loop reached max iterations"), false);
});
2224
+
2225
// Pre-seeds the task directory with three 800,000-character files before a
// one-iteration run. NOTE(review): presumably their combined size exceeds the
// snapshot byte budget — confirm against the snapshot limits.
// The test expects progress "unknown", the "(snapshot truncated)" notification,
// and the "exhausted without verified progress" outcome.
test("/ralph treats byte-budget snapshot truncation as unknown progress and a non-success exhaustion", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singleIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singleIterationRalph, "utf8");
  const largeContent = "x".repeat(800_000);
  // Write large-0.txt, large-1.txt, large-2.txt in that order.
  [0, 1, 2].forEach((index) => {
    writeFileSync(join(target.dirPath, `large-${index}.txt`), largeContent, "utf8");
  });

  const notifications: Array<{ message: string; level: string }> = [];
  const wasNotified = (fragment: string) => notifications.some((entry) => entry.message.includes(fragment));
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summary = latestLoopState(entries)?.iterationSummaries?.[0];
  assert.equal(summary?.progress, "unknown");
  assert.deepEqual(summary?.changedFiles, []);
  assert.ok(wasNotified("Iteration 1 durable progress could not be verified (snapshot truncated)"));
  assert.ok(wasNotified("Ralph loop exhausted without verified progress: 1 iterations"));
  assert.equal(wasNotified("Ralph loop reached max iterations"), false);
});
2278
+
2279
// Configures completion_promise: done with a two-iteration budget and
// pre-seeds 205 files. The idle wait appends an assistant message containing
// <promise>done</promise>. The test expects the loop to stop after a single
// iteration (one waitForIdle call, one summary) with progress "unknown".
test("/ralph can stop on completion_promise when durable progress detection is unknown", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  // Raise the budget to two iterations, then add the completion promise.
  const withIterationCap = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  const withPromise = withIterationCap.replace("timeout: 300\n", "timeout: 300\ncompletion_promise: done\n");
  writeFileSync(target.ralphPath, withPromise, "utf8");
  Array.from({ length: 205 }, (_, index) => index).forEach((index) => {
    const seedName = `note-${String(index).padStart(3, "0")}.txt`;
    writeFileSync(join(target.dirPath, seedName), `seed ${index}\n`, "utf8");
  });

  const notifications: Array<{ message: string; level: string }> = [];
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  let waitCalls = 0;
  const handler = harness.handler("ralph");
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => {
      waitCalls += 1;
      // The assistant announces completion through the session entries.
      entries.push({
        type: "message",
        message: { role: "assistant", content: [{ type: "text", text: "<promise>done</promise>" }] },
      });
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summaries = latestLoopState(entries)?.iterationSummaries;
  assert.equal(waitCalls, 1);
  assert.equal(summaries?.length, 1);
  assert.equal(summaries?.[0]?.progress, "unknown");
  assert.equal(summaries?.[0]?.noProgressStreak, 0);
});
2343
+
2344
// Supplies the <promise>done</promise> assistant message through
// ctx.agentEndMessages and never appends it to the session entries.
// The test expects the promise to be matched on iteration 1 but reported
// together with "no durable progress", so the loop runs both iterations and
// ends as "exhausted without verified progress".
test("/ralph matches completion_promise from agent_end messages instead of session entry slices", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const withIterationCap = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  const withPromise = withIterationCap.replace("timeout: 300\n", "timeout: 300\ncompletion_promise: done\n");
  writeFileSync(target.ralphPath, withPromise, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const promiseMessage = {
    type: "message",
    message: { role: "assistant", content: [{ type: "text", text: "<promise>done</promise>" }] },
  };
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    agentEndMessages: [promiseMessage],
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summaries = latestLoopState(entries)?.iterationSummaries;
  assert.equal(summaries?.[0]?.progress, false);
  assert.equal(summaries?.length, 2);
  const sawPromiseWithoutProgress = notifications.some((entry) => {
    const text = entry.message;
    return text.includes("Completion promise matched on iteration 1") && text.includes("no durable progress");
  });
  assert.ok(sawPromiseWithoutProgress);
  assert.ok(
    notifications.some((entry) => entry.message.includes("Ralph loop exhausted without verified progress: 2 iterations")),
  );
});
2408
+
2409
// Two-iteration run in which newSession switches the reported session file
// from "session-a" to "session-b". During session-a a full write tool_call /
// tool_execution_end pair is emitted for a task-directory path. During
// session-b only a tool_execution_end with the same toolCallId arrives, on a
// context that still reports "session-a". The test expects iteration 1 to be
// "unknown" and iteration 2 to be plain no-progress, so the second event must
// not be credited to iteration 2.
test("/ralph scopes successful write/edit bookkeeping to the active loop session and iteration", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const twoIterationRalph = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  writeFileSync(target.ralphPath, twoIterationRalph, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const wasNotified = (fragment: string) => notifications.some((entry) => entry.message.includes(fragment));
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const toolCall = harness.event("tool_call");
  const toolExecutionEnd = harness.event("tool_execution_end");
  const handler = harness.handler("ralph");
  const sessionFiles = ["session-a", "session-b"];
  let sessionIndex = -1;
  let currentSessionFile = "session-a";

  // Identity shared by every write event in this test.
  const writeCall = { toolName: "write", toolCallId: "write-call-1" };

  // ctx is only dereferenced when waitForIdle runs, after initialization.
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => currentSessionFile },
    newSession: async () => {
      sessionIndex += 1;
      // Advance through the session list, staying on the last entry once exhausted.
      currentSessionFile = sessionFiles[sessionIndex] ?? sessionFiles[sessionFiles.length - 1]!;
      return { cancelled: false };
    },
    waitForIdle: async () => {
      if (currentSessionFile !== "session-a") {
        // Stale completion: same toolCallId, reported from a context that
        // still identifies itself as session-a.
        const staleCtx = { ...ctx, sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" } };
        await toolExecutionEnd({ ...writeCall, isError: false }, staleCtx);
        return;
      }

      await toolCall({ ...writeCall, input: { path: join(target.dirPath, "notes", "findings.md") } }, ctx);
      await toolExecutionEnd({ ...writeCall, isError: false }, ctx);
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  const summaries = latestLoopState(entries)?.iterationSummaries;
  assert.equal(summaries?.length, 2);
  assert.equal(summaries?.[0]?.progress, "unknown");
  assert.equal(summaries?.[1]?.progress, false);
  assert.equal(summaries?.[1]?.noProgressStreak, 1);
  assert.ok(wasNotified("Iteration 1 durable progress could not be verified"));
  assert.ok(wasNotified("Iteration 2 made no durable progress"));
});
2498
+
2499
// Scenario: the agent never reports completion. `waitForIdle` hands back a
// promise that never settles and the RALPH.md front matter sets `timeout: 1`,
// so the handler is expected to emit both timeout notifications after exactly
// one wait.
test("/ralph times out when agent_end never arrives", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  mkdirSync(target.dirPath, { recursive: true });

  // Hand-written loop contract: one iteration, one-second timeout, empty guardrails.
  const ralphContract = [
    "---",
    "commands: []",
    "max_iterations: 1",
    "timeout: 1",
    "guardrails:",
    " block_commands: []",
    " protected_files: []",
    "---",
    "Task: Fix flaky auth tests",
    "",
    "Wait for the agent to finish.",
  ].join("\n");
  writeFileSync(target.ralphPath, ralphContract, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const sawNotification = (fragment: string) => notifications.some(({ message }) => message.includes(fragment));
  // Any interactive prompt is a test failure: the run is headless (hasUI: false).
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  let waitCalls = 0;
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    // NOTE(review): presumably stops the harness from emitting agent_end on its
    // own — confirm against createHarness.
    suppressAutoAgentEnd: true,
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: () => {
      waitCalls += 1;
      // Never settles, so only the loop's own timeout can end the iteration.
      return new Promise<void>(() => undefined);
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  assert.equal(waitCalls, 1);
  assert.ok(sawNotification("Iteration 1 timed out after 1s, stopping loop"));
  assert.ok(sawNotification("Ralph loop stopped after a timeout: 1 iterations"));
});
2558
+
2559
// Scenario: two iterations run and neither touches any file. Only the
// notification side is asserted here: the first iteration must be reported as
// having made no durable progress, and waitForIdle must run once per iteration.
test("/ralph reports no-progress iterations in notifications and next-iteration handoff", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  // Shrink the generated draft's iteration budget so the loop ends after two passes.
  const twoPassContract = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  writeFileSync(target.ralphPath, twoPassContract, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const entries = harness.appendedEntries;
  let waitCalls = 0;
  const handler = harness.handler("ralph");
  // Headless run: any prompt is a failure.
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    // The fake agent does nothing except count how often it was awaited.
    waitForIdle: async () => {
      waitCalls += 1;
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  assert.equal(waitCalls, 2);
  const reportedNoProgress = notifications.some(({ message }) => message.includes("Iteration 1 made no durable progress"));
  assert.ok(reportedNoProgress);
});
2607
+
2608
// Scenario: the contract declares `completion_promise: done` and the assistant
// emits `<promise>done</promise>` during iteration 1 without changing any file.
// The loop is expected to keep going (two waits) and finish with the
// "exhausted without verified progress" notification.
test("/ralph ignores completion_promise matches when the iteration made no durable progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });

  // Cap the loop at two iterations, then splice a completion promise in after the timeout line.
  const cappedContract = draft.content.replace("max_iterations: 25", "max_iterations: 2");
  const promiseContract = cappedContract.replace("timeout: 300\n", "timeout: 300\ncompletion_promise: done\n");
  writeFileSync(target.ralphPath, promiseContract, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const entries = harness.appendedEntries;
  let waitCalls = 0;
  const handler = harness.handler("ralph");
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => {
      waitCalls += 1;
      if (waitCalls !== 1) {
        return;
      }
      // First iteration only: the assistant claims completion in its reply text.
      entries.push({
        type: "message",
        message: { role: "assistant", content: [{ type: "text", text: "<promise>done</promise>" }] },
      });
    },
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  assert.equal(waitCalls, 2);
  // A single notification must mention both the promise match and the missing progress.
  const promiseRejected = notifications.some(
    ({ message }) => message.includes("Completion promise matched on iteration 1") && message.includes("no durable progress"),
  );
  assert.ok(promiseRejected);
  const exhausted = notifications.some(({ message }) => message.includes("Ralph loop exhausted without verified progress"));
  assert.ok(exhausted);
});
2673
+
2674
// Scenario: a single-iteration loop in which the fake agent does nothing. The
// final notification must be the "exhausted without verified progress" one,
// and no "Ralph loop done" message may appear.
test("/ralph reports non-success when the loop exhausts without verified progress", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "Fix flaky auth tests";
  const target = createTarget(cwd, task);
  const draft = generateDraft(task, target, {
    packageManager: "npm",
    testCommand: "npm test",
    lintCommand: "npm run lint",
    hasGit: true,
    topLevelDirs: ["src", "tests"],
    topLevelFiles: ["package.json"],
  });
  mkdirSync(target.dirPath, { recursive: true });
  const singlePassContract = draft.content.replace("max_iterations: 25", "max_iterations: 1");
  writeFileSync(target.ralphPath, singlePassContract, "utf8");

  const notifications: Array<{ message: string; level: string }> = [];
  const sawNotification = (fragment: string) => notifications.some(({ message }) => message.includes(fragment));

  const harness = createHarness();
  const entries = harness.appendedEntries;
  const handler = harness.handler("ralph");
  const rejectPrompt = async () => {
    throw new Error("should not prompt");
  };
  const ctx = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: rejectPrompt,
      input: rejectPrompt,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: { getEntries: () => entries, getSessionFile: () => "session-a" },
    newSession: async () => ({ cancelled: false }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${target.ralphPath}`, ctx);

  assert.ok(sawNotification("Ralph loop exhausted without verified progress"));
  assert.equal(sawNotification("Ralph loop done"), false);
});
2719
+
2720
// Scenario: the command context carries `model` and `modelRegistry`. The stubbed
// `createDraftPlan` asserts that both reach it as the runtime argument, and the
// test then checks that the draft was planned once and saved to disk.
test("/ralph-draft passes the active model runtime to the draft planner", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const task = "reverse engineer this app";
  const target = createTarget(cwd, task);
  const draftCalls: Array<{ task: string; target: DraftTarget; cwd: string; runtime: StrengthenDraftRuntime | undefined }> = [];
  const draftPlan = makeDraftPlan(task, target, "llm-strengthened", cwd);
  const runtime = {
    model: {
      provider: "anthropic",
      id: "claude-sonnet-4-5",
      name: "Claude Sonnet 4.5",
      api: "anthropic-messages",
      baseUrl: "https://example.invalid",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 200_000,
      maxTokens: 8_192,
    },
    modelRegistry: {
      // Credentials lookup stub; it also checks it is asked about the active model.
      async getApiKeyAndHeaders(model) {
        assert.equal(model.id, "claude-sonnet-4-5");
        return { ok: true, apiKey: "active-api-key", headers: { "x-runtime": "1" } };
      },
    },
  } satisfies StrengthenDraftRuntime;

  const harness = createHarness({
    createDraftPlan: async (
      plannedTask: string,
      plannedTarget: DraftTarget,
      plannedCwd: string,
      plannedRuntime: StrengthenDraftRuntime | undefined,
    ) => {
      draftCalls.push({ task: plannedTask, target: plannedTarget, cwd: plannedCwd, runtime: plannedRuntime });
      assert.ok(plannedRuntime, "expected the active model runtime to reach the draft planner");
      assert.equal(plannedRuntime?.model?.id, runtime.model.id);
      assert.equal(plannedRuntime?.modelRegistry, runtime.modelRegistry);
      return draftPlan;
    },
  });

  const handler = harness.handler("ralph-draft");
  // Drafting must never start or wait on a loop; both hooks fail loudly if touched.
  const failWith = (reason: string) => async () => {
    throw new Error(reason);
  };
  const ctx = {
    cwd,
    hasUI: true,
    ui: {
      select: async () => "Save draft",
      input: async () => undefined,
      editor: async () => undefined,
      notify: () => undefined,
      setStatus: () => undefined,
    },
    model: runtime.model,
    modelRegistry: runtime.modelRegistry,
    sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" },
    newSession: failWith("/ralph-draft should not start the loop"),
    waitForIdle: failWith("/ralph-draft should not wait for idle"),
  };

  await handler(task, ctx);

  assert.equal(draftCalls.length, 1);
  assert.equal(existsSync(target.ralphPath), true);
});
2785
+
2786
// Scenario: the active loop state lists only SECRET_PATH_POLICY_TOKEN under
// protectedFiles. Both `write` and `edit` aimed at `.ssh/config` must come back
// blocked with the "is protected" reason.
test("tool_call blocks write and edit for token-covered secret paths", async () => {
  const harness = createHarness();
  const toolCall = harness.event("tool_call");

  const loopStateEntry = {
    type: "custom",
    customType: "ralph-loop-state",
    data: {
      active: true,
      loopToken: "loop-secret-token",
      iteration: 1,
      guardrails: { blockCommands: [], protectedFiles: [SECRET_PATH_POLICY_TOKEN] },
    },
  };
  const ctx = {
    sessionManager: {
      // A fresh array per call, as in a literal returned from the getter.
      getEntries: () => [loopStateEntry],
      getSessionFile: () => "session-a",
    },
  };

  const mutatingTools = ["write", "edit"] as const;
  for (const toolName of mutatingTools) {
    const verdict = await toolCall({ toolName, input: { path: ".ssh/config" } }, ctx);
    assert.deepEqual(verdict, { block: true, reason: "ralph: .ssh/config is protected" });
  }
});
2812
+
2813
// Scenario: the loop state records `cwd` and a repo-relative protected glob.
// A tool call that uses the absolute form of a path under that glob must still
// be blocked, and the reason echoes the absolute path as given.
test("tool_call blocks absolute write paths against repo-relative protected globs", async () => {
  const harness = createHarness();
  const toolCall = harness.event("tool_call");
  const cwd = "/repo/project";
  const absolutePath = join(cwd, "src", "generated", "output.ts");

  const loopStateEntry = {
    type: "custom",
    customType: "ralph-loop-state",
    data: {
      active: true,
      loopToken: "loop-absolute-token",
      iteration: 1,
      cwd,
      guardrails: { blockCommands: [], protectedFiles: ["src/generated/**"] },
    },
  };
  const ctx = {
    sessionManager: {
      getEntries: () => [loopStateEntry],
      getSessionFile: () => "session-a",
    },
  };

  const expected = { block: true, reason: `ralph: ${absolutePath} is protected` };
  const mutatingTools = ["write", "edit"] as const;
  for (const toolName of mutatingTools) {
    const verdict = await toolCall({ toolName, input: { path: absolutePath } }, ctx);
    assert.deepEqual(verdict, expected);
  }
});
2842
+
2843
// Scenario: a plain glob in protectedFiles. Writes and edits under the glob are
// blocked and each block is recorded through appendEntry as a
// "ralph-blocked-write" proof entry; a write outside the glob passes through.
test("tool_call keeps explicit protected-file globs working", async () => {
  const proofEntries: Array<{ customType: string; data: any }> = [];
  const harness = createHarness({
    appendEntry: (customType, data) => {
      proofEntries.push({ customType, data });
    },
  });
  const toolCall = harness.event("tool_call");

  const loopStateEntry = {
    type: "custom",
    customType: "ralph-loop-state",
    data: {
      active: true,
      loopToken: "loop-glob-token",
      iteration: 1,
      guardrails: { blockCommands: [], protectedFiles: ["src/generated/**"] },
    },
  };
  const ctx = {
    sessionManager: {
      getEntries: () => [loopStateEntry],
      getSessionFile: () => "session-a",
    },
  };

  const mutatingTools = ["write", "edit"] as const;
  for (const toolName of mutatingTools) {
    const verdict = await toolCall({ toolName, input: { path: "src/generated/output.ts" } }, ctx);
    assert.deepEqual(verdict, { block: true, reason: "ralph: src/generated/output.ts is protected" });
  }

  // Outside the glob: the handler returns nothing, i.e. the call is allowed.
  const allowed = await toolCall({ toolName: "write", input: { path: "src/app.ts" } }, ctx);
  assert.equal(allowed, undefined);

  // Exactly one proof entry per blocked call, one for each tool.
  const blockedWrites = proofEntries.filter((entry) => entry.customType === "ralph-blocked-write");
  assert.equal(blockedWrites.length, 2);
  const recordedFor = (toolName: string) =>
    proofEntries.some((entry) => entry.data.toolName === toolName && entry.data.path === "src/generated/output.ts");
  assert.ok(recordedFor("write"));
  assert.ok(recordedFor("edit"));
});
2881
+
2882
// Scenario: the runner environment variables are set and appendEntry always
// throws. `before_agent_start` must still resolve, and the failure must show up
// on stderr, naming both proof entry types that could not be appended.
test("/ralph subprocess child surfaces proof appendEntry failures", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "subprocess-child-task");
  mkdirSync(taskDir, { recursive: true });

  // Durable runner state on disk: a running loop at iteration 2 of 4 ...
  writeStatusFile(taskDir, {
    loopToken: "subprocess-loop-token",
    ralphPath: join(taskDir, "RALPH.md"),
    taskDir,
    cwd,
    status: "running",
    currentIteration: 2,
    maxIterations: 4,
    timeout: 300,
    startedAt: new Date().toISOString(),
    guardrails: { blockCommands: [], protectedFiles: [] },
  });
  // ... with one completed iteration already recorded.
  appendIterationRecord(taskDir, {
    iteration: 1,
    status: "complete",
    startedAt: new Date(Date.now() - 1000).toISOString(),
    completedAt: new Date().toISOString(),
    durationMs: 1000,
    progress: true,
    changedFiles: ["notes/findings.md"],
    noProgressStreak: 0,
    snapshotTruncated: false,
    snapshotErrorCount: 0,
    loopToken: "subprocess-loop-token",
  } as any);

  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: cwd,
    RALPH_RUNNER_LOOP_TOKEN: "subprocess-loop-token",
    RALPH_RUNNER_CURRENT_ITERATION: "2",
    RALPH_RUNNER_MAX_ITERATIONS: "4",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    RALPH_RUNNER_GUARDRAILS: JSON.stringify({ blockCommands: [], protectedFiles: [] }),
  });
  t.after(restoreEnv);

  // Swap process.stderr.write for a recorder; the original is reinstated after the test.
  const stderrWrites: string[] = [];
  const originalStderrWrite = process.stderr.write.bind(process.stderr);
  (process.stderr as any).write = (chunk: unknown) => {
    stderrWrites.push(String(chunk));
    return true;
  };
  t.after(() => {
    (process.stderr as any).write = originalStderrWrite;
  });

  const harness = createHarness({
    appendEntry: () => {
      throw new Error("append failed");
    },
  });
  const beforeAgentStart = harness.event("before_agent_start");

  const emptySessionCtx = { sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" } };
  await assert.doesNotReject(beforeAgentStart({ systemPrompt: "Base prompt" }, emptySessionCtx));

  const stderrOutput = stderrWrites.join("");
  const expectedFragments = [/Ralph proof logging failed/, /ralph-steering-injected/, /ralph-loop-context-injected/];
  for (const fragment of expectedFragments) {
    assert.match(stderrOutput, fragment);
  }
});
2954
+
2955
// Scenario: the session has no entries, so any loop context in the system
// prompt has to come from the status file, iteration records and runner
// environment prepared below. The test checks the injected prompt text and the
// order of the proof entries appended along the way.
test("/ralph subprocess child injects durable loop context into before_agent_start when session entries are empty", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "subprocess-child-task");
  mkdirSync(taskDir, { recursive: true });
  writeStatusFile(taskDir, {
    loopToken: "subprocess-loop-token",
    ralphPath: join(taskDir, "RALPH.md"),
    taskDir,
    cwd,
    status: "running",
    currentIteration: 2,
    maxIterations: 4,
    timeout: 300,
    startedAt: new Date().toISOString(),
    guardrails: { blockCommands: [], protectedFiles: [] },
  });
  // One finished iteration that changed notes/findings.md.
  appendIterationRecord(taskDir, {
    iteration: 1,
    status: "complete",
    startedAt: new Date(Date.now() - 1000).toISOString(),
    completedAt: new Date().toISOString(),
    durationMs: 1000,
    progress: true,
    changedFiles: ["notes/findings.md"],
    noProgressStreak: 0,
    snapshotTruncated: false,
    snapshotErrorCount: 0,
    loopToken: "subprocess-loop-token",
  } as any);

  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: cwd,
    RALPH_RUNNER_LOOP_TOKEN: "subprocess-loop-token",
    RALPH_RUNNER_CURRENT_ITERATION: "2",
    RALPH_RUNNER_MAX_ITERATIONS: "4",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    RALPH_RUNNER_GUARDRAILS: JSON.stringify({ blockCommands: [], protectedFiles: [] }),
  });
  t.after(restoreEnv);

  const proofEntries: Array<{ customType: string; data: any }> = [];
  const harness = createHarness({
    appendEntry: (customType, data) => {
      proofEntries.push({ customType, data });
    },
  });
  const beforeAgentStart = harness.event("before_agent_start");
  const emptySessionCtx = { sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" } };
  const result = await beforeAgentStart({ systemPrompt: "Base prompt" }, emptySessionCtx);

  assert.ok(result);
  const expectedPromptParts = [
    /## Ralph Loop Context/,
    /Iteration 2\/4/,
    /Task directory: \.\/subprocess-child-task/,
    /Previous iterations:\n- Iteration 1: 1s — durable progress \(notes\/findings\.md\); no-progress streak: 0/,
    /Last iteration durable progress: notes\/findings\.md\./,
  ];
  for (const part of expectedPromptParts) {
    assert.match(result.systemPrompt, part);
  }

  const appendedTypes = proofEntries.map((entry) => entry.customType);
  assert.deepEqual(appendedTypes, ["ralph-steering-injected", "ralph-loop-context-injected"]);
});
3018
+
3019
// Scenario: the task directory holds iteration records from two loop tokens.
// Only the record whose token matches the running loop may appear in the
// injected prompt; the stale record's changed file must not be mentioned.
test("/ralph subprocess child scopes durable history to the current loop token", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "subprocess-child-task");
  mkdirSync(taskDir, { recursive: true });
  writeStatusFile(taskDir, {
    loopToken: "current-loop-token",
    ralphPath: join(taskDir, "RALPH.md"),
    taskDir,
    cwd,
    status: "running",
    currentIteration: 2,
    maxIterations: 5,
    timeout: 300,
    startedAt: new Date().toISOString(),
    guardrails: { blockCommands: [], protectedFiles: [] },
  });

  // Record left behind under a different loop token.
  appendIterationRecord(taskDir, {
    loopToken: "stale-loop-token",
    iteration: 1,
    status: "complete",
    startedAt: new Date(Date.now() - 2000).toISOString(),
    completedAt: new Date(Date.now() - 1000).toISOString(),
    durationMs: 1000,
    progress: true,
    changedFiles: ["stale/findings.md"],
    noProgressStreak: 0,
  } as any);
  // Record that belongs to the loop under test.
  appendIterationRecord(taskDir, {
    loopToken: "current-loop-token",
    iteration: 2,
    status: "complete",
    startedAt: new Date(Date.now() - 1000).toISOString(),
    completedAt: new Date().toISOString(),
    durationMs: 1000,
    progress: true,
    changedFiles: ["current/findings.md"],
    noProgressStreak: 0,
  } as any);

  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: cwd,
    RALPH_RUNNER_LOOP_TOKEN: "current-loop-token",
    RALPH_RUNNER_CURRENT_ITERATION: "2",
    RALPH_RUNNER_MAX_ITERATIONS: "5",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    RALPH_RUNNER_GUARDRAILS: JSON.stringify({ blockCommands: [], protectedFiles: [] }),
  });
  t.after(restoreEnv);

  const harness = createHarness();
  const beforeAgentStart = harness.event("before_agent_start");
  const emptySessionCtx = { sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" } };
  const result = await beforeAgentStart({ systemPrompt: "Base prompt" }, emptySessionCtx);

  assert.ok(result);
  const prompt = result.systemPrompt;
  assert.match(prompt, /## Ralph Loop Context/);
  assert.match(prompt, /Iteration 2\/5/);
  assert.match(prompt, /Previous iterations:/);
  assert.match(prompt, /Iteration 2: 1s — durable progress \(current\/findings\.md\); no-progress streak: 0/);
  assert.doesNotMatch(prompt, /stale\/findings\.md/);
});
3085
+
3086
// Scenario: the status file on disk carries `guardrails: null` while the
// environment supplies well-formed guardrails. The tool call must be blocked
// with the "invalid loop contract" reason.
test("/ralph subprocess child fails closed on malformed durable status files", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "subprocess-child-task");
  mkdirSync(taskDir, { recursive: true });
  // `as any` is needed because a null guardrails field is deliberately malformed.
  writeStatusFile(taskDir, {
    loopToken: "malformed-loop-token",
    ralphPath: join(taskDir, "RALPH.md"),
    taskDir,
    cwd,
    status: "running",
    currentIteration: 1,
    maxIterations: 5,
    timeout: 300,
    startedAt: new Date().toISOString(),
    guardrails: null,
  } as any);

  const envGuardrails = JSON.stringify({ blockCommands: ["git\\s+push"], protectedFiles: ["src/generated/**"] });
  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: cwd,
    RALPH_RUNNER_LOOP_TOKEN: "malformed-loop-token",
    RALPH_RUNNER_CURRENT_ITERATION: "1",
    RALPH_RUNNER_MAX_ITERATIONS: "5",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    RALPH_RUNNER_GUARDRAILS: envGuardrails,
  });
  t.after(restoreEnv);

  const harness = createHarness();
  const toolCall = harness.event("tool_call");
  const bashEvent = { toolName: "bash", input: { command: "git push origin main" } };
  const emptySessionCtx = { sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" } };
  const verdict = await toolCall(bashEvent, emptySessionCtx);

  assert.deepEqual(verdict, { block: true, reason: "ralph: invalid loop contract" });
});
3125
+
3126
// Scenario: no status file is written and RALPH_RUNNER_GUARDRAILS holds text
// that is not JSON. The tool call must be blocked with the
// "invalid loop contract" reason.
test("/ralph subprocess child fails closed when the env loop contract is malformed", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "subprocess-child-task");
  mkdirSync(taskDir, { recursive: true });

  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: cwd,
    RALPH_RUNNER_LOOP_TOKEN: "env-contract-loop-token",
    RALPH_RUNNER_CURRENT_ITERATION: "1",
    RALPH_RUNNER_MAX_ITERATIONS: "5",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    // Deliberately unparsable guardrails payload.
    RALPH_RUNNER_GUARDRAILS: "not-json",
  });
  t.after(restoreEnv);

  const harness = createHarness();
  const toolCall = harness.event("tool_call");
  const bashEvent = { toolName: "bash", input: { command: "git push origin main" } };
  const emptySessionCtx = { sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" } };
  const verdict = await toolCall(bashEvent, emptySessionCtx);

  assert.deepEqual(verdict, { block: true, reason: "ralph: invalid loop contract" });
});
3153
+
3154
// Scenario: with an empty session and the loop described only by the status
// file and runner environment, the same failing bash result is delivered three
// times. The first two pass through untouched; the third comes back with a
// steering warning appended to its content.
test("/ralph subprocess child steers repeated bash failures from durable runner state", { concurrency: false }, async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "subprocess-child-task");
  mkdirSync(taskDir, { recursive: true });
  writeStatusFile(taskDir, {
    loopToken: "subprocess-loop-token",
    ralphPath: join(taskDir, "RALPH.md"),
    taskDir,
    cwd,
    status: "running",
    currentIteration: 3,
    maxIterations: 4,
    timeout: 300,
    startedAt: new Date().toISOString(),
    guardrails: { blockCommands: [], protectedFiles: [] },
  });

  const restoreEnv = setRunnerEnv({
    RALPH_RUNNER_TASK_DIR: taskDir,
    RALPH_RUNNER_CWD: cwd,
    RALPH_RUNNER_LOOP_TOKEN: "subprocess-loop-token",
    RALPH_RUNNER_CURRENT_ITERATION: "3",
    RALPH_RUNNER_MAX_ITERATIONS: "4",
    RALPH_RUNNER_NO_PROGRESS_STREAK: "0",
    RALPH_RUNNER_GUARDRAILS: JSON.stringify({ blockCommands: [], protectedFiles: [] }),
  });
  t.after(restoreEnv);

  const harness = createHarness();
  const toolResult = harness.event("tool_result");
  const ctx = { sessionManager: { getEntries: () => [], getSessionFile: () => "session-a" } };
  const failureEvent = {
    toolName: "bash",
    content: [{ type: "text", text: "ERROR: command failed" }],
  };

  // Failures one and two: no rewrite of the tool result.
  for (let attempt = 1; attempt <= 2; attempt += 1) {
    assert.equal(await toolResult(failureEvent, ctx), undefined);
  }

  // Failure three: original content plus the steering warning.
  const steered = await toolResult(failureEvent, ctx);
  assert.deepEqual(steered, {
    content: [
      { type: "text", text: "ERROR: command failed" },
      { type: "text", text: "\n\n⚠️ ralph: 3+ failures this iteration. Stop and describe the root cause before retrying." },
    ],
  });
});
3201
+
3202
+
3203
// Scenario: the command is issued from `callerCwd`, but the persisted session
// state and the durable registry entry both point at `sessionCwd`. The stop flag
// must land in the task directory, the caller's registry must stay empty, and
// the session-side registry entry must keep its durable fields while gaining a
// stopRequestedAt timestamp.
test("/ralph-stop --path prefers session state and uses the session registry cwd", async (t) => {
  const callerCwd = createTempDir();
  const sessionCwd = createTempDir();
  t.after(() => rmSync(callerCwd, { recursive: true, force: true }));
  t.after(() => rmSync(sessionCwd, { recursive: true, force: true }));

  const taskDir = join(sessionCwd, "session-precedence-task");
  mkdirSync(taskDir, { recursive: true });
  const ralphPath = join(taskDir, "RALPH.md");
  writeFileSync(ralphPath, "Task: Stop me\n", "utf8");

  const durableEntry: ActiveLoopRegistryEntry = {
    taskDir,
    ralphPath,
    cwd: sessionCwd,
    loopToken: "durable-loop-token",
    status: "running",
    currentIteration: 4,
    maxIterations: 8,
    startedAt: new Date(Date.now() - 10_000).toISOString(),
    updatedAt: new Date().toISOString(),
  };
  writeActiveLoopRegistryEntry(sessionCwd, durableEntry);

  // Loop state as stored in the session; its counters differ from the durable entry on purpose.
  const persistedState = {
    active: true,
    loopToken: "session-loop-token",
    cwd: sessionCwd,
    taskDir,
    iteration: 2,
    maxIterations: 10,
    noProgressStreak: 0,
    iterationSummaries: [],
    guardrails: { blockCommands: [], protectedFiles: [] },
    stopRequested: false,
  };

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const handler = harness.handler("ralph-stop");
  const sessionEntries = [{ type: "custom", customType: "ralph-loop-state", data: persistedState }];
  // getRuntimeCtx closes over ctx itself; the closure only runs after ctx is initialised.
  const ctx: any = {
    cwd: callerCwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => undefined,
      input: async () => undefined,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: createSessionManager(sessionEntries, "session-a"),
    getRuntimeCtx: () => ctx,
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler(`--path ${ralphPath}`, ctx);

  const stopFlagPath = join(taskDir, ".ralph-runner", "stop.flag");
  assert.equal(existsSync(stopFlagPath), true);
  assert.equal(readActiveLoopRegistry(callerCwd).length, 0);

  const sessionRegistry = readActiveLoopRegistry(sessionCwd).find((entry) => entry.taskDir === taskDir);
  assert.ok(sessionRegistry);
  // Durable bookkeeping must survive the stop request unchanged.
  for (const field of ["currentIteration", "maxIterations", "status", "startedAt"] as const) {
    assert.equal(sessionRegistry?.[field], durableEntry[field]);
  }
  assert.equal(typeof sessionRegistry?.stopRequestedAt, "string");

  const announcedStop = notifications.some(({ message }) => message.includes("Ralph loop stopping after current iteration"));
  assert.ok(announcedStop);
});
3275
+
3276
// Scenario: the durable registry entry is already "stopped" and carries a
// stopObservedAt timestamp, while the session state still looks active. Issuing
// /ralph-stop with no arguments must write the stop flag without disturbing the
// entry's status, counters, startedAt or stopObservedAt.
test("/ralph-stop preserves a stop that was already observed before the registry update", async (t) => {
  const cwd = createTempDir();
  t.after(() => rmSync(cwd, { recursive: true, force: true }));

  const taskDir = join(cwd, "mid-iteration-task");
  mkdirSync(taskDir, { recursive: true });
  const ralphPath = join(taskDir, "RALPH.md");
  writeFileSync(ralphPath, "Task: Stop me\n", "utf8");

  const stopRequestedAt = new Date(Date.now() - 2000).toISOString();
  const stopObservedAt = new Date().toISOString();
  const durableEntry: ActiveLoopRegistryEntry = {
    taskDir,
    ralphPath,
    cwd,
    loopToken: "durable-loop-token",
    status: "stopped",
    currentIteration: 5,
    maxIterations: 8,
    startedAt: new Date(Date.now() - 20_000).toISOString(),
    updatedAt: stopObservedAt,
    stopRequestedAt,
    stopObservedAt,
  };
  writeActiveLoopRegistryEntry(cwd, durableEntry);

  const notifications: Array<{ message: string; level: string }> = [];
  const harness = createHarness();
  const handler = harness.handler("ralph-stop");

  // Session-side view of the loop: still active, no stop requested yet.
  const sessionLoopState = {
    type: "custom",
    customType: "ralph-loop-state",
    data: {
      active: true,
      loopToken: "session-loop-token",
      cwd,
      taskDir,
      iteration: 2,
      maxIterations: 10,
      noProgressStreak: 1,
      iterationSummaries: [],
      guardrails: { blockCommands: [], protectedFiles: [] },
      stopRequested: false,
    },
  };
  // getRuntimeCtx closes over ctx itself; the closure only runs after ctx is initialised.
  const ctx: any = {
    cwd,
    hasUI: false,
    ui: {
      notify: (message: string, level: string) => notifications.push({ message, level }),
      select: async () => undefined,
      input: async () => undefined,
      editor: async () => undefined,
      setStatus: () => undefined,
    },
    sessionManager: createSessionManager([sessionLoopState], "session-a"),
    getRuntimeCtx: () => ctx,
    newSession: async () => ({ cancelled: true }),
    waitForIdle: async () => undefined,
  };

  await handler("", ctx);

  const stopFlagPath = join(taskDir, ".ralph-runner", "stop.flag");
  assert.equal(existsSync(stopFlagPath), true);

  const updated = readActiveLoopRegistry(cwd).find((entry) => entry.taskDir === taskDir);
  assert.ok(updated);
  for (const field of ["currentIteration", "maxIterations"] as const) {
    assert.equal(updated?.[field], durableEntry[field]);
  }
  assert.equal(updated?.status, "stopped");
  assert.equal(updated?.startedAt, durableEntry.startedAt);
  assert.equal(updated?.stopObservedAt, stopObservedAt);
  assert.equal(typeof updated?.stopRequestedAt, "string");

  const announcedStop = notifications.some(({ message }) => message.includes("Ralph loop stopping after current iteration"));
  assert.ok(announcedStop);
});
3352
+
3353
+ test("/ralph-stop reports no active loops when nothing is active", async (t) => { // Checks that the command only warns when the test has set up no loop at all.
3354
+ const cwd = createTempDir();
3355
+ t.after(() => rmSync(cwd, { recursive: true, force: true }));
3356
+ // No task dir or registry entry is created here; the session manager is built from an empty entry list.
3357
+ const notifications: Array<{ message: string; level: string }> = [];
3358
+ const harness = createHarness();
3359
+ const handler = harness.handler("ralph-stop");
3360
+ const ctx = {
3361
+ cwd,
3362
+ hasUI: false,
3363
+ ui: {
3364
+ notify: (message: string, level: string) => notifications.push({ message, level }),
3365
+ select: async () => undefined,
3366
+ input: async () => undefined,
3367
+ editor: async () => undefined,
3368
+ setStatus: () => undefined,
3369
+ },
3370
+ sessionManager: createSessionManager([], "session-a"),
3371
+ newSession: async () => ({ cancelled: true }),
3372
+ waitForIdle: async () => undefined,
3373
+ };
3374
+ // Invoke the command with no arguments.
3375
+ await handler("", ctx);
3376
+ // Expect the exact warning-level message and no stop flag under cwd.
3377
+ assert.ok(notifications.some(({ message, level }) => level === "warning" && message === "No active ralph loops found."));
3378
+ assert.equal(existsSync(join(cwd, ".ralph-runner", "stop.flag")), false);
3379
+ });
3380
+
3381
+ test("/ralph-stop --path ignores a stale status file without a matching active registry entry", async (t) => { // Checks that a status file alone, with no registry entry, does not make the task stoppable via --path.
3382
+ const cwd = createTempDir();
3383
+ t.after(() => rmSync(cwd, { recursive: true, force: true }));
3384
+ // Write a status file with status running for the task, but never add the task to the active-loop registry.
3385
+ const taskDir = join(cwd, "stale-status-task");
3386
+ mkdirSync(taskDir, { recursive: true });
3387
+ const ralphPath = join(taskDir, "RALPH.md");
3388
+ writeFileSync(ralphPath, "Task: stale status\n", "utf8");
3389
+ writeStatusFile(taskDir, {
3390
+ loopToken: "stale-status-token",
3391
+ ralphPath,
3392
+ taskDir,
3393
+ cwd,
3394
+ status: "running",
3395
+ currentIteration: 99,
3396
+ maxIterations: 100,
3397
+ timeout: 300,
3398
+ startedAt: new Date(Date.now() - 60_000).toISOString(),
3399
+ guardrails: { blockCommands: [], protectedFiles: [] },
3400
+ });
3401
+ // Build a hasUI: false context with a session manager built from an empty entry list.
3402
+ const notifications: Array<{ message: string; level: string }> = [];
3403
+ const harness = createHarness();
3404
+ const handler = harness.handler("ralph-stop");
3405
+ const ctx = {
3406
+ cwd,
3407
+ hasUI: false,
3408
+ ui: {
3409
+ notify: (message: string, level: string) => notifications.push({ message, level }),
3410
+ select: async () => undefined,
3411
+ input: async () => undefined,
3412
+ editor: async () => undefined,
3413
+ setStatus: () => undefined,
3414
+ },
3415
+ sessionManager: createSessionManager([], "session-a"),
3416
+ newSession: async () => ({ cancelled: true }),
3417
+ waitForIdle: async () => undefined,
3418
+ };
3419
+ // Target the task explicitly through the --path argument.
3420
+ await handler(`--path ${ralphPath}`, ctx);
3421
+ // Expect no stop flag, an empty registry, and a notification saying no active loop was found.
3422
+ assert.equal(existsSync(join(taskDir, ".ralph-runner", "stop.flag")), false);
3423
+ assert.equal(readActiveLoopRegistry(cwd).length, 0);
3424
+ assert.ok(notifications.some(({ message }) => message.includes("No active ralph loop found")));
3425
+ });
3426
+
3427
+ test("/ralph-stop falls back to the durable registry when session state is absent", async (t) => { // Checks that a single registry entry is enough to stop a loop when the session manager returns no entries.
3428
+ const cwd = createTempDir();
3429
+ t.after(() => rmSync(cwd, { recursive: true, force: true }));
3430
+ // Register one loop with status running (iteration 2 of 5); this test does not write a RALPH.md file to disk.
3431
+ const taskDir = join(cwd, "registry-task");
3432
+ mkdirSync(taskDir, { recursive: true });
3433
+ const registryEntry: ActiveLoopRegistryEntry = {
3434
+ taskDir,
3435
+ ralphPath: join(taskDir, "RALPH.md"),
3436
+ cwd,
3437
+ loopToken: "registry-loop-token",
3438
+ status: "running",
3439
+ currentIteration: 2,
3440
+ maxIterations: 5,
3441
+ startedAt: new Date().toISOString(),
3442
+ updatedAt: new Date().toISOString(),
3443
+ };
3444
+ writeActiveLoopRegistryEntry(cwd, registryEntry);
3445
+ // The session manager stub below returns no entries and no session file.
3446
+ const notifications: Array<{ message: string; level: string }> = [];
3447
+ const harness = createHarness();
3448
+ const handler = harness.handler("ralph-stop");
3449
+ const ctx = {
3450
+ cwd,
3451
+ hasUI: false,
3452
+ ui: {
3453
+ notify: (message: string, level: string) => notifications.push({ message, level }),
3454
+ select: async () => undefined,
3455
+ input: async () => undefined,
3456
+ editor: async () => undefined,
3457
+ setStatus: () => undefined,
3458
+ },
3459
+ sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
3460
+ newSession: async () => ({ cancelled: true }),
3461
+ waitForIdle: async () => undefined,
3462
+ };
3463
+ // Invoke the command with no arguments.
3464
+ await handler("", ctx);
3465
+ // Expect a stop flag, exactly one active entry carrying a stopRequestedAt string, the stopping notice, and no no-active-loop message.
3466
+ assert.equal(existsSync(join(taskDir, ".ralph-runner", "stop.flag")), true);
3467
+ const activeEntries = listActiveLoopRegistryEntries(cwd);
3468
+ assert.equal(activeEntries.length, 1);
3469
+ assert.equal(typeof activeEntries[0]?.stopRequestedAt, "string");
3470
+ assert.ok(notifications.some(({ message }) => message.includes("Ralph loop stopping after current iteration")));
3471
+ assert.equal(notifications.some(({ message }) => message.includes("No active ralph loop")), false);
3472
+ });
3473
+
3474
+ test("/ralph-stop refuses to guess when multiple durable active loops exist", async (t) => { // Checks that an untargeted stop touches neither loop when two registry entries are running.
3475
+ const cwd = createTempDir();
3476
+ t.after(() => rmSync(cwd, { recursive: true, force: true }));
3477
+ // Register two loops with status running under the same cwd, each with its own task dir and token.
3478
+ const taskDirA = join(cwd, "registry-task-a");
3479
+ const taskDirB = join(cwd, "registry-task-b");
3480
+ mkdirSync(taskDirA, { recursive: true });
3481
+ mkdirSync(taskDirB, { recursive: true });
3482
+ writeActiveLoopRegistryEntry(cwd, {
3483
+ taskDir: taskDirA,
3484
+ ralphPath: join(taskDirA, "RALPH.md"),
3485
+ cwd,
3486
+ loopToken: "registry-loop-token-a",
3487
+ status: "running",
3488
+ currentIteration: 2,
3489
+ maxIterations: 5,
3490
+ startedAt: new Date().toISOString(),
3491
+ updatedAt: new Date().toISOString(),
3492
+ });
3493
+ writeActiveLoopRegistryEntry(cwd, {
3494
+ taskDir: taskDirB,
3495
+ ralphPath: join(taskDirB, "RALPH.md"),
3496
+ cwd,
3497
+ loopToken: "registry-loop-token-b",
3498
+ status: "running",
3499
+ currentIteration: 1,
3500
+ maxIterations: 5,
3501
+ startedAt: new Date().toISOString(),
3502
+ updatedAt: new Date().toISOString(),
3503
+ });
3504
+ // The session manager stub below returns no entries and no session file.
3505
+ const notifications: Array<{ message: string; level: string }> = [];
3506
+ const harness = createHarness();
3507
+ const handler = harness.handler("ralph-stop");
3508
+ const ctx = {
3509
+ cwd,
3510
+ hasUI: false,
3511
+ ui: {
3512
+ notify: (message: string, level: string) => notifications.push({ message, level }),
3513
+ select: async () => undefined,
3514
+ input: async () => undefined,
3515
+ editor: async () => undefined,
3516
+ setStatus: () => undefined,
3517
+ },
3518
+ sessionManager: { getEntries: () => [], getSessionFile: () => undefined },
3519
+ newSession: async () => ({ cancelled: true }),
3520
+ waitForIdle: async () => undefined,
3521
+ };
3522
+ // Invoke the command with no arguments, i.e. without naming a target.
3523
+ await handler("", ctx);
3524
+ // Expect no stop flag for either task, plus notifications that mention multiple active loops and an explicit target path (matched case-insensitively).
3525
+ assert.equal(existsSync(join(taskDirA, ".ralph-runner", "stop.flag")), false);
3526
+ assert.equal(existsSync(join(taskDirB, ".ralph-runner", "stop.flag")), false);
3527
+ assert.ok(notifications.some(({ message }) => message.toLowerCase().includes("multiple active ralph loops")));
3528
+ assert.ok(notifications.some(({ message }) => message.toLowerCase().includes("explicit target path")));
3529
+ });