rw-runner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ import type { Requirement, CheckResult, Task } from "../types";
2
+ import { ui } from "./ui";
3
+
4
/**
 * Run a single requirement's shell command and evaluate all of its checks.
 *
 * The command is executed through `sh -c` in the current working directory.
 * Checks are evaluated in order against stdout followed by stderr, and the
 * first failing check ends evaluation.
 *
 * @param req - Requirement providing the shell command and its checks.
 * @returns A passing result carrying the captured output, or a failing result
 *   whose `error` describes the first failed check (or the spawn failure).
 */
export async function runRequirement(req: Requirement): Promise<CheckResult> {
  try {
    const proc = Bun.spawn(["sh", "-c", req.command], {
      stdout: "pipe",
      stderr: "pipe",
      cwd: process.cwd(),
    });

    // Drain both pipes concurrently. Reading stdout to completion before
    // touching stderr can deadlock once the child fills the stderr pipe.
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ]);
    const output = stdout + stderr;

    for (const check of req.checks) {
      switch (check.type) {
        case "expect": {
          if (!output.includes(check.value)) {
            return {
              passed: false,
              output,
              error: `Expected output to contain: "${check.value}"`,
            };
          }
          break;
        }

        case "expect-not": {
          if (output.includes(check.value)) {
            return {
              passed: false,
              output,
              error: `Expected output NOT to contain: "${check.value}"`,
            };
          }
          break;
        }

        case "expect-exit": {
          const expectedCode = Number.parseInt(check.value, 10);
          // A non-numeric value can never equal an exit code; say so
          // explicitly instead of reporting "Expected exit code NaN".
          if (Number.isNaN(expectedCode)) {
            return {
              passed: false,
              output,
              error: `Invalid expected exit code: "${check.value}"`,
            };
          }
          if (exitCode !== expectedCode) {
            return {
              passed: false,
              output,
              error: `Expected exit code ${expectedCode}, got ${exitCode}`,
            };
          }
          break;
        }

        case "eval": {
          // Delegate free-form criteria to the LLM evaluator.
          const evalResult = await evaluateWithLLM(output, check.value);
          if (!evalResult.passed) {
            return {
              passed: false,
              output,
              error: evalResult.error,
            };
          }
          break;
        }
      }
    }

    return { passed: true, output };
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    return {
      passed: false,
      output: "",
      error: `Command failed: ${message}`,
    };
  }
}
75
+
76
/**
 * Execute a task's requirements one after another, stopping at the first
 * failure.
 *
 * @param task - Task whose `requirements` are run in order.
 * @returns `passed` is true only when every requirement passed; `results`
 *   holds the outcome of each requirement that was actually run.
 */
export async function runTaskRequirements(
  task: Task
): Promise<{ passed: boolean; results: CheckResult[] }> {
  const collected: CheckResult[] = [];

  for (const requirement of task.requirements) {
    ui.info(`Running: ${requirement.command}`);
    const outcome = await runRequirement(requirement);
    collected.push(outcome);

    if (!outcome.passed) {
      // Later requirements are skipped once one has failed.
      ui.error(`Failed: ${outcome.error}`);
      return { passed: false, results: collected };
    }

    ui.success(`Passed`);
  }

  return { passed: true, results: collected };
}
99
+
100
/**
 * Ask the `claude` CLI whether command output satisfies free-form criteria.
 *
 * Only the first 5000 characters of `output` are included in the prompt.
 *
 * @param output - Captured command output to judge.
 * @param criteria - Natural-language criteria from an `eval` check.
 * @returns `{ passed: true }` when the reply starts with "PASS"; otherwise a
 *   failure whose `error` is the model's reason, or a description of why the
 *   CLI itself could not be run.
 */
async function evaluateWithLLM(
  output: string,
  criteria: string
): Promise<{ passed: boolean; error?: string }> {
  try {
    const prompt = `You are evaluating command output against criteria.

Output:
\`\`\`
${output.slice(0, 5000)}
\`\`\`

Criteria: ${criteria}

Does the output meet the criteria? Reply with only "PASS" or "FAIL: <reason>"`;

    const proc = Bun.spawn(["claude", "-p", prompt, "--no-input"], {
      stdout: "pipe",
      stderr: "pipe",
    });

    // Drain stdout and stderr together so a chatty stderr cannot stall the
    // child, and keep the exit code to tell a CLI failure from a verdict.
    const [response, errorText, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ]);

    const trimmed = response.trim();
    if (trimmed.startsWith("PASS")) {
      return { passed: true };
    }

    // A non-zero exit without a FAIL verdict means the CLI broke; report
    // that instead of pretending the criteria were evaluated.
    if (exitCode !== 0 && !/^FAIL/i.test(trimmed)) {
      const detail = errorText.trim() || trimmed || "no output";
      return {
        passed: false,
        error: `LLM eval failed: claude exited with code ${exitCode}: ${detail}`,
      };
    }

    const reason = trimmed.replace(/^FAIL:?\s*/i, "") || "Criteria not met";
    return { passed: false, error: reason };
  } catch (e) {
    // Spawning failed (for example the CLI is not installed). There is no
    // fallback evaluator, so the check is reported as failed.
    const message = e instanceof Error ? e.message : String(e);
    return {
      passed: false,
      error: `LLM eval failed: ${message}`,
    };
  }
}
@@ -0,0 +1,89 @@
1
+ import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { join } from "node:path";
4
+ import type { Config } from "../types";
5
+
6
/** Directory under the user's home directory that holds Ralph's files. */
const RALPH_HOME = join(homedir(), ".ralph");

/** Location of the YAML config file inside RALPH_HOME. */
const CONFIG_PATH = join(RALPH_HOME, "config.yaml");

/** Values used for any setting the config file does not provide. */
const DEFAULT_CONFIG: Config = {
  model: "claude-sonnet",
  maxRetries: 3,
  loopDelay: 2,
};
14
+
15
/**
 * Minimal `key: value` parser covering only the keys of our config file.
 *
 * Recognised keys are `model`, `max_retries` and `loop_delay`; anything else
 * is ignored. Full-line and trailing `#` comments are skipped, surrounding
 * quotes are removed from values, and numeric settings that do not parse are
 * left unset so the caller's defaults apply.
 *
 * @param content - Raw text of the config file.
 * @returns The settings that were present and valid.
 */
function parseYaml(content: string): Partial<Config> {
  const config: Partial<Config> = {};

  for (const rawLine of content.split(/\r?\n/)) {
    const trimmedLine = rawLine.trim();
    if (trimmedLine === "" || trimmedLine.startsWith("#")) continue;

    // Split on the FIRST colon only so values may themselves contain ":".
    const separator = trimmedLine.indexOf(":");
    if (separator === -1) continue;

    const key = trimmedLine.slice(0, separator).trim();
    const value = trimmedLine
      .slice(separator + 1)
      .replace(/\s+#.*$/, "") // drop a trailing "# comment"
      .trim()
      .replace(/^(["'])(.*)\1$/, "$2"); // unwrap "quoted" / 'quoted' values
    if (!key || !value) continue;

    switch (key) {
      case "model":
        config.model = value;
        break;
      case "max_retries": {
        const parsed = Number.parseInt(value, 10);
        // Skip garbage rather than letting NaN override the default.
        if (!Number.isNaN(parsed)) config.maxRetries = parsed;
        break;
      }
      case "loop_delay": {
        const parsed = Number.parseInt(value, 10);
        if (!Number.isNaN(parsed)) config.loopDelay = parsed;
        break;
      }
    }
  }

  return config;
}
39
+
40
/**
 * Render a config object as the commented YAML text stored on disk.
 *
 * @param config - Complete configuration to serialise.
 * @returns YAML text with one commented section per setting and a trailing
 *   newline.
 */
function generateYaml(config: Config): string {
  const lines = [
    "# Ralph configuration",
    "",
    "# Model for LLM evaluations",
    `model: ${config.model}`,
    "",
    "# Max retries before marking task as failed",
    `max_retries: ${config.maxRetries}`,
    "",
    "# Delay between loop iterations (seconds)",
    `loop_delay: ${config.loopDelay}`,
    "", // produces the trailing newline
  ];
  return lines.join("\n");
}
54
+
55
/**
 * Create the Ralph home directory (RALPH_HOME) when it is not already there.
 */
export function ensureConfigDir(): void {
  if (existsSync(RALPH_HOME)) return;

  mkdirSync(RALPH_HOME, { recursive: true });
}
61
+
62
/**
 * Load the configuration from CONFIG_PATH, layered over the defaults.
 *
 * The config directory is created as a side effect. A missing or unreadable
 * file yields the defaults (deliberate best-effort behaviour).
 *
 * @returns A fresh config object; callers may mutate it without affecting
 *   DEFAULT_CONFIG.
 */
export function readConfig(): Config {
  ensureConfigDir();

  if (!existsSync(CONFIG_PATH)) {
    // Hand out a copy so a caller mutation cannot corrupt the shared defaults.
    return { ...DEFAULT_CONFIG };
  }

  try {
    const content = readFileSync(CONFIG_PATH, "utf-8");
    const parsed = parseYaml(content);
    return { ...DEFAULT_CONFIG, ...parsed };
  } catch {
    // Unreadable config: fall back to defaults instead of failing the run.
    return { ...DEFAULT_CONFIG };
  }
}
78
+
79
/**
 * Merge the given settings into the stored configuration and write the
 * result to CONFIG_PATH.
 *
 * @param config - Settings to change. Keys that are absent or explicitly
 *   `undefined` keep their currently stored (or default) value.
 */
export function writeConfig(config: Partial<Config>): void {
  ensureConfigDir();

  // Drop explicit `undefined` entries: spreading them would overwrite a real
  // value and serialise the literal text "undefined" into the file.
  const overrides = Object.fromEntries(
    Object.entries(config).filter(([, value]) => value !== undefined)
  ) as Partial<Config>;

  // readConfig() already layers the file over the defaults.
  const merged: Config = { ...readConfig(), ...overrides };
  writeFileSync(CONFIG_PATH, generateYaml(merged));
}
85
+
86
/**
 * Expose the config file location, e.g. for showing it to the user.
 *
 * @returns The value of CONFIG_PATH.
 */
export function getConfigPath(): string {
  return CONFIG_PATH;
}
@@ -0,0 +1,262 @@
1
+ import type { Spec, Task, Config } from "../types";
2
+ import { ui } from "./ui";
3
+ import {
4
+ readSpec,
5
+ updateTaskStatus,
6
+ getPendingTasks,
7
+ } from "./spec";
8
+ import { runTaskRequirements } from "./checker";
9
+ import {
10
+ generatePrompt,
11
+ spawnClaude,
12
+ spawnInteractiveClaude,
13
+ parseDoneOutput,
14
+ findTaskByDescription,
15
+ } from "./runner";
16
+ import * as readline from "node:readline";
17
+
18
/** Mutable state shared between the main loop and the keyboard handler. */
interface LoopState {
  /** Loop condition of the main loop; the `q` hotkey sets it to false. */
  running: boolean;
  /** Set by the `t` hotkey to request interactive takeover. */
  takeover: boolean;
  /** Starts as null; not otherwise used by the code in this file. */
  currentTask: Task | null;
  /** Failed-attempt counter, keyed by task description. */
  retryCount: Map<string, number>;
  /** The Claude child process spawned for the current iteration, if any. */
  claudeProcess: ReturnType<typeof Bun.spawn> | null;
}
25
+
26
/** Settings applied when `startLoop` is called without overrides. */
const DEFAULT_CONFIG: Config = {
  model: "claude-sonnet",
  maxRetries: 3,
  loopDelay: 2,
};
31
+
32
/**
 * Run the main RW loop: repeatedly spawn Claude on the pending tasks, stream
 * its output, and react to a `DONE:` line, a takeover request, or a quit
 * request until no pending tasks remain.
 *
 * @param config - Optional overrides layered on top of DEFAULT_CONFIG.
 * @returns Resolves when all tasks are completed or the user quits.
 */
export async function startLoop(config: Partial<Config> = {}): Promise<void> {
  const cfg = { ...DEFAULT_CONFIG, ...config };

  const state: LoopState = {
    running: true,
    takeover: false,
    currentTask: null,
    retryCount: new Map(),
    claudeProcess: null,
  };

  // A DONE line is only acted on once its terminating newline has arrived,
  // so a task name split across two chunks is never truncated.
  const completeDoneLine = /DONE:[^\n]*\n/;

  // Set up keyboard input
  setupKeyboardInput(state);

  ui.logo();
  ui.header("Starting RW loop");
  ui.hotkeys();

  try {
    while (state.running) {
      const spec = readSpec();
      const pendingTasks = getPendingTasks();

      if (pendingTasks.length === 0) {
        ui.newline();
        ui.success("All tasks completed!");
        break;
      }

      ui.divider();
      ui.info(`${pendingTasks.length} task(s) remaining`);

      // Run Claude
      const prompt = generatePrompt(spec);
      ui.header("Running Claude...");

      state.claudeProcess = spawnClaude(prompt);
      let output = "";

      // Stream output
      const reader = state.claudeProcess.stdout.getReader();
      const decoder = new TextDecoder();

      try {
        let streamEnded = false;

        while (!state.takeover) {
          const { done, value } = await reader.read();
          if (done) {
            streamEnded = true;
            // Flush bytes the streaming decoder may still be holding.
            const tail = decoder.decode();
            if (tail) {
              output += tail;
              process.stdout.write(tail);
            }
            break;
          }

          const chunk = decoder.decode(value, { stream: true });
          output += chunk;
          process.stdout.write(chunk);

          if (completeDoneLine.test(output)) break;
        }

        if (state.takeover) {
          // The hotkey kills Claude, which usually surfaces here as a
          // finished stream; handle the takeover after the read loop so the
          // request is never lost.
          await handleTakeover(state, cfg);
        } else if (state.running && output.includes("DONE:")) {
          if (!streamEnded) {
            // Brief grace period before stopping a still-running process.
            await Bun.sleep(500);
          }

          // Kill Claude process
          state.claudeProcess?.kill();

          const taskName = parseDoneOutput(output);
          if (taskName) {
            await handleTaskCompletion(spec, taskName, state, cfg);
          }
        }
      } catch (e) {
        // Process was killed (takeover or quit)
        if (!state.takeover) {
          const message = e instanceof Error ? e.message : String(e);
          ui.error(`Claude process error: ${message}`);
        }
      }

      // Wait before next iteration
      if (state.running && !state.takeover) {
        await Bun.sleep(cfg.loopDelay * 1000);
      }

      state.takeover = false;
    }
  } finally {
    // Always give the terminal back in cooked mode, even after an error.
    process.stdin.setRawMode?.(false);
    process.stdin.pause();
  }
}
123
+
124
/**
 * React to a `DONE:` announcement: look up the named task, run its
 * requirements, and update its status and retry counter accordingly.
 *
 * @param spec - Spec whose tasks are searched for `taskName`.
 * @param taskName - Task description reported after `DONE:`.
 * @param state - Loop state holding the per-task retry counters.
 * @param cfg - Supplies `maxRetries`.
 */
async function handleTaskCompletion(
  spec: Spec,
  taskName: string,
  state: LoopState,
  cfg: Config
): Promise<void> {
  ui.newline();
  ui.header(`Task completed: ${taskName}`);

  const task = findTaskByDescription(spec.tasks, taskName);
  if (!task) {
    ui.warn(`Could not find task matching: "${taskName}"`);
    return;
  }

  // Nothing to verify: accept the task as-is.
  if (task.requirements.length === 0) {
    ui.info("No requirements to check, marking complete");
    updateTaskStatus(task.description, "completed");
    ui.success("Task marked complete");
    return;
  }

  ui.header("Running requirements...");
  const outcome = await runTaskRequirements(task);

  if (outcome.passed) {
    updateTaskStatus(task.description, "completed");
    ui.success("All requirements passed! Task marked complete");
    state.retryCount.delete(task.description);
    return;
  }

  // Requirements failed: count the attempt against the retry budget.
  const attempts = (state.retryCount.get(task.description) || 0) + 1;
  state.retryCount.set(task.description, attempts);

  if (attempts >= cfg.maxRetries) {
    ui.error(`Task failed after ${cfg.maxRetries} attempts, skipping`);
    updateTaskStatus(task.description, "failed");
    state.retryCount.delete(task.description);
    return;
  }

  // The task stays pending, so the next loop iteration picks it up again.
  ui.warn(`Attempt ${attempts}/${cfg.maxRetries} failed, retrying...`);
}
167
+
168
/**
 * Hand the terminal to an interactive Claude session, then optionally run
 * the requirements of the first pending task once the session ends.
 *
 * @param state - Loop state; its Claude process is killed and its `takeover`
 *   flag is cleared before returning.
 * @param cfg - Loop configuration (currently unused here).
 */
async function handleTakeover(state: LoopState, cfg: Config): Promise<void> {
  ui.newline();
  ui.box("TAKEOVER MODE", "yellow");
  ui.info("You now have control. Press Ctrl+D or type /exit when done.");
  ui.newline();

  // Kill automated Claude
  state.claudeProcess?.kill();

  // Detach the hotkey listener and stop reading stdin in this process.
  // Otherwise the parent competes with the interactive child for keystrokes,
  // and a typed "q" or "t" would trigger the loop's hotkeys.
  process.stdin.removeAllListeners("data");
  process.stdin.setRawMode?.(false);
  process.stdin.pause();

  try {
    // Spawn interactive Claude
    const interactive = spawnInteractiveClaude();
    await interactive.exited;

    ui.newline();
    ui.header("Takeover ended");

    // Ask if we should run requirements
    const answer = await askQuestion("Run requirements check? [y/n]: ");
    const normalized = answer.trim().toLowerCase();

    if (normalized === "y" || normalized === "yes") {
      // Assume the user was working on the first pending task.
      const [task] = getPendingTasks();

      if (task) {
        ui.header(`Checking requirements for: ${task.description}`);

        const { passed } = await runTaskRequirements(task);
        if (passed) {
          updateTaskStatus(task.description, "completed");
          ui.success("Task marked complete");
        } else {
          ui.warn("Requirements not met, task remains pending");
        }
      }
    }
  } finally {
    // Re-enable raw mode for hotkeys, even if the session failed.
    setupKeyboardInput(state);
    state.takeover = false;
  }
}
213
+
214
/**
 * Install the single-key hotkey handler on stdin (TTY only).
 *
 * Ctrl+C kills Claude and exits the process, `t`/`T` requests takeover, and
 * `q`/`Q` stops the loop. Any previously registered "data" listeners are
 * replaced.
 *
 * @param state - Loop state mutated by the hotkeys.
 */
function setupKeyboardInput(state: LoopState): void {
  const stdin = process.stdin;
  if (!stdin.isTTY) return;

  const onKey = (key: string): void => {
    if (key === "\u0003") {
      // Ctrl+C
      ui.newline();
      ui.info("Interrupted, exiting...");
      state.claudeProcess?.kill();
      process.exit(0);
    } else if (key === "t" || key === "T") {
      // Takeover
      state.takeover = true;
      state.claudeProcess?.kill();
    } else if (key === "q" || key === "Q") {
      // Quit
      ui.newline();
      ui.info("Quitting...");
      state.running = false;
      state.claudeProcess?.kill();
    }
  };

  stdin.setRawMode(true);
  stdin.resume();
  stdin.setEncoding("utf8");

  stdin.removeAllListeners("data");
  stdin.on("data", onKey);
}
246
+
247
/**
 * Prompt on the terminal and resolve with the line the user enters.
 *
 * @param question - Prompt text to display.
 * @returns The answer exactly as passed to the readline callback.
 */
function askQuestion(question: string): Promise<string> {
  return new Promise<string>((resolve) => {
    const prompt = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });

    // Turn raw mode off after the interface exists (same order as before).
    process.stdin.setRawMode?.(false);

    const onAnswer = (reply: string): void => {
      prompt.close();
      resolve(reply);
    };
    prompt.question(question, onAnswer);
  });
}
@@ -0,0 +1,107 @@
1
+ import type { Spec, Task, Resource } from "../types";
2
+ import { readChangelog } from "./spec";
3
+
4
/**
 * Build the prompt handed to Claude for one loop iteration.
 *
 * @param spec - Spec providing the tasks (only pending ones are listed) and
 *   the available resources.
 * @param failedAttempt - When given, a RETRY MODE section with the failure
 *   message and the last 2000 characters of the changelog is appended.
 * @returns The complete prompt text.
 */
export function generatePrompt(
  spec: Spec,
  failedAttempt?: { task: string; error: string }
): string {
  const taskLines = spec.tasks
    .filter((task) => task.status === "pending")
    .map((task) => `- ${task.description}`)
    .join("\n");
  const resourceLines = spec.resources
    .map((resource) => `- ${resource.path}: ${resource.description}`)
    .join("\n");

  const basePrompt = `You are RW, an autonomous task runner. Your goal is to complete tasks from the spec.

## Your Current Tasks (pick the most important one)
${taskLines}

## Available Resources (read these if you need context)
${resourceLines}

## Instructions
1. Pick the most important pending task
2. Complete it thoroughly
3. Write your changes to .ralph/changelog.md with:
- What you changed
- Why you changed it
- Files modified
4. When done, output exactly: DONE: <task description>

Example completion output:
DONE: Implement user authentication

`;

  if (!failedAttempt) {
    return basePrompt;
  }

  const changelog = readChangelog();
  const retrySection = `
## RETRY MODE
Your previous attempt at "${failedAttempt.task}" failed.

Error from requirements check:
\`\`\`
${failedAttempt.error}
\`\`\`

Read .ralph/changelog.md for your change history, then fix the issues.

Recent changelog:
\`\`\`
${changelog.slice(-2000)}
\`\`\`
`;

  return basePrompt + retrySection;
}
56
+
57
/**
 * Start a non-interactive Claude run for the given prompt, with stdin,
 * stdout and stderr all piped.
 *
 * @param prompt - Prompt text passed via `-p`.
 * @returns The spawned subprocess handle.
 */
export function spawnClaude(prompt: string): ReturnType<typeof Bun.spawn> {
  const command = ["claude", "--dangerously-skip-permissions", "-p", prompt];

  return Bun.spawn(command, {
    stdout: "pipe",
    stderr: "pipe",
    stdin: "pipe",
  });
}
73
+
74
/**
 * Start an interactive Claude session (takeover mode) that inherits this
 * process's stdin, stdout and stderr.
 *
 * @returns The spawned subprocess handle.
 */
export function spawnInteractiveClaude(): ReturnType<typeof Bun.spawn> {
  const command = ["claude"];

  return Bun.spawn(command, {
    stdout: "inherit",
    stderr: "inherit",
    stdin: "inherit",
  });
}
85
+
86
/**
 * Extract the task name from the first `DONE: <task>` line in Claude's output.
 *
 * The name runs from the first non-blank character after `DONE:` to the end
 * of that line. Both LF and CRLF line endings are supported, and a `DONE:`
 * with nothing after it on the same line is not treated as a match.
 *
 * @param output - Accumulated output text.
 * @returns The trimmed task name, or null when no usable DONE line exists.
 */
export function parseDoneOutput(output: string): string | null {
  // [ \t]* (not \s*) keeps the match on the DONE line itself; [^\r\n]* stops
  // before either kind of line terminator.
  const match = output.match(/DONE:[ \t]*(\S[^\r\n]*)/);
  return match?.[1]?.trim() ?? null;
}
91
+
92
/**
 * Find the task a description refers to, case-insensitively.
 *
 * An exact match wins; otherwise the first task whose description contains,
 * or is contained in, the given text is returned. Blank input and tasks with
 * blank descriptions never match, because every string "includes" the empty
 * string and would otherwise select an arbitrary task.
 *
 * @param tasks - Candidate tasks, searched in order.
 * @param description - Text to look for (surrounding whitespace is ignored).
 * @returns The matching task, or null when nothing matches.
 */
export function findTaskByDescription(tasks: Task[], description: string): Task | null {
  const needle = description.trim().toLowerCase();
  if (needle === "") return null;

  // Exact match first
  const exact = tasks.find((t) => t.description.trim().toLowerCase() === needle);
  if (exact) return exact;

  // Partial match in either direction
  const partial = tasks.find((t) => {
    const candidate = t.description.trim().toLowerCase();
    return candidate !== "" && (candidate.includes(needle) || needle.includes(candidate));
  });
  return partial ?? null;
}