npm - rw-runner - Versions diffs - 0.1.0 - Mend

rw-runner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/src/lib/checker.ts ADDED Viewed

@@ -0,0 +1,140 @@
+import type { Requirement, CheckResult, Task } from "../types";
+import { ui } from "./ui";
+/**
+ * Runs a requirement's shell command and evaluates all of its checks.
+ *
+ * The command is executed through `sh -c` in the current working directory.
+ * Checks are evaluated in declaration order against stdout followed by
+ * stderr; evaluation stops at the first check that does not hold.
+ *
+ * @param req - Requirement supplying the `command` to run and its `checks`.
+ * @returns `passed: true` with the captured output when every check holds,
+ *   otherwise `passed: false` with an `error` describing the first failure.
+ *   Errors raised while spawning or reading the process are reported the
+ *   same way instead of being thrown.
+ */
+export async function runRequirement(req: Requirement): Promise<CheckResult> {
+  try {
+    const proc = Bun.spawn(["sh", "-c", req.command], {
+      stdout: "pipe",
+      stderr: "pipe",
+      cwd: process.cwd(),
+    });
+    // Drain both pipes concurrently: reading them one after the other can
+    // stall the child once the pipe that is not being read fills up.
+    const [stdout, stderr, exitCode] = await Promise.all([
+      new Response(proc.stdout).text(),
+      new Response(proc.stderr).text(),
+      proc.exited,
+    ]);
+    const output = stdout + stderr;
+    for (const check of req.checks) {
+      switch (check.type) {
+        case "expect": {
+          if (!output.includes(check.value)) {
+            return {
+              passed: false,
+              output,
+              error: `Expected output to contain: "${check.value}"`,
+            };
+          }
+          break;
+        }
+        case "expect-not": {
+          if (output.includes(check.value)) {
+            return {
+              passed: false,
+              output,
+              error: `Expected output NOT to contain: "${check.value}"`,
+            };
+          }
+          break;
+        }
+        case "expect-exit": {
+          const expectedCode = Number.parseInt(check.value, 10);
+          // A non-numeric value can never match; say so explicitly instead
+          // of reporting "Expected exit code NaN".
+          if (Number.isNaN(expectedCode)) {
+            return {
+              passed: false,
+              output,
+              error: `Invalid expected exit code: "${check.value}"`,
+            };
+          }
+          if (exitCode !== expectedCode) {
+            return {
+              passed: false,
+              output,
+              error: `Expected exit code ${expectedCode}, got ${exitCode}`,
+            };
+          }
+          break;
+        }
+        case "eval": {
+          // Delegates the judgement to the Claude CLI.
+          const evalResult = await evaluateWithLLM(output, check.value);
+          if (!evalResult.passed) {
+            return {
+              passed: false,
+              output,
+              error: evalResult.error,
+            };
+          }
+          break;
+        }
+      }
+    }
+    return { passed: true, output };
+  } catch (e: unknown) {
+    const message = e instanceof Error ? e.message : String(e);
+    return {
+      passed: false,
+      output: "",
+      error: `Command failed: ${message}`,
+    };
+  }
+}
+/**
+ * Runs a task's requirements one after another, stopping at the first
+ * failure.
+ *
+ * @param task - Task whose `requirements` are executed in order.
+ * @returns `passed` is true only when every requirement passed; `results`
+ *   holds one entry per requirement that was actually run.
+ */
+export async function runTaskRequirements(
+  task: Task
+): Promise<{ passed: boolean; results: CheckResult[] }> {
+  const collected: CheckResult[] = [];
+  for (const requirement of task.requirements) {
+    ui.info(`Running: ${requirement.command}`);
+    const outcome = await runRequirement(requirement);
+    collected.push(outcome);
+    if (!outcome.passed) {
+      ui.error(`Failed: ${outcome.error}`);
+      // Fail fast: the remaining requirements are not executed.
+      return { passed: false, results: collected };
+    }
+    ui.success(`Passed`);
+  }
+  return { passed: true, results: collected };
+}
+/**
+ * Asks the `claude` CLI whether command output satisfies an `[eval]` check.
+ *
+ * Only the first 5000 characters of the output are sent. The reply is
+ * expected to be "PASS" or "FAIL: <reason>".
+ *
+ * @param output - Captured command output to judge.
+ * @param criteria - Free-text criteria the output must meet.
+ * @returns `passed: true` when the reply starts with "PASS"; otherwise
+ *   `passed: false` with the model's reason, or with the CLI's stderr or
+ *   exit code when it produced no reply at all.
+ */
+async function evaluateWithLLM(
+  output: string,
+  criteria: string
+): Promise<{ passed: boolean; error?: string }> {
+  try {
+    const prompt = `You are evaluating command output against criteria.
+Output:
+\`\`\`
+${output.slice(0, 5000)}
+\`\`\`
+Criteria: ${criteria}
+Does the output meet the criteria? Reply with only "PASS" or "FAIL: <reason>"`;
+    const proc = Bun.spawn(["claude", "-p", prompt, "--no-input"], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    // Read stderr as well: leaving a piped stream unread can stall the
+    // child, and its content is the only diagnostic when the CLI fails.
+    const [response, errText, exitCode] = await Promise.all([
+      new Response(proc.stdout).text(),
+      new Response(proc.stderr).text(),
+      proc.exited,
+    ]);
+    const trimmed = response.trim();
+    if (trimmed.startsWith("PASS")) {
+      return { passed: true };
+    }
+    if (trimmed === "") {
+      // No verdict at all means the CLI failed, not that the criteria
+      // were judged unmet.
+      const detail = errText.trim() || `claude exited with code ${exitCode}`;
+      return { passed: false, error: `LLM eval failed: ${detail}` };
+    }
+    const reason = trimmed.replace(/^FAIL:?\s*/i, "") || "Criteria not met";
+    return { passed: false, error: reason };
+  } catch (e: unknown) {
+    // Spawning or reading failed (for example the CLI is not installed);
+    // there is no fallback evaluator, so the check is reported as failed.
+    const message = e instanceof Error ? e.message : String(e);
+    return {
+      passed: false,
+      error: `LLM eval failed: ${message}`,
+    };
+  }
+}

package/src/lib/config.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import type { Config } from "../types";
+// Per-user directory (~/.ralph) that contains the config file.
+const RALPH_HOME = join(homedir(), ".ralph");
+// Full path of the YAML configuration file inside RALPH_HOME.
+const CONFIG_PATH = join(RALPH_HOME, "config.yaml");
+// Values used for every setting the config file does not provide.
+const DEFAULT_CONFIG: Config = {
+  model: "claude-sonnet",
+  maxRetries: 3,
+  loopDelay: 2,
+};
+/**
+ * Minimal parser for the flat `key: value` subset of YAML used by the
+ * config file. Blank lines, `#` comment lines, unknown keys and entries
+ * with an empty value are ignored.
+ *
+ * @param content - Raw text of the config file.
+ * @returns Only the settings that were present and valid, so the result
+ *   can be spread over the defaults.
+ */
+function parseYaml(content: string): Partial<Config> {
+  const config: Partial<Config> = {};
+  for (const rawLine of content.split("\n")) {
+    const line = rawLine.trim();
+    if (line === "" || line.startsWith("#")) continue;
+    // Split on the first colon only, so values may themselves contain ":".
+    const separator = line.indexOf(":");
+    if (separator === -1) continue;
+    const key = line.slice(0, separator).trim();
+    const rawValue = line.slice(separator + 1).trim();
+    // Accept optionally quoted scalars: "value" or 'value'.
+    const quoted = /^(["'])(.*)\1$/.exec(rawValue);
+    const value = quoted ? quoted[2] : rawValue;
+    if (!key || !value) continue;
+    switch (key) {
+      case "model":
+        config.model = value;
+        break;
+      case "max_retries": {
+        const parsed = parseNonNegativeInt(value);
+        if (parsed !== undefined) config.maxRetries = parsed;
+        break;
+      }
+      case "loop_delay": {
+        const parsed = parseNonNegativeInt(value);
+        if (parsed !== undefined) config.loopDelay = parsed;
+        break;
+      }
+    }
+  }
+  return config;
+}
+/**
+ * Parses a base-10 integer, returning `undefined` for anything that is not
+ * a non-negative number so an invalid entry falls back to the default
+ * instead of storing `NaN`.
+ */
+function parseNonNegativeInt(value: string): number | undefined {
+  const parsed = Number.parseInt(value, 10);
+  return Number.isNaN(parsed) || parsed < 0 ? undefined : parsed;
+}
+/**
+ * Renders a config object as the commented YAML text stored on disk.
+ *
+ * @param config - Complete configuration to serialise.
+ * @returns The file content, terminated by a newline.
+ */
+function generateYaml(config: Config): string {
+  const lines = [
+    "# Ralph configuration",
+    "# Model for LLM evaluations",
+    `model: ${config.model}`,
+    "# Max retries before marking task as failed",
+    `max_retries: ${config.maxRetries}`,
+    "# Delay between loop iterations (seconds)",
+    `loop_delay: ${config.loopDelay}`,
+  ];
+  return lines.join("\n") + "\n";
+}
+/**
+ * Creates the ~/.ralph directory (including missing parents) unless
+ * something already exists at that path.
+ */
+export function ensureConfigDir(): void {
+  if (existsSync(RALPH_HOME)) {
+    return;
+  }
+  mkdirSync(RALPH_HOME, { recursive: true });
+}
+/**
+ * Loads the configuration from ~/.ralph/config.yaml, layered over the
+ * defaults. The config directory is created first if it is missing.
+ *
+ * @returns A fresh object on every call. When the file is missing or
+ *   cannot be read, a copy of the defaults is returned.
+ */
+export function readConfig(): Config {
+  ensureConfigDir();
+  // Never hand out DEFAULT_CONFIG itself: a caller mutating the result
+  // would otherwise change the defaults for everyone else.
+  if (!existsSync(CONFIG_PATH)) {
+    return { ...DEFAULT_CONFIG };
+  }
+  try {
+    const content = readFileSync(CONFIG_PATH, "utf-8");
+    return { ...DEFAULT_CONFIG, ...parseYaml(content) };
+  } catch {
+    // Best effort: an unreadable file is treated like a missing one.
+    return { ...DEFAULT_CONFIG };
+  }
+}
+/**
+ * Persists configuration changes to ~/.ralph/config.yaml.
+ *
+ * @param config - Settings to change; omitted keys keep the value currently
+ *   on disk, or the default when the file does not provide one.
+ */
+export function writeConfig(config: Partial<Config>): void {
+  ensureConfigDir();
+  const current = readConfig();
+  // Precedence, lowest to highest: defaults, file on disk, caller overrides.
+  const next = { ...DEFAULT_CONFIG, ...current, ...config };
+  const serialised = generateYaml(next);
+  writeFileSync(CONFIG_PATH, serialised);
+}
+/**
+ * Returns the path of the config file, for display purposes.
+ *
+ * @returns The value of CONFIG_PATH (config.yaml inside ~/.ralph).
+ */
+export function getConfigPath(): string {
+  return CONFIG_PATH;
+}

package/src/lib/loop.ts ADDED Viewed

@@ -0,0 +1,262 @@
+import type { Spec, Task, Config } from "../types";
+import { ui } from "./ui";
+import {
+  readSpec,
+  updateTaskStatus,
+  getPendingTasks,
+} from "./spec";
+import { runTaskRequirements } from "./checker";
+import {
+  generatePrompt,
+  spawnClaude,
+  spawnInteractiveClaude,
+  parseDoneOutput,
+  findTaskByDescription,
+} from "./runner";
+import * as readline from "node:readline";
+// Mutable state shared between the main loop and the keyboard handler.
+interface LoopState {
+  // The main loop keeps iterating while this is true; the "q" hotkey clears it.
+  running: boolean;
+  // Set by the "t" hotkey to request an interactive takeover.
+  takeover: boolean;
+  // Initialised to null by startLoop.
+  // NOTE(review): not read or written elsewhere in this file — confirm it is still needed.
+  currentTask: Task | null;
+  // Failed verification attempts per task, keyed by task description.
+  retryCount: Map<string, number>;
+  // The automated Claude subprocess most recently spawned, if any.
+  claudeProcess: ReturnType<typeof Bun.spawn> | null;
+}
+// Fallback settings merged under whatever the caller passes to startLoop.
+// loopDelay is in seconds (startLoop multiplies it by 1000 before sleeping).
+// NOTE(review): same values as DEFAULT_CONFIG in config.ts — keep in sync.
+const DEFAULT_CONFIG: Config = {
+  model: "claude-sonnet",
+  maxRetries: 3,
+  loopDelay: 2,
+};
+/**
+ * Runs the main RW loop: while pending tasks remain, spawn Claude with a
+ * prompt built from the spec, stream its output to the terminal, and react
+ * to a "DONE:" marker or to the takeover/quit hotkeys.
+ *
+ * @param config - Optional overrides merged over DEFAULT_CONFIG.
+ * @returns Resolves when all tasks are complete or the user quits. Terminal
+ *   input is put back into normal mode on the way out, even on error.
+ */
+export async function startLoop(config: Partial<Config> = {}): Promise<void> {
+  const cfg = { ...DEFAULT_CONFIG, ...config };
+  const state: LoopState = {
+    running: true,
+    takeover: false,
+    currentTask: null,
+    retryCount: new Map(),
+    claudeProcess: null,
+  };
+  // How long to keep collecting output after "DONE:" first shows up, so a
+  // task name split across chunks is not truncated.
+  const doneGraceMs = 500;
+  // Set up keyboard input
+  setupKeyboardInput(state);
+  ui.logo();
+  ui.header("Starting RW loop");
+  ui.hotkeys();
+  try {
+    while (state.running) {
+      const spec = readSpec();
+      const pendingTasks = getPendingTasks();
+      if (pendingTasks.length === 0) {
+        ui.newline();
+        ui.success("All tasks completed!");
+        break;
+      }
+      ui.divider();
+      ui.info(`${pendingTasks.length} task(s) remaining`);
+      // Run Claude
+      const prompt = generatePrompt(spec);
+      ui.header("Running Claude...");
+      const claude = spawnClaude(prompt);
+      state.claudeProcess = claude;
+      // Nothing consumes stderr; drain it so a chatty child cannot stall on
+      // a full pipe.
+      if (claude.stderr instanceof ReadableStream) {
+        void new Response(claude.stderr).text().catch(() => "");
+      }
+      let output = "";
+      const reader = claude.stdout.getReader();
+      const decoder = new TextDecoder();
+      const consume = (bytes: Uint8Array): void => {
+        const chunk = decoder.decode(bytes, { stream: true });
+        output += chunk;
+        process.stdout.write(chunk);
+      };
+      try {
+        let sawDone = false;
+        while (!state.takeover) {
+          const { done, value } = await reader.read();
+          if (done) break;
+          consume(value);
+          if (!output.includes("DONE:")) continue;
+          sawDone = true;
+          // Keep reading (rather than just sleeping) for the grace period
+          // so the rest of the DONE line is actually captured.
+          const deadline = Date.now() + doneGraceMs;
+          for (;;) {
+            const remaining = deadline - Date.now();
+            if (remaining <= 0) break;
+            const next = await Promise.race([
+              reader.read().catch(() => null),
+              Bun.sleep(remaining).then(() => null),
+            ]);
+            if (next === null || next.done) break;
+            consume(next.value);
+          }
+          break;
+        }
+        if (state.takeover) {
+          // The hotkey kills the child, which ends the stream; the takeover
+          // must therefore be handled after the read loop, not only before
+          // the next read.
+          await handleTakeover(state, cfg);
+        } else if (sawDone) {
+          // Kill Claude process
+          claude.kill();
+          const taskName = parseDoneOutput(output);
+          if (taskName) {
+            await handleTaskCompletion(spec, taskName, state, cfg);
+          }
+        }
+      } catch (e: unknown) {
+        // Process was killed (takeover or quit)
+        if (!state.takeover) {
+          const message = e instanceof Error ? e.message : String(e);
+          ui.error(`Claude process error: ${message}`);
+        }
+      }
+      // Wait before next iteration
+      if (state.running && !state.takeover) {
+        await Bun.sleep(cfg.loopDelay * 1000);
+      }
+      state.takeover = false;
+    }
+  } finally {
+    // Cleanup: always leave the terminal in normal (cooked) mode.
+    process.stdin.setRawMode?.(false);
+    process.stdin.pause();
+  }
+}
+/**
+ * Handles a "DONE: <task>" report: resolves the task, runs its
+ * requirements and updates its status and retry counter.
+ *
+ * @param spec - Spec that was used to build the prompt for this run.
+ * @param taskName - Task description as reported by Claude.
+ * @param state - Loop state; only `retryCount` is read and updated.
+ * @param cfg - Effective configuration; `maxRetries` bounds the attempts.
+ */
+async function handleTaskCompletion(
+  spec: Spec,
+  taskName: string,
+  state: LoopState,
+  cfg: Config
+): Promise<void> {
+  ui.newline();
+  ui.header(`Task completed: ${taskName}`);
+  // Claude was only offered pending tasks, so prefer those; this keeps a
+  // fuzzy match from landing on a task that is already completed or failed.
+  const pending = spec.tasks.filter((t) => t.status === "pending");
+  const task =
+    findTaskByDescription(pending, taskName) ??
+    findTaskByDescription(spec.tasks, taskName);
+  if (!task) {
+    ui.warn(`Could not find task matching: "${taskName}"`);
+    return;
+  }
+  if (task.requirements.length === 0) {
+    ui.info("No requirements to check, marking complete");
+    updateTaskStatus(task.description, "completed");
+    ui.success("Task marked complete");
+    return;
+  }
+  ui.header("Running requirements...");
+  const { passed } = await runTaskRequirements(task);
+  if (passed) {
+    updateTaskStatus(task.description, "completed");
+    ui.success("All requirements passed! Task marked complete");
+    state.retryCount.delete(task.description);
+    return;
+  }
+  const retries = (state.retryCount.get(task.description) ?? 0) + 1;
+  state.retryCount.set(task.description, retries);
+  if (retries >= cfg.maxRetries) {
+    ui.error(`Task failed after ${cfg.maxRetries} attempts, skipping`);
+    updateTaskStatus(task.description, "failed");
+    state.retryCount.delete(task.description);
+  } else {
+    ui.warn(`Attempt ${retries}/${cfg.maxRetries} failed, retrying...`);
+    // The task's status is left untouched, so it is still pending on the
+    // next loop iteration. NOTE(review): startLoop calls generatePrompt
+    // without a failedAttempt, so the retry prompt carries no error context.
+  }
+}
+/**
+ * Hands the terminal to an interactive Claude session, then optionally
+ * verifies the first pending task once the session ends.
+ *
+ * @param state - Loop state; the automated process is killed and the
+ *   `takeover` flag is cleared before returning.
+ * @param cfg - Effective configuration (currently unused here).
+ */
+async function handleTakeover(state: LoopState, cfg: Config): Promise<void> {
+  ui.newline();
+  ui.box("TAKEOVER MODE", "yellow");
+  ui.info("You now have control. Press Ctrl+D or type /exit when done.");
+  ui.newline();
+  // Kill automated Claude
+  state.claudeProcess?.kill();
+  try {
+    // Give the child exclusive use of the terminal: stop our own hotkey
+    // listener and stop reading stdin, otherwise parent and child compete
+    // for the same keystrokes.
+    process.stdin.removeAllListeners("data");
+    process.stdin.setRawMode?.(false);
+    process.stdin.pause();
+    // Spawn interactive Claude
+    const interactive = spawnInteractiveClaude();
+    await interactive.exited;
+    ui.newline();
+    ui.header("Takeover ended");
+    // Ask if we should run requirements
+    const answer = await askQuestion("Run requirements check? [y/n]: ");
+    if (answer.trim().toLowerCase() === "y") {
+      const pendingTasks = getPendingTasks();
+      if (pendingTasks.length > 0) {
+        // Assume working on first pending task
+        const task = pendingTasks[0];
+        ui.header(`Checking requirements for: ${task.description}`);
+        const { passed } = await runTaskRequirements(task);
+        if (passed) {
+          updateTaskStatus(task.description, "completed");
+          ui.success("Task marked complete");
+        } else {
+          ui.warn("Requirements not met, task remains pending");
+        }
+      }
+    }
+  } finally {
+    // Re-enable raw mode for hotkeys, even if the session failed.
+    setupKeyboardInput(state);
+    state.takeover = false;
+  }
+}
+/**
+ * Installs the single-key hotkey handler on stdin (Ctrl+C, t, q).
+ * Does nothing when stdin is not a TTY. Any previously registered "data"
+ * listeners on stdin are removed first.
+ *
+ * @param state - Loop state the hotkeys mutate.
+ */
+function setupKeyboardInput(state: LoopState): void {
+  const stdin = process.stdin;
+  if (!stdin.isTTY) {
+    return;
+  }
+  const onKey = (key: string): void => {
+    switch (key) {
+      // Ctrl+C
+      case "\u0003":
+        ui.newline();
+        ui.info("Interrupted, exiting...");
+        state.claudeProcess?.kill();
+        process.exit(0);
+      // 't' for takeover
+      case "t":
+      case "T":
+        state.takeover = true;
+        state.claudeProcess?.kill();
+        break;
+      // 'q' for quit
+      case "q":
+      case "Q":
+        ui.newline();
+        ui.info("Quitting...");
+        state.running = false;
+        state.claudeProcess?.kill();
+        break;
+    }
+  };
+  stdin.setRawMode(true);
+  stdin.resume();
+  stdin.setEncoding("utf8");
+  stdin.removeAllListeners("data");
+  stdin.on("data", onKey);
+}
+/**
+ * Prints a prompt and resolves with the answer read from stdin.
+ *
+ * @param question - Prompt text passed to readline.
+ * @returns The answer string handed to the readline callback.
+ */
+function askQuestion(question: string): Promise<string> {
+  return new Promise((resolve) => {
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    });
+    // Switch stdin out of raw mode for the prompt. This function does not
+    // switch it back; callers re-install the hotkey handler afterwards.
+    // NOTE(review): this deliberately runs after createInterface — confirm
+    // how readline treats raw mode before reordering these statements.
+    process.stdin.setRawMode?.(false);
+    rl.question(question, (answer) => {
+      // Close the interface before handing the answer back.
+      rl.close();
+      resolve(answer);
+    });
+  });
+}

package/src/lib/runner.ts ADDED Viewed

@@ -0,0 +1,107 @@
+import type { Spec, Task, Resource } from "../types";
+import { readChangelog } from "./spec";
+/**
+ * Builds the prompt handed to Claude for one loop iteration.
+ *
+ * @param spec - Spec whose pending tasks and resources are listed.
+ * @param failedAttempt - When given, a "RETRY MODE" section is appended
+ *   containing the failing task, the error, and the last 2000 characters
+ *   of the changelog.
+ * @returns The complete prompt text.
+ */
+export function generatePrompt(
+  spec: Spec,
+  failedAttempt?: { task: string; error: string }
+): string {
+  const taskList = spec.tasks
+    .filter((task) => task.status === "pending")
+    .map((task) => `- ${task.description}`)
+    .join("\n");
+  const resourceList = spec.resources
+    .map((resource) => `- ${resource.path}: ${resource.description}`)
+    .join("\n");
+  // The trailing empty entry yields the final newline of the base prompt.
+  const base = [
+    "You are RW, an autonomous task runner. Your goal is to complete tasks from the spec.",
+    "## Your Current Tasks (pick the most important one)",
+    taskList,
+    "## Available Resources (read these if you need context)",
+    resourceList,
+    "## Instructions",
+    "1. Pick the most important pending task",
+    "2. Complete it thoroughly",
+    "3. Write your changes to .ralph/changelog.md with:",
+    "   - What you changed",
+    "   - Why you changed it",
+    "   - Files modified",
+    "4. When done, output exactly: DONE: <task description>",
+    "Example completion output:",
+    "DONE: Implement user authentication",
+    "",
+  ].join("\n");
+  if (!failedAttempt) {
+    return base;
+  }
+  const fence = "```";
+  // Leading and trailing empty entries reproduce the surrounding newlines.
+  const retry = [
+    "",
+    "## RETRY MODE",
+    `Your previous attempt at "${failedAttempt.task}" failed.`,
+    "Error from requirements check:",
+    fence,
+    failedAttempt.error,
+    fence,
+    "Read .ralph/changelog.md for your change history, then fix the issues.",
+    "Recent changelog:",
+    fence,
+    readChangelog().slice(-2000),
+    fence,
+    "",
+  ].join("\n");
+  return base + retry;
+}
+/**
+ * Starts the automated Claude run for a prompt.
+ *
+ * The prompt is passed as the argument of `-p`, together with
+ * `--dangerously-skip-permissions`; stdin, stdout and stderr are all piped
+ * to the caller.
+ *
+ * @param prompt - Prompt text for this run.
+ * @returns The spawned subprocess handle.
+ */
+export function spawnClaude(prompt: string): ReturnType<typeof Bun.spawn> {
+  const argv = ["claude", "--dangerously-skip-permissions", "-p", prompt];
+  return Bun.spawn(argv, {
+    stdout: "pipe",
+    stderr: "pipe",
+    stdin: "pipe",
+  });
+}
+/**
+ * Starts an interactive `claude` session for takeover mode. The child
+ * inherits this process's stdin, stdout and stderr.
+ *
+ * @returns The spawned subprocess handle.
+ */
+export function spawnInteractiveClaude(): ReturnType<typeof Bun.spawn> {
+  const argv = ["claude"];
+  return Bun.spawn(argv, {
+    stdout: "inherit",
+    stderr: "inherit",
+    stdin: "inherit",
+  });
+}
+/**
+ * Extracts the task name from the first "DONE: <task>" marker in the output.
+ *
+ * @param output - Text produced by Claude so far.
+ * @returns The trimmed text following the first "DONE:" up to the end of
+ *   that line, or null when no marker is present.
+ */
+export function parseDoneOutput(output: string): string | null {
+  const found = /DONE:\s*(.+?)(?:\n|$)/.exec(output);
+  if (found === null) {
+    return null;
+  }
+  return found[1].trim();
+}
+/**
+ * Finds the task a reported description refers to, ignoring case.
+ *
+ * An exact match wins. Otherwise the first task whose description contains
+ * the reported text, or is contained in it, is returned.
+ *
+ * @param tasks - Candidate tasks, searched in order.
+ * @param description - Description as reported by Claude.
+ * @returns The matching task, or null when nothing matches or the
+ *   description is blank.
+ */
+export function findTaskByDescription(tasks: Task[], description: string): Task | null {
+  const needle = description.toLowerCase();
+  // A blank needle is a substring of everything and would match the first
+  // task regardless of its content.
+  if (needle.trim() === "") return null;
+  // Exact match first
+  const exact = tasks.find((t) => t.description.toLowerCase() === needle);
+  if (exact) return exact;
+  // Partial match
+  const partial = tasks.find((t) => {
+    const candidate = t.description.toLowerCase();
+    // Likewise, a blank task description is contained in every needle.
+    if (candidate.trim() === "") return false;
+    return candidate.includes(needle) || needle.includes(candidate);
+  });
+  return partial ?? null;
+}