npm - @graypark/loophaus - Versions diffs - 3.2.0 → 3.4.0 - Mend

@graypark/loophaus 3.2.0 → 3.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/bin/loophaus.mjs +71 -1
package/commands/loop-plan.md +53 -1
package/core/engine.mjs +23 -0
package/core/events.mjs +4 -0
package/core/policy.mjs +58 -0
package/core/quality-scorer.mjs +136 -0
package/core/refine-loop.mjs +29 -0
package/core/session.mjs +66 -0
package/core/validate.mjs +3 -0
package/hooks/stop-hook.mjs +49 -0
package/package.json +1 -1
package/platforms/codex-cli/installer.mjs +8 -0
package/platforms/kiro-cli/installer.mjs +8 -0
package/skills/ralph-claude-interview/SKILL.md +2 -0
package/store/state-store.mjs +3 -0

package/bin/loophaus.mjs CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 // loophaus CLI — install, status, stats, uninstall
-import { resolve, dirname } from "node:path";
+import { resolve, dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import { access } from "node:fs/promises";
@@ -46,6 +46,9 @@ Usage:
   npx @graypark/loophaus loops
   npx @graypark/loophaus worktree <create|remove|list>
   npx @graypark/loophaus parallel <prd.json> [--count N] [--base branch]
+  npx @graypark/loophaus quality [--story US-001]
+  npx @graypark/loophaus sessions
+  npx @graypark/loophaus resume <session-id>
   npx @graypark/loophaus --version
 Hosts:
@@ -408,6 +411,28 @@ async function runWorktree() {
   }
 }
+async function runSessions() {
+  const { listSessions } = await import("../core/session.mjs");
+  const sessions = await listSessions();
+  if (sessions.length === 0) { console.log("No saved sessions."); return; }
+  console.log("Sessions");
+  console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
+  for (const s of sessions) {
+    const age = Math.round((Date.now() - new Date(s.savedAt).getTime()) / 60000);
+    console.log(`  ${s.sessionId}  iter=${s.currentIteration || 0}  ${age}m ago`);
+  }
+}
+async function runResume() {
+  const id = args[1];
+  if (!id) { console.log("Usage: loophaus resume <session-id>"); return; }
+  const { resumeSession } = await import("../core/session.mjs");
+  const state = await resumeSession(id);
+  if (!state) { console.log(`Session not found: ${id}`); return; }
+  console.log(`Resumed session ${id} at iteration ${state.currentIteration}`);
+  console.log(`Loop is now active. The stop hook will continue from here.`);
+}
 async function runParallelCmd() {
   const prdPath = args[1] || "prd.json";
   const count = parseInt(getFlag("--count") || "2", 10);
@@ -424,6 +449,48 @@ async function runParallelCmd() {
   }
 }
+async function runQuality() {
+  const storyId = getFlag("--story");
+  const cwd = process.cwd();
+  if (storyId) {
+    const { evaluateStory } = await import("../core/quality-scorer.mjs");
+    const { read } = await import("../store/state-store.mjs");
+    const state = await read(cwd);
+    const config = state.qualityConfig || {};
+    if (!config.typecheckCommand) {
+      try { await access(join(cwd, "tsconfig.json")); config.typecheckCommand = "npx tsc --noEmit"; } catch {}
+    }
+    const result = await evaluateStory(storyId, cwd, config);
+    console.log(`Quality: ${storyId}`);
+    console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
+    console.log(`Score: ${result.score}/100 (${result.grade})`);
+    for (const [k, v] of Object.entries(result.breakdown)) {
+      const bar = "\u2588".repeat(v) + "\u2591".repeat(10 - v);
+      console.log(`  ${k.padEnd(10)} ${bar} ${v}/10`);
+    }
+  } else {
+    const { readResults } = await import("../core/quality-scorer.mjs");
+    const results = await readResults(cwd);
+    if (results.length === 0) { console.log("No quality results yet. Run /loop-plan first."); return; }
+    console.log("Quality Results");
+    console.log("\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550");
+    const byStory = {};
+    for (const r of results) {
+      if (!byStory[r.storyId]) byStory[r.storyId] = [];
+      byStory[r.storyId].push(r);
+    }
+    for (const [sid, attempts] of Object.entries(byStory)) {
+      const best = attempts.reduce((a, b) => a.score > b.score ? a : b);
+      const icon = best.status === "keep" ? "\u2713" : best.status === "discard" ? "\u2717" : "~";
+      console.log(`  ${icon} ${sid}  score: ${best.score}  (${attempts.length} attempts)`);
+    }
+  }
+}
 try {
   switch (command) {
     case "install": await runInstall(); break;
@@ -436,6 +503,9 @@ try {
     case "compare": await runCompare(); break;
     case "worktree": await runWorktree(); break;
     case "parallel": await runParallelCmd(); break;
+    case "quality": await runQuality(); break;
+    case "sessions": await runSessions(); break;
+    case "resume": await runResume(); break;
     default:
       if (command.startsWith("-")) {
         await runInstall();

package/commands/loop-plan.md CHANGED

@@ -172,7 +172,59 @@ Single loop, no worktrees:
 3. Each iteration: implement one story, verify, commit, update prd.json.
 4. Output `<promise>TASK COMPLETE</promise>` when ALL stories pass.
-## Phase 5: Summary Report
+## Phase 5: Evaluate
+After all stories are implemented (parallel or sequential), evaluate each:
+For each story in prd.json:
+1. Run testCommand if defined
+2. Run typecheck if project has tsconfig.json: `npx tsc --noEmit`
+3. Run lint if project has eslint config: `npx eslint . --quiet`
+4. Check .loophaus/verify.sh if exists
+5. Analyze git diff size
+Score each story 0-100. Record in `.loophaus/results.tsv`.
+Display quality dashboard:
+```
+Quality Evaluation
+──────────────────
+  US-001  Add login API        score: 65  (D) <- needs refinement
+  US-002  Add auth middleware   score: 92  (A) ✓
+  US-003  Add login UI         score: 45  (F) <- needs refinement
+Overall: 67/100 — threshold: 80
+Stories needing refinement: 2
+```
+## Phase 6: Refine Loop (autoresearch pattern)
+For each story below the quality threshold (default: 80):
+LOOP (max 3 attempts per story):
+  1. Git checkpoint: `git add -A && git commit -m "checkpoint: <story-id> attempt <N>"`
+  2. Read the quality feedback (which criteria failed, error messages)
+  3. Re-implement with a different approach, focusing on weak areas
+  4. Re-evaluate (same criteria as Phase 5)
+  5. If score improved -> KEEP (advance the commit)
+     If score same or worse -> DISCARD (git reset --hard to checkpoint)
+  6. Record attempt in .loophaus/results.tsv
+  7. If score >= threshold -> DONE with this story
+     If max attempts reached -> move on (best-effort)
+After all refinements:
+```
+Refinement Complete
+───────────────────
+  US-001  65 -> 82 (B) ✓  (2 attempts)
+  US-003  45 -> 78 (C)    (3 attempts, best effort)
+Overall: 84/100 — PASS
+```
+CRITICAL: The refine loop uses git reset --hard to discard bad attempts. This is the autoresearch pattern — safe because we always checkpoint first.
+## Phase 7: Summary Report
 After completion (parallel or sequential), output:

package/core/engine.mjs CHANGED

@@ -27,6 +27,19 @@ export function evaluateStopHook(input, state) {
     };
   }
+  if (input.policy_result && input.policy_result.shouldStop) {
+    nextState.active = false;
+    events.push({ event: "stop", reason: "policy_violation", violations: input.policy_result.violations });
+    const reasons = input.policy_result.violations.map(v => `${v.type}: ${v.current}/${v.limit}`).join(", ");
+    return {
+      decision: "allow",
+      nextState,
+      events,
+      output: null,
+      message: `Loop: policy violation (${reasons}).`,
+    };
+  }
   if (nextState.completionPromise && input.last_assistant_text) {
     if (extractPromise(input.last_assistant_text, nextState.completionPromise)) {
       nextState.active = false;
@@ -57,6 +70,16 @@ export function evaluateStopHook(input, state) {
     events.push({ event: "verify_failed", script: nextState.verifyScript, output: input.verify_result.output || "" });
   }
+  if (input.test_results && input.test_results.length > 0) {
+    const allPassed = input.test_results.every(r => r.passed);
+    if (allPassed) {
+      events.push({ event: "test_result", status: "all_passed", results: input.test_results });
+    } else {
+      const failed = input.test_results.filter(r => !r.passed);
+      events.push({ event: "test_result", status: "some_failed", failed: failed.map(f => f.storyId) });
+    }
+  }
   if (input.stop_hook_active === true) {
     if (!input.has_pending_stories) {
       nextState.active = false;

package/core/events.mjs CHANGED

@@ -15,6 +15,10 @@ export const EventType = {
   CHECKPOINT: "checkpoint",
   ERROR: "error",
   STATE_CHANGE: "state_change",
+  QUALITY_SCORE: "quality_score",
+  REFINE_ATTEMPT: "refine_attempt",
+  REFINE_KEEP: "refine_keep",
+  REFINE_DISCARD: "refine_discard",
 };
 export function filterByType(events, type) {

package/core/policy.mjs ADDED

@@ -0,0 +1,58 @@
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+const DEFAULT_POLICY = {
+  id: "default",
+  conditions: [
+    { type: "max_iterations", value: 20 },
+  ],
+};
+export async function loadPolicy(cwd) {
+  const policyPath = join(cwd || process.cwd(), ".loophaus", "policy.json");
+  try {
+    const raw = await readFile(policyPath, "utf-8");
+    return JSON.parse(raw);
+  } catch {
+    return DEFAULT_POLICY;
+  }
+}
+export function evaluatePolicy(policy, state, context = {}) {
+  const violations = [];
+  for (const condition of policy.conditions || []) {
+    switch (condition.type) {
+      case "max_iterations":
+        if (state.currentIteration > condition.value) {
+          violations.push({ type: "max_iterations", limit: condition.value, current: state.currentIteration });
+        }
+        break;
+      case "max_cost":
+        if (context.totalCost && context.totalCost > condition.value) {
+          violations.push({ type: "max_cost", limit: condition.value, current: context.totalCost });
+        }
+        break;
+      case "max_time_minutes":
+        if (state.startedAt) {
+          const elapsed = (Date.now() - new Date(state.startedAt).getTime()) / 60000;
+          if (elapsed > condition.value) {
+            violations.push({ type: "max_time_minutes", limit: condition.value, current: Math.round(elapsed) });
+          }
+        }
+        break;
+      case "max_errors":
+        if (context.errorCount && context.errorCount > condition.value) {
+          violations.push({ type: "max_errors", limit: condition.value, current: context.errorCount });
+        }
+        break;
+    }
+  }
+  return {
+    shouldStop: violations.length > 0,
+    violations,
+  };
+}
+export { DEFAULT_POLICY };

package/core/quality-scorer.mjs ADDED

@@ -0,0 +1,136 @@
+// core/quality-scorer.mjs
+// Quality scoring for story implementations (autoresearch pattern: val_bpb -> quality score)
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import { readFile, stat } from "node:fs/promises";
+import { join } from "node:path";
+const execFileAsync = promisify(execFile);
+const CRITERIA = {
+  tests:     { weight: 3, max: 10 },
+  typecheck: { weight: 2, max: 10 },
+  lint:      { weight: 1, max: 10 },
+  verify:    { weight: 2, max: 10 },
+  diff:      { weight: 1, max: 10 },
+  custom:    { weight: 1, max: 10 },
+};
+export function scoreStory(results) {
+  let totalWeight = 0;
+  let weightedSum = 0;
+  const breakdown = {};
+  for (const [key, config] of Object.entries(CRITERIA)) {
+    if (results[key] === undefined || results[key] === null) continue;
+    const value = typeof results[key] === "number" ? results[key] : (results[key].score ?? 0);
+    const clamped = Math.max(0, Math.min(config.max, value));
+    breakdown[key] = clamped;
+    weightedSum += clamped * config.weight;
+    totalWeight += config.max * config.weight;
+  }
+  const score = totalWeight > 0 ? Math.round((weightedSum / totalWeight) * 100) : 0;
+  const grade = score >= 90 ? "A" : score >= 80 ? "B" : score >= 70 ? "C" : score >= 60 ? "D" : "F";
+  return { score, grade, breakdown };
+}
+export async function evaluateStory(storyId, cwd, config = {}) {
+  const results = {};
+  if (config.testCommand) {
+    try {
+      await execFileAsync("sh", ["-c", config.testCommand], { cwd, timeout: 120_000 });
+      results.tests = 10;
+    } catch {
+      results.tests = 0;
+    }
+  }
+  if (config.typecheckCommand) {
+    try {
+      await execFileAsync("sh", ["-c", config.typecheckCommand], { cwd, timeout: 60_000 });
+      results.typecheck = 10;
+    } catch (err) {
+      const errorCount = (err.stdout || "").split("\n").filter(l => l.includes("error")).length;
+      results.typecheck = Math.max(0, 10 - errorCount);
+    }
+  }
+  if (config.lintCommand) {
+    try {
+      await execFileAsync("sh", ["-c", config.lintCommand], { cwd, timeout: 60_000 });
+      results.lint = 10;
+    } catch (err) {
+      const warnings = (err.stdout || "").split("\n").filter(l => l.includes("warning") || l.includes("error")).length;
+      results.lint = Math.max(0, 10 - warnings);
+    }
+  }
+  if (config.verifyScript) {
+    try {
+      await execFileAsync("sh", ["-c", config.verifyScript], { cwd, timeout: 60_000 });
+      results.verify = 10;
+    } catch {
+      results.verify = 0;
+    }
+  }
+  try {
+    const { stdout } = await execFileAsync("git", ["diff", "--stat", "HEAD~1"], { cwd, timeout: 10_000 });
+    const lines = stdout.trim().split("\n");
+    const lastLine = lines[lines.length - 1] || "";
+    const match = lastLine.match(/(\d+) insertion.+?(\d+) deletion/);
+    if (match) {
+      const total = parseInt(match[1]) + parseInt(match[2]);
+      results.diff = total < 100 ? 10 : total < 300 ? 8 : total < 500 ? 6 : total < 1000 ? 4 : 2;
+    }
+  } catch {
+    // No git diff available
+  }
+  const customPath = join(cwd, ".loophaus", "quality.mjs");
+  try {
+    await stat(customPath);
+    const mod = await import(customPath);
+    if (typeof mod.evaluate === "function") {
+      const customResult = await mod.evaluate(storyId, cwd);
+      results.custom = typeof customResult === "number" ? customResult : (customResult?.score ?? 0);
+    }
+  } catch {
+    // No custom evaluator
+  }
+  return { storyId, results, ...scoreStory(results) };
+}
+export async function logResult(entry, cwd) {
+  const { appendFile, mkdir } = await import("node:fs/promises");
+  const tsvPath = join(cwd || process.cwd(), ".loophaus", "results.tsv");
+  await mkdir(join(cwd || process.cwd(), ".loophaus"), { recursive: true });
+  try {
+    await stat(tsvPath);
+  } catch {
+    await appendFile(tsvPath, "story_id\tattempt\tscore\tstatus\tdescription\tcommit\n", "utf-8");
+  }
+  const line = `${entry.storyId}\t${entry.attempt}\t${entry.score}\t${entry.status}\t${entry.description}\t${entry.commit || ""}\n`;
+  await appendFile(tsvPath, line, "utf-8");
+}
+export async function readResults(cwd) {
+  const tsvPath = join(cwd || process.cwd(), ".loophaus", "results.tsv");
+  try {
+    const raw = await readFile(tsvPath, "utf-8");
+    const lines = raw.trim().split("\n").slice(1);
+    return lines.map(line => {
+      const [storyId, attempt, score, status, description, commit] = line.split("\t");
+      return { storyId, attempt: parseInt(attempt), score: parseInt(score), status, description, commit };
+    });
+  } catch {
+    return [];
+  }
+}

package/core/refine-loop.mjs ADDED

@@ -0,0 +1,29 @@
+// core/refine-loop.mjs
+// autoresearch keep/discard pattern for code quality improvement
+export function shouldKeep(newScore, baselineScore) {
+  return newScore > baselineScore;
+}
+export function generateFeedback(evaluation, previousAttempts = []) {
+  const { storyId, score, grade, breakdown } = evaluation;
+  const failedCriteria = Object.entries(breakdown)
+    .filter(([_, v]) => v < 7)
+    .map(([k, v]) => `${k}: ${v}/10`);
+  let prompt = `Story ${storyId} quality: ${score}/100 (${grade}).\n`;
+  if (failedCriteria.length > 0) {
+    prompt += `Weak areas: ${failedCriteria.join(", ")}.\n`;
+  }
+  if (previousAttempts.length > 0) {
+    prompt += `Previous attempts: ${previousAttempts.map(a => `attempt ${a.attempt}: ${a.score} (${a.status})`).join(", ")}.\n`;
+  }
+  prompt += `Improve the implementation. Focus on the weak areas. Try a different approach if the same strategy keeps failing.`;
+  return prompt;
+}
+export function identifyRefinementTargets(evaluations, threshold = 80) {
+  return evaluations
+    .filter(e => e.score < threshold)
+    .sort((a, b) => a.score - b.score);
+}

package/core/session.mjs ADDED

@@ -0,0 +1,66 @@
+import { readFile, writeFile, readdir, mkdir } from "node:fs/promises";
+import { join } from "node:path";
+function getSessionsDir(cwd) {
+  return join(cwd || process.cwd(), ".loophaus", "sessions");
+}
+export async function saveCheckpoint(sessionId, data, cwd) {
+  const dir = getSessionsDir(cwd);
+  await mkdir(dir, { recursive: true });
+  const checkpoint = {
+    sessionId,
+    savedAt: new Date().toISOString(),
+    ...data,
+  };
+  await writeFile(join(dir, `${sessionId}.json`), JSON.stringify(checkpoint, null, 2), "utf-8");
+  return checkpoint;
+}
+export async function loadCheckpoint(sessionId, cwd) {
+  const dir = getSessionsDir(cwd);
+  try {
+    const raw = await readFile(join(dir, `${sessionId}.json`), "utf-8");
+    return JSON.parse(raw);
+  } catch {
+    return null;
+  }
+}
+export async function listSessions(cwd) {
+  const dir = getSessionsDir(cwd);
+  try {
+    const files = await readdir(dir);
+    const sessions = [];
+    for (const file of files) {
+      if (!file.endsWith(".json")) continue;
+      try {
+        const raw = await readFile(join(dir, file), "utf-8");
+        const data = JSON.parse(raw);
+        sessions.push(data);
+      } catch { /* skip malformed */ }
+    }
+    return sessions.sort((a, b) => new Date(b.savedAt).getTime() - new Date(a.savedAt).getTime());
+  } catch {
+    return [];
+  }
+}
+export async function resumeSession(sessionId, cwd) {
+  const checkpoint = await loadCheckpoint(sessionId, cwd);
+  if (!checkpoint) return null;
+  const { write } = await import("../store/state-store.mjs");
+  const state = {
+    active: true,
+    prompt: checkpoint.prompt || "",
+    completionPromise: checkpoint.completionPromise || "TADA",
+    maxIterations: checkpoint.maxIterations || 20,
+    currentIteration: checkpoint.currentIteration || 0,
+    sessionId: checkpoint.sessionId,
+    name: checkpoint.name || "",
+    startedAt: checkpoint.startedAt || new Date().toISOString(),
+  };
+  await write(state, cwd, checkpoint.name);
+  return state;
+}

package/core/validate.mjs CHANGED

@@ -14,6 +14,9 @@ const STATE_OPTIONAL = {
   verifyScript: "string",
   startedAt: "string",
   cost: "object",
+  qualityThreshold: "number",
+  maxRefineAttempts: "number",
+  qualityConfig: "object",
 };
 export function validateState(obj) {

package/hooks/stop-hook.mjs CHANGED

@@ -4,6 +4,32 @@ import { evaluateStopHook } from "../core/engine.mjs";
 import { getLastAssistantText, hasPendingStories } from "../core/io-helpers.mjs";
 import { read as readState, write as writeState } from "../store/state-store.mjs";
 import { logEvents } from "../core/event-logger.mjs";
+import { join } from "node:path";
+async function runStoryTests(cwd) {
+  const { readFile } = await import("node:fs/promises");
+  const { execFile } = await import("node:child_process");
+  const { promisify } = await import("node:util");
+  const execFileAsync = promisify(execFile);
+  const prdPath = join(cwd, "prd.json");
+  try {
+    const prd = JSON.parse(await readFile(prdPath, "utf-8"));
+    if (!Array.isArray(prd.userStories)) return [];
+    const results = [];
+    for (const story of prd.userStories) {
+      if (!story.testCommand || story.passes) continue;
+      try {
+        await execFileAsync("sh", ["-c", story.testCommand], { cwd, timeout: 60_000 });
+        results.push({ storyId: story.id, passed: true });
+      } catch (err) {
+        results.push({ storyId: story.id, passed: false, error: err.message });
+      }
+    }
+    return results;
+  } catch { return []; }
+}
 async function readStdin() {
   const chunks = [];
@@ -39,11 +65,21 @@ async function main() {
     }
   }
+  // Run story tests if prd.json has testCommand fields
+  const testResults = await runStoryTests(cwd);
+  // Evaluate loop policy
+  const { loadPolicy, evaluatePolicy } = await import("../core/policy.mjs");
+  const policy = await loadPolicy(cwd);
+  const policyResult = evaluatePolicy(policy, state, { totalCost: 0, errorCount: 0 });
   const input = {
     ...hookInput,
     last_assistant_text: lastText,
     has_pending_stories: pending,
     verify_result: verifyResult,
+    test_results: testResults,
+    policy_result: policyResult,
   };
   const result = evaluateStopHook(input, state);
@@ -51,6 +87,19 @@ async function main() {
   await writeState(result.nextState, cwd);
   await logEvents(result.events, { adapter: "auto", loop_id: state.sessionId || "unknown" }, cwd);
+  // Save session checkpoint (best-effort)
+  try {
+    const { saveCheckpoint } = await import("../core/session.mjs");
+    await saveCheckpoint(result.nextState.sessionId || `auto-${Date.now()}`, {
+      prompt: result.nextState.prompt,
+      completionPromise: result.nextState.completionPromise,
+      maxIterations: result.nextState.maxIterations,
+      currentIteration: result.nextState.currentIteration,
+      name: result.nextState.name,
+      startedAt: result.nextState.startedAt,
+    }, cwd);
+  } catch { /* best-effort */ }
   if (result.message) process.stderr.write(result.message + "\n");
   if (result.output) process.stdout.write(JSON.stringify(result.output));
   process.exit(0);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@graypark/loophaus",
-  "version": "3.2.0",
+  "version": "3.4.0",
   "type": "module",
   "description": "loophaus — Control plane for coding agents. Iterative dev loops with multi-agent orchestration.",
   "license": "MIT",

package/platforms/codex-cli/installer.mjs CHANGED

@@ -112,6 +112,14 @@ Score >= 3: parallel mode (worktrees by group). Score < 3: sequential mode.
 ## Phase 4B: Sequential Execution (score < 3)
 Create \`.loophaus/state.json\` and work through stories one at a time.
+## Phase 5: Evaluate
+Score each story 0-100 (tests, typecheck, lint, verify, diff size). Record in \`.loophaus/results.tsv\`.
+## Phase 6: Refine Loop (autoresearch pattern)
+For stories below quality threshold (default 80), loop up to 3 attempts:
+1. Checkpoint, 2. Re-implement weak areas, 3. Re-evaluate.
+Keep if improved, discard (git reset) if not. Best-effort after max attempts.
 ## Rules
 - Present PRD for user approval before execution
 - Show parallelism score and recommendation

package/platforms/kiro-cli/installer.mjs CHANGED

@@ -84,6 +84,14 @@ Score >= 3: parallel (worktrees). Score < 3: sequential.
 Parallel: create worktrees per group, distribute stories, run simultaneously, merge back.
 Sequential: single loop through stories in order.
+## Phase 5: Evaluate
+Score each story 0-100 (tests, typecheck, lint, verify, diff size). Record in \`.loophaus/results.tsv\`.
+## Phase 6: Refine Loop (autoresearch pattern)
+For stories below quality threshold (default 80), loop up to 3 attempts:
+1. Checkpoint, 2. Re-implement weak areas, 3. Re-evaluate.
+Keep if improved, discard (git reset) if not. Best-effort after max attempts.
 Rules: present PRD for approval, show parallelism score, stop on merge conflicts.
 `,
   },

package/skills/ralph-claude-interview/SKILL.md CHANGED

@@ -30,6 +30,8 @@ Ask **concise questions** for missing items. Max 3-5 per round, one round only.
 | **Constraints**           | Must not break existing tests? Library restrictions? |
 | **When stuck**            | Document? Skip? Suggest alternative?                 |
 | **Parallelism potential** | Multiple services? Independent file groups?          |
+| **Quality verification**  | What verification commands? (npm test, npx tsc, etc.) |
+| **Quality threshold**     | What quality threshold? (default: 80/100)            |
 ## Phase Design

package/store/state-store.mjs CHANGED

@@ -9,6 +9,9 @@ const DEFAULT_STATE = {
   maxIterations: 20,
   currentIteration: 0,
   sessionId: "",
+  qualityThreshold: 80,
+  maxRefineAttempts: 3,
+  qualityConfig: null,
 };
 export function getStatePath(cwd, name) {