npm - @graypark/loophaus - Versions diffs - 3.3.0 → 3.4.0 - Mend

@graypark/loophaus 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/bin/loophaus.mjs +45 -1
package/commands/loop-plan.md +53 -1
package/core/events.mjs +4 -0
package/core/quality-scorer.mjs +136 -0
package/core/refine-loop.mjs +29 -0
package/core/validate.mjs +3 -0
package/package.json +1 -1
package/platforms/codex-cli/installer.mjs +8 -0
package/platforms/kiro-cli/installer.mjs +8 -0
package/skills/ralph-claude-interview/SKILL.md +2 -0
package/store/state-store.mjs +3 -0

package/bin/loophaus.mjs CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 // loophaus CLI — install, status, stats, uninstall
-import { resolve, dirname } from "node:path";
+import { resolve, dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import { access } from "node:fs/promises";
@@ -46,6 +46,7 @@ Usage:
   npx @graypark/loophaus loops
   npx @graypark/loophaus worktree <create|remove|list>
   npx @graypark/loophaus parallel <prd.json> [--count N] [--base branch]
+  npx @graypark/loophaus quality [--story US-001]
   npx @graypark/loophaus sessions
   npx @graypark/loophaus resume <session-id>
   npx @graypark/loophaus --version
@@ -448,6 +449,48 @@ async function runParallelCmd() {
   }
 }
+/**
+ * `quality` subcommand: report story quality scores.
+ *
+ * With `--story <id>`: evaluates that story in the current working directory
+ * through evaluateStory() and prints its score, grade and one 10-cell bar per
+ * criterion. Without it: prints the best recorded attempt of every story
+ * returned by readResults().
+ *
+ * @returns {Promise<void>}
+ */
+async function runQuality() {
+  const storyId = getFlag("--story");
+  const cwd = process.cwd();
+  if (storyId) {
+    const { evaluateStory } = await import("../core/quality-scorer.mjs");
+    const { read } = await import("../store/state-store.mjs");
+    const state = await read(cwd);
+    // Work on a copy: the auto-detected typecheck command must not leak back
+    // into the loaded state object. Also tolerates a missing state/config.
+    const config = { ...(state?.qualityConfig || {}) };
+    if (!config.typecheckCommand) {
+      try {
+        await access(join(cwd, "tsconfig.json"));
+        config.typecheckCommand = "npx tsc --noEmit";
+      } catch {
+        // No accessible tsconfig.json: leave typechecking out of the evaluation.
+      }
+    }
+    const result = await evaluateStory(storyId, cwd, config);
+    console.log(`Quality: ${storyId}`);
+    console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
+    console.log(`Score: ${result.score}/100 (${result.grade})`);
+    for (const [k, v] of Object.entries(result.breakdown)) {
+      // Criterion scores are not guaranteed to be integers; round and clamp so
+      // the bar is always exactly 10 cells wide.
+      const filled = Number.isFinite(v) ? Math.min(10, Math.max(0, Math.round(v))) : 0;
+      const bar = "\u2588".repeat(filled) + "\u2591".repeat(10 - filled);
+      console.log(`  ${k.padEnd(10)} ${bar} ${v}/10`);
+    }
+    return;
+  }
+  const { readResults } = await import("../core/quality-scorer.mjs");
+  const results = await readResults(cwd);
+  if (results.length === 0) { console.log("No quality results yet. Run /loop-plan first."); return; }
+  console.log("Quality Results");
+  console.log("\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550");
+  // A Map keeps first-seen order for any story id and cannot collide with
+  // inherited object keys such as "constructor".
+  const byStory = new Map();
+  for (const r of results) {
+    const attempts = byStory.get(r.storyId);
+    if (attempts) {
+      attempts.push(r);
+    } else {
+      byStory.set(r.storyId, [r]);
+    }
+  }
+  for (const [sid, attempts] of byStory) {
+    // Highest score wins; on a tie the later attempt is reported.
+    const best = attempts.reduce((a, b) => a.score > b.score ? a : b);
+    const icon = best.status === "keep" ? "\u2713" : best.status === "discard" ? "\u2717" : "~";
+    console.log(`  ${icon} ${sid}  score: ${best.score}  (${attempts.length} attempts)`);
+  }
+}
 try {
   switch (command) {
     case "install": await runInstall(); break;
@@ -460,6 +503,7 @@ try {
     case "compare": await runCompare(); break;
     case "worktree": await runWorktree(); break;
     case "parallel": await runParallelCmd(); break;
+    case "quality": await runQuality(); break;
     case "sessions": await runSessions(); break;
     case "resume": await runResume(); break;
     default:

package/commands/loop-plan.md CHANGED Viewed

@@ -172,7 +172,59 @@ Single loop, no worktrees:
 3. Each iteration: implement one story, verify, commit, update prd.json.
 4. Output `<promise>TASK COMPLETE</promise>` when ALL stories pass.
-## Phase 5: Summary Report
+## Phase 5: Evaluate
+After all stories are implemented (parallel or sequential), evaluate each:
+For each story in prd.json:
+1. Run testCommand if defined
+2. Run typecheck if project has tsconfig.json: `npx tsc --noEmit`
+3. Run lint if project has eslint config: `npx eslint . --quiet`
+4. Run `.loophaus/verify.sh` if it exists
+5. Analyze git diff size
+Score each story 0-100. Record in `.loophaus/results.tsv`.
+Display quality dashboard:
+```
+Quality Evaluation
+──────────────────
+  US-001  Add login API        score: 65  (D) <- needs refinement
+  US-002  Add auth middleware   score: 92  (A) ✓
+  US-003  Add login UI         score: 45  (F) <- needs refinement
+Overall: 67/100 — threshold: 80
+Stories needing refinement: 2
+```
+## Phase 6: Refine Loop (autoresearch pattern)
+For each story below the quality threshold (default: 80):
+LOOP (max 3 attempts per story):
+  1. Git checkpoint: `git add -A && git commit -m "checkpoint: <story-id> attempt <N>"`
+  2. Read the quality feedback (which criteria failed, error messages)
+  3. Re-implement with a different approach, focusing on weak areas
+  4. Re-evaluate (same criteria as Phase 5)
+  5. If score improved -> KEEP (advance the commit)
+     If score same or worse -> DISCARD (git reset --hard to checkpoint)
+  6. Record attempt in .loophaus/results.tsv
+  7. If score >= threshold -> DONE with this story
+     If max attempts reached -> move on (best-effort)
+After all refinements:
+```
+Refinement Complete
+───────────────────
+  US-001  65 -> 82 (B) ✓  (2 attempts)
+  US-003  45 -> 78 (C)    (3 attempts, best effort)
+Overall: 84/100 — PASS
+```
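+CRITICAL: The refine loop uses `git reset --hard` to discard bad attempts. This is the autoresearch pattern. It is safe only because step 1 commits a checkpoint before every attempt, so the reset throws away nothing but the changes made since that checkpoint. Note that `git reset --hard` does not delete untracked files created during the attempt.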
+## Phase 7: Summary Report
 After completion (parallel or sequential), output:

package/core/events.mjs CHANGED Viewed

@@ -15,6 +15,10 @@ export const EventType = {
   CHECKPOINT: "checkpoint",
   ERROR: "error",
   STATE_CHANGE: "state_change",
+  QUALITY_SCORE: "quality_score",
+  REFINE_ATTEMPT: "refine_attempt",
+  REFINE_KEEP: "refine_keep",
+  REFINE_DISCARD: "refine_discard",
 };
 export function filterByType(events, type) {

package/core/quality-scorer.mjs ADDED Viewed

@@ -0,0 +1,136 @@
+// core/quality-scorer.mjs
+// Quality scoring for story implementations (autoresearch pattern: the quality score stands in for val_bpb as the metric being improved)
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import { readFile, stat } from "node:fs/promises";
+import { join } from "node:path";
+const execFileAsync = promisify(execFile);
+// Scoring criteria. `max` is the highest value a criterion can contribute;
+// `weight` is its relative influence on the overall 0-100 score.
+const CRITERIA = {
+  tests:     { weight: 3, max: 10 },
+  typecheck: { weight: 2, max: 10 },
+  lint:      { weight: 1, max: 10 },
+  verify:    { weight: 2, max: 10 },
+  diff:      { weight: 1, max: 10 },
+  custom:    { weight: 1, max: 10 },
+};
+/**
+ * Combine per-criterion results into a weighted 0-100 score and a letter grade.
+ *
+ * Only criteria present in `results` (not undefined/null) take part, so the
+ * score is a percentage of the points that were actually attainable.
+ *
+ * @param {Object} results - criterion name -> number, or object with a `score` field.
+ * @returns {{score: number, grade: string, breakdown: Object}} `breakdown`
+ *   holds each evaluated criterion clamped to [0, max]; `score` is 0 when no
+ *   criterion was evaluated.
+ */
+export function scoreStory(results) {
+  let totalWeight = 0;
+  let weightedSum = 0;
+  const breakdown = {};
+  for (const [key, { weight, max }] of Object.entries(CRITERIA)) {
+    const entry = results[key];
+    if (entry === undefined || entry === null) continue;
+    const raw = Number(typeof entry === "number" ? entry : (entry.score ?? 0));
+    // NaN would propagate through the sums and turn the whole score into NaN;
+    // count an unusable value as 0 for that criterion instead.
+    const clamped = Number.isNaN(raw) ? 0 : Math.max(0, Math.min(max, raw));
+    breakdown[key] = clamped;
+    weightedSum += clamped * weight;
+    totalWeight += max * weight;
+  }
+  const score = totalWeight > 0 ? Math.round((weightedSum / totalWeight) * 100) : 0;
+  const grade = score >= 90 ? "A" : score >= 80 ? "B" : score >= 70 ? "C" : score >= 60 ? "D" : "F";
+  return { score, grade, breakdown };
+}
+// Runs `command` through `sh -c` in `cwd`. Resolves to { ok, stdout } instead
+// of rejecting; `stdout` is whatever the failed command printed to stdout.
+// The command string is handed to the shell as-is, so it must come from
+// trusted configuration.
+async function runCheck(command, cwd, timeout) {
+  try {
+    await execFileAsync("sh", ["-c", command], { cwd, timeout });
+    return { ok: true, stdout: "" };
+  } catch (err) {
+    return { ok: false, stdout: err.stdout || "" };
+  }
+}
+// Scores a FAILED check: 10 minus the number of stdout lines accepted by
+// `isProblem`, floored at 0. A failure without any such line (command not
+// found, timeout, stderr-only output) scores 0 - it did not pass, so it must
+// not be credited as a perfect 10.
+function scoreFailedCheck(stdout, isProblem) {
+  const problems = stdout.split("\n").filter(isProblem).length;
+  return problems === 0 ? 0 : Math.max(0, 10 - problems);
+}
+// Maps the number of changed lines to the diff-size score (smaller is better).
+function scoreDiffSize(total) {
+  if (total < 100) return 10;
+  if (total < 300) return 8;
+  if (total < 500) return 6;
+  if (total < 1000) return 4;
+  return 2;
+}
+/**
+ * Evaluate one story: run the configured checks, measure the last commit's
+ * diff size, run the optional custom evaluator, and score the outcome.
+ *
+ * @param {string} storyId - echoed in the result and passed to the custom evaluator.
+ * @param {string} cwd - directory in which every command runs.
+ * @param {Object} [config] - optional `testCommand`, `typecheckCommand`,
+ *   `lintCommand`, `verifyScript`; a check without a command is skipped and
+ *   does not count towards the score.
+ * @returns {Promise<Object>} `{ storyId, results, score, grade, breakdown }`.
+ */
+export async function evaluateStory(storyId, cwd, config = {}) {
+  const results = {};
+  if (config.testCommand) {
+    const check = await runCheck(config.testCommand, cwd, 120_000);
+    results.tests = check.ok ? 10 : 0;
+  }
+  if (config.typecheckCommand) {
+    const check = await runCheck(config.typecheckCommand, cwd, 60_000);
+    results.typecheck = check.ok
+      ? 10
+      : scoreFailedCheck(check.stdout, (l) => l.includes("error"));
+  }
+  if (config.lintCommand) {
+    const check = await runCheck(config.lintCommand, cwd, 60_000);
+    results.lint = check.ok
+      ? 10
+      : scoreFailedCheck(check.stdout, (l) => l.includes("warning") || l.includes("error"));
+  }
+  if (config.verifyScript) {
+    const check = await runCheck(config.verifyScript, cwd, 60_000);
+    results.verify = check.ok ? 10 : 0;
+  }
+  try {
+    const { stdout } = await execFileAsync("git", ["diff", "--stat", "HEAD~1"], { cwd, timeout: 10_000 });
+    const lines = stdout.trim().split("\n");
+    const summary = lines[lines.length - 1] || "";
+    // git omits the "insertions" or the "deletions" part when its count is 0,
+    // so each side is matched on its own.
+    const insertions = summary.match(/(\d+) insertions?\(\+\)/);
+    const deletions = summary.match(/(\d+) deletions?\(-\)/);
+    if (insertions || deletions) {
+      const total =
+        Number.parseInt(insertions?.[1] ?? "0", 10) +
+        Number.parseInt(deletions?.[1] ?? "0", 10);
+      results.diff = scoreDiffSize(total);
+    }
+  } catch {
+    // No git diff available (not a repository, or no previous commit):
+    // the diff criterion is left out of the score.
+  }
+  const customPath = join(cwd, ".loophaus", "quality.mjs");
+  let hasCustomEvaluator = true;
+  try {
+    await stat(customPath);
+  } catch {
+    // No custom evaluator
+    hasCustomEvaluator = false;
+  }
+  if (hasCustomEvaluator) {
+    try {
+      // import() takes a URL/specifier, not a filesystem path: a raw Windows
+      // or relative path would not resolve.
+      const { pathToFileURL } = await import("node:url");
+      const mod = await import(pathToFileURL(customPath).href);
+      if (typeof mod.evaluate === "function") {
+        const customResult = await mod.evaluate(storyId, cwd);
+        results.custom = typeof customResult === "number" ? customResult : (customResult?.score ?? 0);
+      }
+    } catch (err) {
+      // Best effort: a broken evaluator must not abort the evaluation, but it
+      // should not look the same as "no evaluator installed" either.
+      console.warn(`loophaus: custom evaluator ${customPath} failed: ${err?.message ?? err}`);
+    }
+  }
+  return { storyId, results, ...scoreStory(results) };
+}
+/**
+ * Append one attempt to `<cwd>/.loophaus/results.tsv`, creating the directory
+ * and the header row when the file does not exist yet.
+ *
+ * @param {Object} entry - `storyId`, `attempt`, `score`, `status`,
+ *   `description` and optional `commit`; missing fields are written empty.
+ * @param {string} [cwd] - project directory; defaults to `process.cwd()`.
+ * @returns {Promise<void>}
+ */
+export async function logResult(entry, cwd) {
+  const { appendFile, mkdir, writeFile } = await import("node:fs/promises");
+  const dir = join(cwd || process.cwd(), ".loophaus");
+  const tsvPath = join(dir, "results.tsv");
+  await mkdir(dir, { recursive: true });
+  try {
+    // "wx" creates the file only if it is absent (EEXIST otherwise), so the
+    // header is written exactly once without a separate existence check.
+    await writeFile(tsvPath, "story_id\tattempt\tscore\tstatus\tdescription\tcommit\n", { encoding: "utf-8", flag: "wx" });
+  } catch (err) {
+    if (err.code !== "EEXIST") throw err;
+  }
+  // A tab or line break inside a field would shift columns or split the row
+  // when the file is read back; collapse each run to a single space.
+  const cell = (value) => String(value ?? "").replace(/[\t\r\n]+/g, " ");
+  const cells = [entry.storyId, entry.attempt, entry.score, entry.status, entry.description, entry.commit || ""];
+  await appendFile(tsvPath, `${cells.map(cell).join("\t")}\n`, "utf-8");
+}
+/**
+ * Read every recorded attempt from `<cwd>/.loophaus/results.tsv`.
+ *
+ * @param {string} [cwd] - project directory; defaults to `process.cwd()`.
+ * @returns {Promise<Array<Object>>} one `{ storyId, attempt, score, status,
+ *   description, commit }` per data row, in file order; `[]` when the file
+ *   cannot be read.
+ */
+export async function readResults(cwd) {
+  const tsvPath = join(cwd || process.cwd(), ".loophaus", "results.tsv");
+  let raw;
+  try {
+    raw = await readFile(tsvPath, "utf-8");
+  } catch {
+    // Missing or unreadable file: nothing has been recorded yet.
+    return [];
+  }
+  return raw
+    .split(/\r?\n/) // accept CRLF files so "\r" never ends up in the last column
+    .slice(1) // drop the header row
+    // Skip blank lines per row. Trimming the whole file instead would also
+    // strip the trailing tab of a final row whose commit cell is empty.
+    .filter((line) => line.trim() !== "")
+    .map((line) => {
+      const [storyId, attempt, score, status, description = "", commit = ""] = line.split("\t");
+      return {
+        storyId,
+        attempt: Number.parseInt(attempt, 10),
+        score: Number.parseInt(score, 10),
+        status,
+        description,
+        commit,
+      };
+    });
+}

package/core/refine-loop.mjs ADDED Viewed

@@ -0,0 +1,29 @@
+// core/refine-loop.mjs
+// autoresearch keep/discard pattern for code quality improvement
+/**
+ * Keep/discard decision for one refinement attempt.
+ *
+ * @param {number} newScore - score of the attempt just evaluated.
+ * @param {number} baselineScore - score the attempt has to beat.
+ * @returns {boolean} true only for a strict improvement; a tie is not kept.
+ */
+export function shouldKeep(newScore, baselineScore) {
+  const improved = baselineScore < newScore;
+  return improved;
+}
+/**
+ * Build the refinement prompt for a story from its evaluation.
+ *
+ * @param {Object} evaluation - `{ storyId, score, grade, breakdown }`; every
+ *   breakdown entry below 7 is listed as a weak area.
+ * @param {Array<Object>} [previousAttempts] - earlier attempts
+ *   (`attempt`, `score`, `status`), summarised in the prompt when non-empty.
+ * @returns {string} the prompt text.
+ */
+export function generateFeedback(evaluation, previousAttempts = []) {
+  const weakAreas = [];
+  for (const [criterion, points] of Object.entries(evaluation.breakdown)) {
+    if (points < 7) {
+      weakAreas.push(`${criterion}: ${points}/10`);
+    }
+  }
+  const sentences = [
+    `Story ${evaluation.storyId} quality: ${evaluation.score}/100 (${evaluation.grade}).\n`,
+  ];
+  if (weakAreas.length > 0) {
+    sentences.push(`Weak areas: ${weakAreas.join(", ")}.\n`);
+  }
+  if (previousAttempts.length > 0) {
+    const history = previousAttempts.map(
+      (past) => `attempt ${past.attempt}: ${past.score} (${past.status})`,
+    );
+    sentences.push(`Previous attempts: ${history.join(", ")}.\n`);
+  }
+  sentences.push(`Improve the implementation. Focus on the weak areas. Try a different approach if the same strategy keeps failing.`);
+  return sentences.join("");
+}
+/**
+ * Select the evaluations that still need refinement.
+ *
+ * @param {Array<Object>} evaluations - objects with a numeric `score`.
+ * @param {number} [threshold=80] - scores strictly below this are selected.
+ * @returns {Array<Object>} a new array, lowest score first; the input array is
+ *   not reordered.
+ */
+export function identifyRefinementTargets(evaluations, threshold = 80) {
+  const belowThreshold = [];
+  for (const evaluation of evaluations) {
+    if (evaluation.score < threshold) {
+      belowThreshold.push(evaluation);
+    }
+  }
+  belowThreshold.sort((left, right) => left.score - right.score);
+  return belowThreshold;
+}

package/core/validate.mjs CHANGED Viewed

@@ -14,6 +14,9 @@ const STATE_OPTIONAL = {
   verifyScript: "string",
   startedAt: "string",
   cost: "object",
+  qualityThreshold: "number",
+  maxRefineAttempts: "number",
+  qualityConfig: "object",
 };
 export function validateState(obj) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@graypark/loophaus",
-  "version": "3.3.0",
+  "version": "3.4.0",
   "type": "module",
   "description": "loophaus — Control plane for coding agents. Iterative dev loops with multi-agent orchestration.",
   "license": "MIT",

package/platforms/codex-cli/installer.mjs CHANGED Viewed

@@ -112,6 +112,14 @@ Score >= 3: parallel mode (worktrees by group). Score < 3: sequential mode.
 ## Phase 4B: Sequential Execution (score < 3)
 Create \`.loophaus/state.json\` and work through stories one at a time.
+## Phase 5: Evaluate
+Score each story 0-100 (tests, typecheck, lint, verify, diff size). Record in \`.loophaus/results.tsv\`.
+## Phase 6: Refine Loop (autoresearch pattern)
+For stories below quality threshold (default 80), loop up to 3 attempts:
+1. Checkpoint, 2. Re-implement weak areas, 3. Re-evaluate.
+Keep if improved, discard (git reset) if not. Best-effort after max attempts.
 ## Rules
 - Present PRD for user approval before execution
 - Show parallelism score and recommendation

package/platforms/kiro-cli/installer.mjs CHANGED Viewed

@@ -84,6 +84,14 @@ Score >= 3: parallel (worktrees). Score < 3: sequential.
 Parallel: create worktrees per group, distribute stories, run simultaneously, merge back.
 Sequential: single loop through stories in order.
+## Phase 5: Evaluate
+Score each story 0-100 (tests, typecheck, lint, verify, diff size). Record in \`.loophaus/results.tsv\`.
+## Phase 6: Refine Loop (autoresearch pattern)
+For stories below quality threshold (default 80), loop up to 3 attempts:
+1. Checkpoint, 2. Re-implement weak areas, 3. Re-evaluate.
+Keep if improved, discard (git reset) if not. Best-effort after max attempts.
 Rules: present PRD for approval, show parallelism score, stop on merge conflicts.
 `,
   },

package/skills/ralph-claude-interview/SKILL.md CHANGED Viewed

@@ -30,6 +30,8 @@ Ask **concise questions** for missing items. Max 3-5 per round, one round only.
 | **Constraints**           | Must not break existing tests? Library restrictions? |
 | **When stuck**            | Document? Skip? Suggest alternative?                 |
 | **Parallelism potential** | Multiple services? Independent file groups?          |
+| **Quality verification**  | What verification commands? (npm test, npx tsc, etc.) |
+| **Quality threshold**     | What quality threshold? (default: 80/100)            |
 ## Phase Design

package/store/state-store.mjs CHANGED Viewed

@@ -9,6 +9,9 @@ const DEFAULT_STATE = {
   maxIterations: 20,
   currentIteration: 0,
   sessionId: "",
+  qualityThreshold: 80,
+  maxRefineAttempts: 3,
+  qualityConfig: null,
 };
 export function getStatePath(cwd, name) {