@graypark/loophaus 3.2.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/loophaus.mjs +71 -1
- package/commands/loop-plan.md +53 -1
- package/core/engine.mjs +23 -0
- package/core/events.mjs +4 -0
- package/core/policy.mjs +58 -0
- package/core/quality-scorer.mjs +136 -0
- package/core/refine-loop.mjs +29 -0
- package/core/session.mjs +66 -0
- package/core/validate.mjs +3 -0
- package/hooks/stop-hook.mjs +49 -0
- package/package.json +1 -1
- package/platforms/codex-cli/installer.mjs +8 -0
- package/platforms/kiro-cli/installer.mjs +8 -0
- package/skills/ralph-claude-interview/SKILL.md +2 -0
- package/store/state-store.mjs +3 -0
package/bin/loophaus.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// loophaus CLI — install, status, stats, uninstall
|
|
3
3
|
|
|
4
|
-
import { resolve, dirname } from "node:path";
|
|
4
|
+
import { resolve, dirname, join } from "node:path";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
6
|
import { access } from "node:fs/promises";
|
|
7
7
|
|
|
@@ -46,6 +46,9 @@ Usage:
|
|
|
46
46
|
npx @graypark/loophaus loops
|
|
47
47
|
npx @graypark/loophaus worktree <create|remove|list>
|
|
48
48
|
npx @graypark/loophaus parallel <prd.json> [--count N] [--base branch]
|
|
49
|
+
npx @graypark/loophaus quality [--story US-001]
|
|
50
|
+
npx @graypark/loophaus sessions
|
|
51
|
+
npx @graypark/loophaus resume <session-id>
|
|
49
52
|
npx @graypark/loophaus --version
|
|
50
53
|
|
|
51
54
|
Hosts:
|
|
@@ -408,6 +411,28 @@ async function runWorktree() {
|
|
|
408
411
|
}
|
|
409
412
|
}
|
|
410
413
|
|
|
414
|
+
/**
 * `sessions` command: print every saved session checkpoint with its iteration
 * counter and how long ago it was saved.
 *
 * The list comes from listSessions() in core/session.mjs. When it is empty,
 * "No saved sessions." is printed instead.
 *
 * @returns {Promise<void>}
 */
async function runSessions() {
  const { listSessions } = await import("../core/session.mjs");
  const sessions = await listSessions();
  if (sessions.length === 0) { console.log("No saved sessions."); return; }
  console.log("Sessions");
  console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
  for (const s of sessions) {
    const savedMs = new Date(s.savedAt).getTime();
    // A checkpoint without a parsable savedAt would otherwise be shown as "NaNm ago".
    const ageLabel = Number.isNaN(savedMs)
      ? "age unknown"
      : `${Math.round((Date.now() - savedMs) / 60000)}m ago`;
    console.log(` ${s.sessionId} iter=${s.currentIteration || 0} ${ageLabel}`);
  }
}
|
|
425
|
+
|
|
426
|
+
/**
 * `resume <session-id>` command: restore a saved session and report the
 * iteration it was restored at.
 *
 * Prints a usage line when no id was given and a "not found" line when
 * resumeSession() yields nothing.
 *
 * NOTE(review): `args` is defined outside this block; it is presumably the
 * CLI argument list with the command name at index 0 — confirm.
 *
 * @returns {Promise<void>}
 */
async function runResume() {
  const sessionId = args[1];
  if (!sessionId) {
    console.log("Usage: loophaus resume <session-id>");
    return;
  }

  const sessionModule = await import("../core/session.mjs");
  const restored = await sessionModule.resumeSession(sessionId);
  if (!restored) {
    console.log(`Session not found: ${sessionId}`);
    return;
  }

  console.log(`Resumed session ${sessionId} at iteration ${restored.currentIteration}`);
  console.log("Loop is now active. The stop hook will continue from here.");
}
|
|
435
|
+
|
|
411
436
|
async function runParallelCmd() {
|
|
412
437
|
const prdPath = args[1] || "prd.json";
|
|
413
438
|
const count = parseInt(getFlag("--count") || "2", 10);
|
|
@@ -424,6 +449,48 @@ async function runParallelCmd() {
|
|
|
424
449
|
}
|
|
425
450
|
}
|
|
426
451
|
|
|
452
|
+
/**
 * `quality [--story <id>]` command.
 *
 * With `--story`, evaluates that story in the current working directory and
 * prints its score, grade and a 10-cell bar per criterion. Without it, prints
 * the best recorded attempt for every story found in the results log.
 *
 * @returns {Promise<void>}
 */
async function runQuality() {
  const storyId = getFlag("--story");
  const cwd = process.cwd();

  if (storyId) {
    const { evaluateStory } = await import("../core/quality-scorer.mjs");
    const { read } = await import("../store/state-store.mjs");
    const state = await read(cwd);
    // read() is not visible from here, so tolerate an empty result. The copy keeps
    // the auto-detected typecheck command out of the loaded state object.
    const config = { ...(state?.qualityConfig || {}) };

    if (!config.typecheckCommand) {
      try {
        await access(join(cwd, "tsconfig.json"));
        config.typecheckCommand = "npx tsc --noEmit";
      } catch {
        // No tsconfig.json: leave typechecking unconfigured.
      }
    }

    const result = await evaluateStory(storyId, cwd, config);
    console.log(`Quality: ${storyId}`);
    console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
    console.log(`Score: ${result.score}/100 (${result.grade})`);
    for (const [k, v] of Object.entries(result.breakdown)) {
      // String.prototype.repeat truncates fractions, so a score such as 7.5 would
      // draw only 9 cells; round once and derive both halves from the same integer.
      const filled = Math.max(0, Math.min(10, Math.round(v) || 0));
      const bar = "\u2588".repeat(filled) + "\u2591".repeat(10 - filled);
      console.log(` ${k.padEnd(10)} ${bar} ${v}/10`);
    }
  } else {
    const { readResults } = await import("../core/quality-scorer.mjs");
    const results = await readResults(cwd);
    if (results.length === 0) { console.log("No quality results yet. Run /loop-plan first."); return; }

    console.log("Quality Results");
    console.log("\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550");
    // A Map avoids collisions between story ids and Object.prototype keys
    // (for example a story literally named "constructor").
    const byStory = new Map();
    for (const r of results) {
      const attempts = byStory.get(r.storyId) ?? [];
      attempts.push(r);
      byStory.set(r.storyId, attempts);
    }
    for (const [sid, attempts] of byStory) {
      // Every group holds at least one attempt, so reduce() needs no seed.
      const best = attempts.reduce((a, b) => a.score > b.score ? a : b);
      const icon = best.status === "keep" ? "\u2713" : best.status === "discard" ? "\u2717" : "~";
      console.log(` ${icon} ${sid} score: ${best.score} (${attempts.length} attempts)`);
    }
  }
}
|
|
493
|
+
|
|
427
494
|
try {
|
|
428
495
|
switch (command) {
|
|
429
496
|
case "install": await runInstall(); break;
|
|
@@ -436,6 +503,9 @@ try {
|
|
|
436
503
|
case "compare": await runCompare(); break;
|
|
437
504
|
case "worktree": await runWorktree(); break;
|
|
438
505
|
case "parallel": await runParallelCmd(); break;
|
|
506
|
+
case "quality": await runQuality(); break;
|
|
507
|
+
case "sessions": await runSessions(); break;
|
|
508
|
+
case "resume": await runResume(); break;
|
|
439
509
|
default:
|
|
440
510
|
if (command.startsWith("-")) {
|
|
441
511
|
await runInstall();
|
package/commands/loop-plan.md
CHANGED
|
@@ -172,7 +172,59 @@ Single loop, no worktrees:
|
|
|
172
172
|
3. Each iteration: implement one story, verify, commit, update prd.json.
|
|
173
173
|
4. Output `<promise>TASK COMPLETE</promise>` when ALL stories pass.
|
|
174
174
|
|
|
175
|
-
## Phase 5:
|
|
175
|
+
## Phase 5: Evaluate
|
|
176
|
+
|
|
177
|
+
After all stories are implemented (parallel or sequential), evaluate each:
|
|
178
|
+
|
|
179
|
+
For each story in prd.json:
|
|
180
|
+
1. Run testCommand if defined
|
|
181
|
+
2. Run typecheck if project has tsconfig.json: `npx tsc --noEmit`
|
|
182
|
+
3. Run lint if project has eslint config: `npx eslint . --quiet`
|
|
183
|
+
4. Check .loophaus/verify.sh if exists
|
|
184
|
+
5. Analyze git diff size
|
|
185
|
+
|
|
186
|
+
Score each story 0-100. Record in `.loophaus/results.tsv`.
|
|
187
|
+
|
|
188
|
+
Display quality dashboard:
|
|
189
|
+
```
|
|
190
|
+
Quality Evaluation
|
|
191
|
+
──────────────────
|
|
192
|
+
US-001 Add login API score: 65 (D) <- needs refinement
|
|
193
|
+
US-002 Add auth middleware score: 92 (A) ✓
|
|
194
|
+
US-003 Add login UI score: 45 (F) <- needs refinement
|
|
195
|
+
|
|
196
|
+
Overall: 67/100 — threshold: 80
|
|
197
|
+
Stories needing refinement: 2
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Phase 6: Refine Loop (autoresearch pattern)
|
|
201
|
+
|
|
202
|
+
For each story below the quality threshold (default: 80):
|
|
203
|
+
|
|
204
|
+
LOOP (max 3 attempts per story):
|
|
205
|
+
1. Git checkpoint: `git add -A && git commit -m "checkpoint: <story-id> attempt <N>"`
|
|
206
|
+
2. Read the quality feedback (which criteria failed, error messages)
|
|
207
|
+
3. Re-implement with a different approach, focusing on weak areas
|
|
208
|
+
4. Re-evaluate (same criteria as Phase 5)
|
|
209
|
+
5. If score improved -> KEEP (advance the commit)
|
|
210
|
+
If score same or worse -> DISCARD (git reset --hard to checkpoint)
|
|
211
|
+
6. Record attempt in .loophaus/results.tsv
|
|
212
|
+
7. If score >= threshold -> DONE with this story
|
|
213
|
+
If max attempts reached -> move on (best-effort)
|
|
214
|
+
|
|
215
|
+
After all refinements:
|
|
216
|
+
```
|
|
217
|
+
Refinement Complete
|
|
218
|
+
───────────────────
|
|
219
|
+
US-001 65 -> 82 (B) ✓ (2 attempts)
|
|
220
|
+
US-003 45 -> 78 (C) (3 attempts, best effort)
|
|
221
|
+
|
|
222
|
+
Overall: 84/100 — PASS
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
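CRITICAL: The refine loop uses `git reset --hard` to discard bad attempts. This is the autoresearch pattern — safe because we always checkpoint first. Note that `git reset --hard` only restores tracked files: files newly created during a discarded attempt are left behind as untracked files, so remove them before starting the next attempt.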
|
|
226
|
+
|
|
227
|
+
## Phase 7: Summary Report
|
|
176
228
|
|
|
177
229
|
After completion (parallel or sequential), output:
|
|
178
230
|
|
package/core/engine.mjs
CHANGED
|
@@ -27,6 +27,19 @@ export function evaluateStopHook(input, state) {
|
|
|
27
27
|
};
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
if (input.policy_result && input.policy_result.shouldStop) {
|
|
31
|
+
nextState.active = false;
|
|
32
|
+
events.push({ event: "stop", reason: "policy_violation", violations: input.policy_result.violations });
|
|
33
|
+
const reasons = input.policy_result.violations.map(v => `${v.type}: ${v.current}/${v.limit}`).join(", ");
|
|
34
|
+
return {
|
|
35
|
+
decision: "allow",
|
|
36
|
+
nextState,
|
|
37
|
+
events,
|
|
38
|
+
output: null,
|
|
39
|
+
message: `Loop: policy violation (${reasons}).`,
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
|
|
30
43
|
if (nextState.completionPromise && input.last_assistant_text) {
|
|
31
44
|
if (extractPromise(input.last_assistant_text, nextState.completionPromise)) {
|
|
32
45
|
nextState.active = false;
|
|
@@ -57,6 +70,16 @@ export function evaluateStopHook(input, state) {
|
|
|
57
70
|
events.push({ event: "verify_failed", script: nextState.verifyScript, output: input.verify_result.output || "" });
|
|
58
71
|
}
|
|
59
72
|
|
|
73
|
+
if (input.test_results && input.test_results.length > 0) {
|
|
74
|
+
const allPassed = input.test_results.every(r => r.passed);
|
|
75
|
+
if (allPassed) {
|
|
76
|
+
events.push({ event: "test_result", status: "all_passed", results: input.test_results });
|
|
77
|
+
} else {
|
|
78
|
+
const failed = input.test_results.filter(r => !r.passed);
|
|
79
|
+
events.push({ event: "test_result", status: "some_failed", failed: failed.map(f => f.storyId) });
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
60
83
|
if (input.stop_hook_active === true) {
|
|
61
84
|
if (!input.has_pending_stories) {
|
|
62
85
|
nextState.active = false;
|
package/core/events.mjs
CHANGED
|
@@ -15,6 +15,10 @@ export const EventType = {
|
|
|
15
15
|
CHECKPOINT: "checkpoint",
|
|
16
16
|
ERROR: "error",
|
|
17
17
|
STATE_CHANGE: "state_change",
|
|
18
|
+
QUALITY_SCORE: "quality_score",
|
|
19
|
+
REFINE_ATTEMPT: "refine_attempt",
|
|
20
|
+
REFINE_KEEP: "refine_keep",
|
|
21
|
+
REFINE_DISCARD: "refine_discard",
|
|
18
22
|
};
|
|
19
23
|
|
|
20
24
|
export function filterByType(events, type) {
|
package/core/policy.mjs
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
// Fallback policy returned when the project has no usable .loophaus/policy.json.
const DEFAULT_POLICY = {
  id: "default",
  conditions: [
    { type: "max_iterations", value: 20 },
  ],
};

/**
 * Load the loop policy from `<cwd>/.loophaus/policy.json`.
 *
 * Falls back to DEFAULT_POLICY when the file cannot be read, is not valid
 * JSON, or does not contain a JSON object. A file that exists but is unusable
 * is reported on stderr, so a typo in the policy does not silently swap the
 * configured limits for the defaults.
 *
 * @param {string} [cwd] Project directory; defaults to process.cwd().
 * @returns {Promise<object>} The parsed policy object, or DEFAULT_POLICY.
 */
export async function loadPolicy(cwd) {
  const policyPath = join(cwd || process.cwd(), ".loophaus", "policy.json");

  let raw;
  try {
    raw = await readFile(policyPath, "utf-8");
  } catch {
    // No policy file (or it is unreadable): the default applies.
    return DEFAULT_POLICY;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    process.stderr.write(`loophaus: ignoring ${policyPath}: ${err.message}\n`);
    return DEFAULT_POLICY;
  }

  // JSON such as `null`, `42` or `[...]` parses fine but is not a policy;
  // returning it would make evaluatePolicy() throw on `policy.conditions`.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    process.stderr.write(`loophaus: ignoring ${policyPath}: expected a JSON object\n`);
    return DEFAULT_POLICY;
  }

  return parsed;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Check a loop policy against the current loop state.
 *
 * Supported condition types:
 *  - "max_iterations":   violated when state.currentIteration exceeds the value.
 *  - "max_cost":         violated when context.totalCost exceeds the value.
 *  - "max_time_minutes": violated when more minutes than the value have elapsed
 *                        since state.startedAt.
 *  - "max_errors":       violated when context.errorCount exceeds the value.
 * Conditions of any other type are ignored.
 *
 * A missing policy, state or context, and a `conditions` value that is not an
 * array, are treated as "nothing to check" instead of throwing.
 *
 * @param {object} policy Policy object with a `conditions` array.
 * @param {object} state Loop state (reads currentIteration and startedAt).
 * @param {object} [context] Extra counters (reads totalCost and errorCount).
 * @returns {{shouldStop: boolean, violations: Array<{type: string, limit: *, current: *}>}}
 */
export function evaluatePolicy(policy, state, context = {}) {
  const violations = [];
  const conditions = Array.isArray(policy?.conditions) ? policy.conditions : [];
  const loopState = state ?? {};
  const counters = context ?? {};

  for (const condition of conditions) {
    // Hand-edited policy files may contain null or scalar entries; skip them.
    if (condition === null || typeof condition !== "object") continue;

    switch (condition.type) {
      case "max_iterations":
        if (loopState.currentIteration > condition.value) {
          violations.push({ type: "max_iterations", limit: condition.value, current: loopState.currentIteration });
        }
        break;
      case "max_cost":
        if (counters.totalCost && counters.totalCost > condition.value) {
          violations.push({ type: "max_cost", limit: condition.value, current: counters.totalCost });
        }
        break;
      case "max_time_minutes":
        if (loopState.startedAt) {
          // An unparsable startedAt yields NaN, which never exceeds the limit.
          const elapsed = (Date.now() - new Date(loopState.startedAt).getTime()) / 60000;
          if (elapsed > condition.value) {
            violations.push({ type: "max_time_minutes", limit: condition.value, current: Math.round(elapsed) });
          }
        }
        break;
      case "max_errors":
        if (counters.errorCount && counters.errorCount > condition.value) {
          violations.push({ type: "max_errors", limit: condition.value, current: counters.errorCount });
        }
        break;
    }
  }

  return {
    shouldStop: violations.length > 0,
    violations,
  };
}
|
|
57
|
+
|
|
58
|
+
export { DEFAULT_POLICY };
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// core/quality-scorer.mjs
|
|
2
|
+
// Quality scoring for story implementations (autoresearch pattern: val_bpb -> quality score)
|
|
3
|
+
|
|
4
|
+
import { execFile } from "node:child_process";
|
|
5
|
+
import { promisify } from "node:util";
|
|
6
|
+
import { readFile, stat } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
|
|
9
|
+
const execFileAsync = promisify(execFile);
|
|
10
|
+
|
|
11
|
+
// Scoring criteria: each one is measured on a 0..max scale and contributes to
// the overall score in proportion to its weight.
const CRITERIA = {
  tests: { weight: 3, max: 10 },
  typecheck: { weight: 2, max: 10 },
  lint: { weight: 1, max: 10 },
  verify: { weight: 2, max: 10 },
  diff: { weight: 1, max: 10 },
  custom: { weight: 1, max: 10 },
};

/**
 * Combine per-criterion results into a 0-100 score and a letter grade.
 *
 * Only criteria present in `results` count, so the score is the weighted share
 * of the points that could have been earned. Each entry is either a number or
 * an object with a `score` property, and is clamped to 0..max. A value that is
 * not a number counts as 0, so one bad criterion cannot turn the whole score
 * into NaN.
 *
 * @param {object} results Map of criterion name to a number or `{ score }`.
 * @returns {{score: number, grade: string, breakdown: object}} Score 0-100,
 *   grade A (>=90), B (>=80), C (>=70), D (>=60) or F, and the clamped values.
 */
export function scoreStory(results) {
  let totalWeight = 0;
  let weightedSum = 0;
  const breakdown = {};

  for (const [key, config] of Object.entries(CRITERIA)) {
    const entry = results?.[key];
    if (entry === undefined || entry === null) continue;

    const raw = typeof entry === "number" ? entry : (entry.score ?? 0);
    const numeric = Number(raw);
    // NaN survives Math.min/Math.max and would poison weightedSum.
    const value = Number.isNaN(numeric) ? 0 : numeric;
    const clamped = Math.max(0, Math.min(config.max, value));

    breakdown[key] = clamped;
    weightedSum += clamped * config.weight;
    totalWeight += config.max * config.weight;
  }

  const score = totalWeight > 0 ? Math.round((weightedSum / totalWeight) * 100) : 0;
  const grade = score >= 90 ? "A" : score >= 80 ? "B" : score >= 70 ? "C" : score >= 60 ? "D" : "F";

  return { score, grade, breakdown };
}
|
|
39
|
+
|
|
40
|
+
// Run a configured shell command in `cwd`. maxBuffer is raised above Node's
// default so that a verbose but successful run is not reported as a failure.
function runQualityCommand(command, cwd, timeout) {
  return execFileAsync("sh", ["-c", command], { cwd, timeout, maxBuffer: 16 * 1024 * 1024 });
}

// Score a failed check: one point off per stdout line containing a marker word.
// A failure with no such line (timeout, missing tool, errors only on stderr)
// scores 0 — previously it scored a full 10 even though the command failed.
function scoreFailedCheck(err, markers) {
  const flagged = String(err?.stdout ?? "")
    .split("\n")
    .filter(line => markers.some(marker => line.includes(marker)))
    .length;
  return flagged === 0 ? 0 : Math.max(0, 10 - flagged);
}

// Map the summary line of `git diff --stat` to a 2..10 score (smaller is better).
// Returns undefined when the summary reports neither insertions nor deletions.
function scoreDiffStat(statOutput) {
  const lines = statOutput.trim().split("\n");
  const summary = lines[lines.length - 1] || "";
  // Matched independently: git omits the part that is zero, so a diff with only
  // insertions (or only deletions) still has to be scored.
  const insertions = summary.match(/(\d+) insertion/);
  const deletions = summary.match(/(\d+) deletion/);
  if (!insertions && !deletions) return undefined;

  const total = Number.parseInt(insertions?.[1] ?? "0", 10) + Number.parseInt(deletions?.[1] ?? "0", 10);
  return total < 100 ? 10 : total < 300 ? 8 : total < 500 ? 6 : total < 1000 ? 4 : 2;
}

/**
 * Evaluate one story's implementation and score it.
 *
 * Criteria are only measured when they can be:
 *  - tests / verify: 10 when the configured command exits successfully, else 0.
 *  - typecheck / lint: 10 on success; on failure see scoreFailedCheck().
 *  - diff: derived from `git diff --stat HEAD~1`; skipped when git fails or
 *    the summary has no counts.
 *  - custom: result of `evaluate(storyId, cwd)` exported by
 *    `<cwd>/.loophaus/quality.mjs`, when that file exists.
 *
 * @param {string} storyId Story identifier, passed to the custom evaluator and echoed back.
 * @param {string} cwd Directory the commands run in.
 * @param {object} [config] Optional testCommand, typecheckCommand, lintCommand, verifyScript.
 * @returns {Promise<object>} `{ storyId, results, score, grade, breakdown }`.
 */
export async function evaluateStory(storyId, cwd, config = {}) {
  const settings = config ?? {};
  const results = {};

  if (settings.testCommand) {
    try {
      await runQualityCommand(settings.testCommand, cwd, 120_000);
      results.tests = 10;
    } catch {
      results.tests = 0;
    }
  }

  if (settings.typecheckCommand) {
    try {
      await runQualityCommand(settings.typecheckCommand, cwd, 60_000);
      results.typecheck = 10;
    } catch (err) {
      results.typecheck = scoreFailedCheck(err, ["error"]);
    }
  }

  if (settings.lintCommand) {
    try {
      await runQualityCommand(settings.lintCommand, cwd, 60_000);
      results.lint = 10;
    } catch (err) {
      results.lint = scoreFailedCheck(err, ["warning", "error"]);
    }
  }

  if (settings.verifyScript) {
    try {
      await runQualityCommand(settings.verifyScript, cwd, 60_000);
      results.verify = 10;
    } catch {
      results.verify = 0;
    }
  }

  try {
    const { stdout } = await execFileAsync("git", ["diff", "--stat", "HEAD~1"], { cwd, timeout: 10_000 });
    const diffScore = scoreDiffStat(stdout);
    if (diffScore !== undefined) results.diff = diffScore;
  } catch {
    // No git diff available
  }

  const customPath = join(cwd, ".loophaus", "quality.mjs");
  try {
    await stat(customPath);
    // import() takes a URL-style specifier; a raw absolute path is not portable
    // (a Windows drive letter is parsed as a URL scheme).
    const { pathToFileURL } = await import("node:url");
    const mod = await import(pathToFileURL(customPath).href);
    if (typeof mod.evaluate === "function") {
      const customResult = await mod.evaluate(storyId, cwd);
      results.custom = typeof customResult === "number" ? customResult : (customResult?.score ?? 0);
    }
  } catch {
    // No custom evaluator (or it failed): the custom criterion is left out.
  }

  return { storyId, results, ...scoreStory(results) };
}
|
|
108
|
+
|
|
109
|
+
/**
 * Append one attempt to `<cwd>/.loophaus/results.tsv`, creating the directory
 * and the header row on first use.
 *
 * Tabs and line breaks inside a field are collapsed to a single space so that
 * a free-text description cannot split a row or shift its columns.
 *
 * @param {object} entry Reads storyId, attempt, score, status, description and
 *   the optional commit.
 * @param {string} [cwd] Project directory; defaults to process.cwd().
 * @returns {Promise<void>}
 */
export async function logResult(entry, cwd) {
  const { appendFile, mkdir, writeFile } = await import("node:fs/promises");
  const dir = join(cwd || process.cwd(), ".loophaus");
  const tsvPath = join(dir, "results.tsv");
  await mkdir(dir, { recursive: true });

  // "wx" creates the file only when it does not exist yet, so two writers
  // cannot both decide the header is missing (the stat-then-append race).
  try {
    await writeFile(tsvPath, "story_id\tattempt\tscore\tstatus\tdescription\tcommit\n", { encoding: "utf-8", flag: "wx" });
  } catch (err) {
    if (err.code !== "EEXIST") throw err;
  }

  const cell = (value) => String(value).replace(/[\t\r\n]+/g, " ");
  const line = [
    cell(entry.storyId),
    cell(entry.attempt),
    cell(entry.score),
    cell(entry.status),
    cell(entry.description),
    cell(entry.commit || ""),
  ].join("\t") + "\n";
  await appendFile(tsvPath, line, "utf-8");
}
|
|
123
|
+
|
|
124
|
+
/**
 * Read every attempt recorded in `<cwd>/.loophaus/results.tsv`.
 *
 * The first line is treated as the header and dropped. Blank lines are
 * skipped and both LF and CRLF line endings are accepted.
 *
 * @param {string} [cwd] Project directory; defaults to process.cwd().
 * @returns {Promise<Array<{storyId: string, attempt: number, score: number,
 *   status: string, description: string, commit: string}>>} One object per
 *   row, or an empty array when the file cannot be read.
 */
export async function readResults(cwd) {
  const tsvPath = join(cwd || process.cwd(), ".loophaus", "results.tsv");

  let raw;
  try {
    raw = await readFile(tsvPath, "utf-8");
  } catch {
    return [];
  }

  // Rows are not trimmed as a whole: that would strip the empty trailing
  // "commit" column and make it undefined for the last row only.
  return raw
    .split(/\r?\n/)
    .slice(1)
    .filter(line => line.trim() !== "")
    .map(line => {
      const [storyId, attempt, score, status, description, commit] = line.split("\t");
      return {
        storyId,
        attempt: Number.parseInt(attempt, 10),
        score: Number.parseInt(score, 10),
        status,
        description,
        commit,
      };
    });
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// core/refine-loop.mjs
|
|
2
|
+
// autoresearch keep/discard pattern for code quality improvement
|
|
3
|
+
|
|
4
|
+
/**
 * Decide whether a refinement attempt should be kept.
 *
 * An attempt is kept only when it scores strictly higher than the baseline.
 * A candidate without a usable score is never kept. A baseline without a
 * usable score (NaN or undefined) is beaten by any scored candidate — otherwise
 * a single failed baseline evaluation would make every later attempt a discard.
 *
 * @param {number} newScore Score of the new attempt.
 * @param {number} baselineScore Score the attempt has to beat.
 * @returns {boolean} True when the attempt should be kept.
 */
export function shouldKeep(newScore, baselineScore) {
  const candidate = Number(newScore);
  const baseline = Number(baselineScore);
  if (Number.isNaN(candidate)) return false;
  if (Number.isNaN(baseline)) return true;
  return candidate > baseline;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Build the feedback prompt for the next refinement attempt of a story.
 *
 * The prompt states the current score, lists every criterion scoring below
 * 7/10 as a weak area, summarises earlier attempts, and ends with a fixed
 * instruction. An evaluation without `breakdown` or `grade` is accepted: the
 * weak-area line and the grade are then left out.
 *
 * @param {object} evaluation Reads storyId, score, grade and breakdown.
 * @param {Array<{attempt: *, score: *, status: *}>} [previousAttempts] Earlier attempts, oldest first.
 * @returns {string} The prompt text.
 */
export function generateFeedback(evaluation, previousAttempts = []) {
  const { storyId, score, grade, breakdown } = evaluation;
  const attempts = Array.isArray(previousAttempts) ? previousAttempts : [];
  const failedCriteria = Object.entries(breakdown ?? {})
    .filter(([, value]) => value < 7)
    .map(([criterion, value]) => `${criterion}: ${value}/10`);

  let prompt = grade
    ? `Story ${storyId} quality: ${score}/100 (${grade}).\n`
    : `Story ${storyId} quality: ${score}/100.\n`;
  if (failedCriteria.length > 0) {
    prompt += `Weak areas: ${failedCriteria.join(", ")}.\n`;
  }
  if (attempts.length > 0) {
    prompt += `Previous attempts: ${attempts.map(a => `attempt ${a.attempt}: ${a.score} (${a.status})`).join(", ")}.\n`;
  }
  prompt += `Improve the implementation. Focus on the weak areas. Try a different approach if the same strategy keeps failing.`;
  return prompt;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Select the evaluations that need refinement, worst first.
 *
 * An evaluation is a target when its score is below the threshold. One without
 * a usable score (missing or NaN) is also a target and sorts before all scored
 * ones — previously it was dropped silently, so a story whose evaluation
 * failed was never refined. The input array is not modified.
 *
 * @param {Array<{score: number}>} evaluations Evaluations to inspect.
 * @param {number} [threshold=80] Minimum acceptable score.
 * @returns {Array<object>} The matching evaluations, lowest score first.
 */
export function identifyRefinementTargets(evaluations, threshold = 80) {
  const rank = (evaluation) => {
    const numeric = Number(evaluation?.score);
    return Number.isNaN(numeric) ? -Infinity : numeric;
  };

  return evaluations
    .filter(evaluation => !(rank(evaluation) >= threshold))
    // Compared rather than subtracted: -Infinity minus -Infinity is NaN.
    .sort((a, b) => {
      const left = rank(a);
      const right = rank(b);
      return left < right ? -1 : left > right ? 1 : 0;
    });
}
|
package/core/session.mjs
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { readFile, writeFile, readdir, mkdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
// Directory that holds one JSON checkpoint file per session.
function getSessionsDir(cwd) {
  return join(cwd || process.cwd(), ".loophaus", "sessions");
}

// Turn a session id into its checkpoint file name, or null when the id is empty
// or contains a path separator / NUL. The id reaches here from the command line
// and from hook state, and must not be able to address a file outside the
// sessions directory (e.g. "../../something").
function sessionFileName(sessionId) {
  const id = String(sessionId ?? "");
  if (id === "" || /[\\/\0]/.test(id)) return null;
  return `${id}.json`;
}

/**
 * Write a checkpoint for a session to `<sessions dir>/<sessionId>.json`.
 *
 * The stored object is `{ sessionId, savedAt, ...data }`; keys in `data` are
 * spread last and therefore take precedence over the two generated fields.
 *
 * @param {string} sessionId Session identifier; used as the file name.
 * @param {object} data Fields to store with the checkpoint.
 * @param {string} [cwd] Project directory; defaults to process.cwd().
 * @returns {Promise<object>} The checkpoint object that was written.
 * @throws {Error} When sessionId is empty or contains a path separator.
 */
export async function saveCheckpoint(sessionId, data, cwd) {
  const fileName = sessionFileName(sessionId);
  if (fileName === null) {
    throw new Error(`Invalid session id: ${JSON.stringify(String(sessionId))}`);
  }

  const dir = getSessionsDir(cwd);
  await mkdir(dir, { recursive: true });
  const checkpoint = {
    sessionId,
    savedAt: new Date().toISOString(),
    ...data,
  };
  await writeFile(join(dir, fileName), JSON.stringify(checkpoint, null, 2), "utf-8");
  return checkpoint;
}

/**
 * Read the checkpoint stored for a session.
 *
 * @param {string} sessionId Session identifier.
 * @param {string} [cwd] Project directory; defaults to process.cwd().
 * @returns {Promise<object|null>} The parsed checkpoint, or null when the id is
 *   invalid or the file is missing, unreadable, or not valid JSON.
 */
export async function loadCheckpoint(sessionId, cwd) {
  const fileName = sessionFileName(sessionId);
  if (fileName === null) return null;

  try {
    const raw = await readFile(join(getSessionsDir(cwd), fileName), "utf-8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}

/**
 * List all stored checkpoints, most recently saved first.
 *
 * Files that are not `.json`, cannot be parsed, or do not contain a JSON object
 * are skipped. Checkpoints without a parsable `savedAt` sort last.
 *
 * @param {string} [cwd] Project directory; defaults to process.cwd().
 * @returns {Promise<object[]>} Parsed checkpoints; empty when the sessions
 *   directory cannot be read.
 */
export async function listSessions(cwd) {
  const dir = getSessionsDir(cwd);

  let files;
  try {
    files = await readdir(dir);
  } catch {
    return [];
  }

  const sessions = [];
  for (const file of files) {
    if (!file.endsWith(".json")) continue;
    try {
      const data = JSON.parse(await readFile(join(dir, file), "utf-8"));
      // `null` or a scalar is valid JSON but not a checkpoint; keeping it would
      // make the sort below throw and hide every other session.
      if (data === null || typeof data !== "object" || Array.isArray(data)) continue;
      sessions.push(data);
    } catch { /* skip malformed */ }
  }

  const savedTime = (session) => {
    const time = new Date(session.savedAt).getTime();
    return Number.isNaN(time) ? -Infinity : time;
  };
  return sessions.sort((a, b) => {
    const left = savedTime(a);
    const right = savedTime(b);
    return left === right ? 0 : right > left ? 1 : -1;
  });
}
|
|
48
|
+
|
|
49
|
+
/**
 * Re-activate a saved session: load its checkpoint, rebuild the loop state
 * from it with `active: true`, and write that state through the state store.
 *
 * Saved values are restored as they were stored; defaults apply only to
 * fields that are absent from the checkpoint. With the previous `||` defaults,
 * a saved `maxIterations` of 0 or an empty `completionPromise` came back as
 * 20 / "TADA", so the resumed loop ran under different limits than the one
 * that was checkpointed.
 * NOTE(review): how the engine interprets `maxIterations: 0` is not visible
 * here — confirm that restoring it unchanged is the intended behaviour.
 *
 * @param {string} sessionId Session identifier.
 * @param {string} [cwd] Project directory, passed to the checkpoint and state stores.
 * @returns {Promise<object|null>} The state that was written, or null when no
 *   checkpoint exists for the id.
 */
export async function resumeSession(sessionId, cwd) {
  const checkpoint = await loadCheckpoint(sessionId, cwd);
  if (!checkpoint) return null;

  const { write } = await import("../store/state-store.mjs");
  const state = {
    active: true,
    prompt: checkpoint.prompt ?? "",
    completionPromise: checkpoint.completionPromise ?? "TADA",
    maxIterations: checkpoint.maxIterations ?? 20,
    currentIteration: checkpoint.currentIteration ?? 0,
    sessionId: checkpoint.sessionId,
    name: checkpoint.name ?? "",
    startedAt: checkpoint.startedAt ?? new Date().toISOString(),
  };
  await write(state, cwd, checkpoint.name);
  return state;
}
|
package/core/validate.mjs
CHANGED
package/hooks/stop-hook.mjs
CHANGED
|
@@ -4,6 +4,32 @@ import { evaluateStopHook } from "../core/engine.mjs";
|
|
|
4
4
|
import { getLastAssistantText, hasPendingStories } from "../core/io-helpers.mjs";
|
|
5
5
|
import { read as readState, write as writeState } from "../store/state-store.mjs";
|
|
6
6
|
import { logEvents } from "../core/event-logger.mjs";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
|
|
9
|
+
/**
 * Run the `testCommand` of every story in `<cwd>/prd.json` that is not yet
 * marked as passing.
 *
 * Stories without a testCommand, stories with a truthy `passes`, and entries
 * that are not objects are skipped. Commands run one after another through
 * `sh -c` in `cwd`, with a 60 second timeout each.
 *
 * @param {string} cwd Project directory containing prd.json.
 * @returns {Promise<Array<{storyId: *, passed: boolean, error?: string}>>} One
 *   result per executed command; empty when prd.json is missing, unparsable,
 *   or has no `userStories` array.
 */
async function runStoryTests(cwd) {
  const { readFile } = await import("node:fs/promises");
  const { execFile } = await import("node:child_process");
  const { promisify } = await import("node:util");
  const execFileAsync = promisify(execFile);
  const prdPath = join(cwd, "prd.json");

  let prd;
  try {
    prd = JSON.parse(await readFile(prdPath, "utf-8"));
  } catch {
    return [];
  }
  if (!Array.isArray(prd?.userStories)) return [];

  const results = [];
  for (const story of prd.userStories) {
    // `story?.`: a null entry used to throw here, which the surrounding catch
    // turned into an empty result for every story.
    if (!story?.testCommand || story.passes) continue;
    try {
      await execFileAsync("sh", ["-c", story.testCommand], { cwd, timeout: 60_000 });
      results.push({ storyId: story.id, passed: true });
    } catch (err) {
      results.push({ storyId: story.id, passed: false, error: err.message });
    }
  }
  return results;
}
|
|
7
33
|
|
|
8
34
|
async function readStdin() {
|
|
9
35
|
const chunks = [];
|
|
@@ -39,11 +65,21 @@ async function main() {
|
|
|
39
65
|
}
|
|
40
66
|
}
|
|
41
67
|
|
|
68
|
+
// Run story tests if prd.json has testCommand fields
|
|
69
|
+
const testResults = await runStoryTests(cwd);
|
|
70
|
+
|
|
71
|
+
// Evaluate loop policy
|
|
72
|
+
const { loadPolicy, evaluatePolicy } = await import("../core/policy.mjs");
|
|
73
|
+
const policy = await loadPolicy(cwd);
|
|
74
|
+
const policyResult = evaluatePolicy(policy, state, { totalCost: 0, errorCount: 0 });
|
|
75
|
+
|
|
42
76
|
const input = {
|
|
43
77
|
...hookInput,
|
|
44
78
|
last_assistant_text: lastText,
|
|
45
79
|
has_pending_stories: pending,
|
|
46
80
|
verify_result: verifyResult,
|
|
81
|
+
test_results: testResults,
|
|
82
|
+
policy_result: policyResult,
|
|
47
83
|
};
|
|
48
84
|
|
|
49
85
|
const result = evaluateStopHook(input, state);
|
|
@@ -51,6 +87,19 @@ async function main() {
|
|
|
51
87
|
await writeState(result.nextState, cwd);
|
|
52
88
|
await logEvents(result.events, { adapter: "auto", loop_id: state.sessionId || "unknown" }, cwd);
|
|
53
89
|
|
|
90
|
+
// Save session checkpoint (best-effort)
|
|
91
|
+
try {
|
|
92
|
+
const { saveCheckpoint } = await import("../core/session.mjs");
|
|
93
|
+
await saveCheckpoint(result.nextState.sessionId || `auto-${Date.now()}`, {
|
|
94
|
+
prompt: result.nextState.prompt,
|
|
95
|
+
completionPromise: result.nextState.completionPromise,
|
|
96
|
+
maxIterations: result.nextState.maxIterations,
|
|
97
|
+
currentIteration: result.nextState.currentIteration,
|
|
98
|
+
name: result.nextState.name,
|
|
99
|
+
startedAt: result.nextState.startedAt,
|
|
100
|
+
}, cwd);
|
|
101
|
+
} catch { /* best-effort */ }
|
|
102
|
+
|
|
54
103
|
if (result.message) process.stderr.write(result.message + "\n");
|
|
55
104
|
if (result.output) process.stdout.write(JSON.stringify(result.output));
|
|
56
105
|
process.exit(0);
|
package/package.json
CHANGED
|
@@ -112,6 +112,14 @@ Score >= 3: parallel mode (worktrees by group). Score < 3: sequential mode.
|
|
|
112
112
|
## Phase 4B: Sequential Execution (score < 3)
|
|
113
113
|
Create \`.loophaus/state.json\` and work through stories one at a time.
|
|
114
114
|
|
|
115
|
+
## Phase 5: Evaluate
|
|
116
|
+
Score each story 0-100 (tests, typecheck, lint, verify, diff size). Record in \`.loophaus/results.tsv\`.
|
|
117
|
+
|
|
118
|
+
## Phase 6: Refine Loop (autoresearch pattern)
|
|
119
|
+
For stories below quality threshold (default 80), loop up to 3 attempts:
|
|
120
|
+
1. Checkpoint, 2. Re-implement weak areas, 3. Re-evaluate.
|
|
121
|
+
Keep if improved, discard (git reset) if not. Best-effort after max attempts.
|
|
122
|
+
|
|
115
123
|
## Rules
|
|
116
124
|
- Present PRD for user approval before execution
|
|
117
125
|
- Show parallelism score and recommendation
|
|
@@ -84,6 +84,14 @@ Score >= 3: parallel (worktrees). Score < 3: sequential.
|
|
|
84
84
|
Parallel: create worktrees per group, distribute stories, run simultaneously, merge back.
|
|
85
85
|
Sequential: single loop through stories in order.
|
|
86
86
|
|
|
87
|
+
## Phase 5: Evaluate
|
|
88
|
+
Score each story 0-100 (tests, typecheck, lint, verify, diff size). Record in \`.loophaus/results.tsv\`.
|
|
89
|
+
|
|
90
|
+
## Phase 6: Refine Loop (autoresearch pattern)
|
|
91
|
+
For stories below quality threshold (default 80), loop up to 3 attempts:
|
|
92
|
+
1. Checkpoint, 2. Re-implement weak areas, 3. Re-evaluate.
|
|
93
|
+
Keep if improved, discard (git reset) if not. Best-effort after max attempts.
|
|
94
|
+
|
|
87
95
|
Rules: present PRD for approval, show parallelism score, stop on merge conflicts.
|
|
88
96
|
`,
|
|
89
97
|
},
|
|
@@ -30,6 +30,8 @@ Ask **concise questions** for missing items. Max 3-5 per round, one round only.
|
|
|
30
30
|
| **Constraints** | Must not break existing tests? Library restrictions? |
|
|
31
31
|
| **When stuck** | Document? Skip? Suggest alternative? |
|
|
32
32
|
| **Parallelism potential** | Multiple services? Independent file groups? |
|
|
33
|
+
| **Quality verification** | What verification commands? (npm test, npx tsc, etc.) |
|
|
34
|
+
| **Quality threshold** | What quality threshold? (default: 80/100) |
|
|
33
35
|
|
|
34
36
|
## Phase Design
|
|
35
37
|
|