loop-engineering 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +115 -0
- package/bin/loop-engineering.mjs +225 -0
- package/package.json +50 -0
- package/patterns/cursor-loop.md +57 -0
- package/skill/SKILL.md +79 -0
- package/skill/reference/examples.md +72 -0
- package/skill/reference/harden.md +31 -0
- package/skill/reference/ide-natives.md +14 -0
- package/skill/reference/init.md +51 -0
- package/skill/reference/new.md +55 -0
- package/skill/reference/run.md +44 -0
- package/skill/reference/status.md +15 -0
- package/skill/reference/verify.md +23 -0
- package/skill/scripts/lint_spec.mjs +140 -0
- package/skill/scripts/run_loop.mjs +249 -0
- package/src/lib/audit.mjs +137 -0
- package/src/lib/cost.mjs +72 -0
- package/src/lib/detect.mjs +108 -0
- package/src/lib/install.mjs +112 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# IDE-native loop primitives
|
|
2
|
+
|
|
3
|
+
Use this once a spec passes `verify` — to decide whether to run the loop via this skill's `run` command, or hand off to the IDE's own native autonomous-loop mechanism, if it has one.
|
|
4
|
+
|
|
5
|
+
| Tool | Native loop primitive | Notes |
|
|
6
|
+
|---|---|---|
|
|
7
|
+
| **Codex CLI / IDE ext** | `/goal` (enable via `features.goals` in config.toml if not listed) | Paste this spec's Goal + Verification straight in. Codex treats goal text as both prompt and completion criteria, with its own progress UI (pause/resume/edit). If available, prefer it over this skill's `run` — it is native and its progress UI is better suited to the job. |
|
|
8
|
+
| **Claude Code** | No single built-in `/goal`. This skill's `run` command (backed by `<skill-dir>/scripts/run_loop.mjs`) is the recommended mechanism — it persists state across turns the way a bare conversation loop can't. | Keep `LOOP_SPEC.md` and `.loop/state.json` in the repo; reference the spec each turn rather than re-describing the goal. |
|
|
9
|
+
| **Cursor** | No first-class autonomous-until-done primitive as of mid-2026. Multi-task agent runs exist but aren't goal-gated. | Use `/loop` (the Cursor command in `integrations/cursor/`) to author/harden the spec; run iterations by re-invoking manually or scripting `run_loop.mjs` externally. |
|
|
10
|
+
| **Windsurf (Cascade)** | Workflows (`.windsurf/workflows/*.md`, `/workflow-name`) run once per invocation, not autonomously until a goal is met. | Chain workflow steps and use this spec's Verification command as the explicit gate between them. |
|
|
11
|
+
| **Antigravity** | Skill-triggered, single-shot per invocation like Windsurf, as of mid-2026. | Same approach — the spec governs each invocation; verify current docs before assuming an autonomous mode exists. |
|
|
12
|
+
| **Generic / scripted (any CLI agent)** | Plain wrapper: call the agent CLI non-interactively, then `run_loop.mjs check`, branch on `status`, repeat. | Most portable. Works with any CLI-capable agent regardless of native support. |
|
|
13
|
+
|
|
14
|
+
This table reflects mid-2026 product behavior and changes fast. If a native `/goal`-style mode exists and works well, prefer it — this skill's job is producing the spec either way, not insisting on its own runner.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# /loop init — Capture project conventions into LOOP_CONTEXT.md
|
|
2
|
+
|
|
3
|
+
Interviews the user for standing project conventions and writes them to `LOOP_CONTEXT.md`. Future `/loop new` and `/loop harden` invocations read this file so they don't re-ask for things that never change (test command, build command, forbidden paths).
|
|
4
|
+
|
|
5
|
+
## When to use
|
|
6
|
+
|
|
7
|
+
- First time using `/loop` in a project that has an established test/build setup.
|
|
8
|
+
- After significant project restructuring (changed test runner, new forbidden paths, etc.).
|
|
9
|
+
- When the user says "remember that we always use X" or "never touch Y."
|
|
10
|
+
|
|
11
|
+
## Flow
|
|
12
|
+
|
|
13
|
+
1. **Check for existing file.** If `LOOP_CONTEXT.md` already exists, read it and ask: "I found an existing LOOP_CONTEXT.md. Do you want to update it or start fresh?" If updating, show the current values and let the user amend only what changed.
|
|
14
|
+
|
|
15
|
+
2. **Interview.** Ask the following, one at a time. Accept "none" or "skip" for any item.
|
|
16
|
+
|
|
17
|
+
- **Test command**: "What command runs your test suite?" (e.g. `npm test`, `pytest`, `go test ./...`)
|
|
18
|
+
- **Build command**: "What command builds the project, if any?" (e.g. `npm run build`, `make`, "none")
|
|
19
|
+
- **Lint/format command**: "What command runs lint or formatting?" (e.g. `npm run lint`, `ruff check .`, "none")
|
|
20
|
+
- **Forbidden always**: "Are there files, directories, or actions that loop specs in this project should never touch? List them." (e.g. `migrations/`, force-push, editing test files to make them pass)
|
|
21
|
+
- **Other standing notes**: "Anything else a loop spec author should know about this project by default?" (optional — the user can skip)
|
|
22
|
+
|
|
23
|
+
3. **Write `LOOP_CONTEXT.md`** with this structure:
|
|
24
|
+
|
|
25
|
+
```markdown
|
|
26
|
+
# Loop Context
|
|
27
|
+
|
|
28
|
+
Project conventions for loop spec authoring. Generated by `/loop init`.
|
|
29
|
+
Update by running `/loop init` again.
|
|
30
|
+
|
|
31
|
+
## Commands
|
|
32
|
+
- **Test**: `<test command or "none">`
|
|
33
|
+
- **Build**: `<build command or "none">`
|
|
34
|
+
- **Lint**: `<lint command or "none">`
|
|
35
|
+
|
|
36
|
+
## Forbidden always
|
|
37
|
+
<bullet list of forbidden paths/actions, or "none specified">
|
|
38
|
+
|
|
39
|
+
## Notes
|
|
40
|
+
<standing notes, or omit section if none>
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
4. **Confirm before writing.** Show the user the draft and ask "Write this to LOOP_CONTEXT.md?" before creating or overwriting the file.
|
|
44
|
+
|
|
45
|
+
5. **Remind how it's used.** After writing, say: "When you run `/loop new` or `/loop harden`, I'll read LOOP_CONTEXT.md and pre-fill the test/build commands and forbidden actions — you won't need to re-state them each time."
|
|
46
|
+
|
|
47
|
+
## How downstream commands use LOOP_CONTEXT.md
|
|
48
|
+
|
|
49
|
+
- `/loop new`: If `LOOP_CONTEXT.md` exists, pre-fill the Verification section with the test command and the Scope's Forbidden list with the standing forbidden actions. Still ask the user to confirm or override them for this specific spec.
|
|
50
|
+
- `/loop harden`: Same — use the standing commands as defaults when proposing fixes to a weak spec.
|
|
51
|
+
- The file is never required. If it doesn't exist, the downstream commands ask for this information as usual.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# `new` — author a fresh loop spec
|
|
2
|
+
|
|
3
|
+
Triggered by: `/loop new <goal description>`, or any goal description with no existing `LOOP_SPEC.md` in the working directory.
|
|
4
|
+
|
|
5
|
+
## Flow
|
|
6
|
+
|
|
7
|
+
1. **Read what's given.** If the user's goal description already implies an answer for one of the 5 components, don't ask about it again — extract it.
|
|
8
|
+
|
|
9
|
+
2. **Interview for the 2 components that are usually actually missing** — Goal precision and Verification. Don't interrogate all 5 one by one; most users can state scope/escalation defaults without trouble when asked a single time, but goal vagueness and verification are where real gaps live.
|
|
10
|
+
|
|
11
|
+
- If the goal is a vague adjective ("better," "cleaner," "more polished," "looks good"): ask what observable, checkable thing would prove it's done. Push past the first vague answer if needed — "looks good" → "what would make it look good?" → "consistent spacing" → "is there a lint rule or visual diff that catches inconsistent spacing, or do we need a human eyeball each time?"
|
|
12
|
+
- If no verification method is stated: ask what command (test, build, lint, type-check) would return pass/fail. If genuinely nothing mechanical exists, say so and propose an LLM-judge rubric explicitly, flagged as the weakest link — never silently assume one exists.
|
|
13
|
+
|
|
14
|
+
3. **Default the other 3 unless the user has opinions:**
|
|
15
|
+
- Termination: max iterations default 8–10 for a build task, lower (3–5) for anything touching production data or external services. No-progress default: 2 consecutive identical results. State these defaults explicitly so the user can override them; don't just silently pick them.
|
|
16
|
+
- Scope: ask "anything definitely off-limits?" if not stated — at minimum forbid editing test files to force a pass.
|
|
17
|
+
- Escalation: default to "stop, summarize attempts, wait for human" unless the user wants something else (e.g. auto-revert on cap).
|
|
18
|
+
|
|
19
|
+
4. **Write `LOOP_SPEC.md`** in the working directory using this exact shape:
|
|
20
|
+
|
|
21
|
+
```markdown
|
|
22
|
+
# Loop: <short name>
|
|
23
|
+
|
|
24
|
+
## Goal
|
|
25
|
+
<concrete, observable end state — not an adjective>
|
|
26
|
+
|
|
27
|
+
## Verification
|
|
28
|
+
```
|
|
29
|
+
<exact command, e.g. npm run test && npm run build>
|
|
30
|
+
```
|
|
31
|
+
(or, if no deterministic check exists: state the LLM-judge rubric explicitly here, and say so)
|
|
32
|
+
|
|
33
|
+
## Termination
|
|
34
|
+
- Success: verification exits 0
|
|
35
|
+
- Max iterations: <N>
|
|
36
|
+
- No-progress: stop if 2 consecutive iterations produce identical result + unchanged working tree
|
|
37
|
+
- Budget: <optional>
|
|
38
|
+
|
|
39
|
+
## Scope
|
|
40
|
+
- Allowed: <paths/stack>
|
|
41
|
+
- Forbidden: <at least one explicit thing>
|
|
42
|
+
|
|
43
|
+
## Escalation
|
|
44
|
+
On cap or no-progress: stop, summarize attempts + last error, wait for human review.
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
5. **Run the linter immediately after writing the file.** Do not skip this because you just wrote it carefully:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
node <skill-dir>/scripts/lint_spec.mjs LOOP_SPEC.md
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
If it fails, fix exactly what it reports and re-run — do not guess at what else might be wrong.
|
|
54
|
+
|
|
55
|
+
6. Once it passes, tell the user the spec is ready and ask whether they want to `run` it now, hand it to their IDE's native loop primitive (see `reference/ide-natives.md`), or just keep the spec for later.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# `run` — execute the loop end-to-end
|
|
2
|
+
|
|
3
|
+
Triggered by: `/loop run [path]`, "start the loop," "iterate until this passes."
|
|
4
|
+
|
|
5
|
+
## Precondition
|
|
6
|
+
|
|
7
|
+
**Never run a spec that hasn't passed the linter.** Run `node <skill-dir>/scripts/lint_spec.mjs <path>` first if it hasn't been checked in this conversation already. If it fails, stop and go to `reference/harden.md` — do not run a loop against an incomplete spec; the no-progress and cap logic only mean something if Goal/Verification/Termination are real.
|
|
8
|
+
|
|
9
|
+
## The loop
|
|
10
|
+
|
|
11
|
+
This is the actual cycle. The script (`run_loop.mjs`) only runs the verifier and tracks state — it does not write code. That's the agent's job, every iteration, between script calls:
|
|
12
|
+
|
|
13
|
+
1. **Check current state** (skip on the very first iteration of a fresh run):
|
|
14
|
+
```bash
|
|
15
|
+
node <skill-dir>/scripts/run_loop.mjs check LOOP_SPEC.md
|
|
16
|
+
```
|
|
17
|
+
Read the JSON output's `status` field. Three possible values:
|
|
18
|
+
- `"success"` → stop. Report success to the user with the iteration count. Done.
|
|
19
|
+
- `"continue"` → an attempt was made and failed, but under cap and making progress. Proceed to step 2.
|
|
20
|
+
- `"stop-escalate"` → stop immediately. Do not run the verifier again, do not make another edit "just to see." Report the `reason` field to the user verbatim, along with the iteration history (`node <skill-dir>/scripts/run_loop.mjs status LOOP_SPEC.md`), and wait for human input per the spec's Escalation section.
|
|
21
|
+
|
|
22
|
+
2. **If `continue` (or this is iteration 1): make a real change.** Read the spec's Goal and the last failure's `stderr` (included in the JSON). Make a genuine attempt to close the gap — actually edit files, don't just re-run the check hoping for a different result. This is the step the script cannot do for you.
|
|
23
|
+
|
|
24
|
+
3. **Loop back to step 1.** Re-run `check`. The script re-evaluates from the new state.
|
|
25
|
+
|
|
26
|
+
## Critical: don't fight the script's verdict
|
|
27
|
+
|
|
28
|
+
If `run_loop.mjs` reports `stop-escalate` for no-progress, that means the verifier output AND the working tree were identical to the previous attempt — i.e., either nothing was actually changed, or a change was made and then reverted to the same state. Do not:
|
|
29
|
+
- Re-run the check again manually outside the loop hoping it now reads differently — it won't, the inputs haven't changed.
|
|
30
|
+
- Argue that "this time it'll work" and bypass the script by running the verification command directly instead of through `run_loop.mjs check` — that's circumventing the cap, not satisfying it.
|
|
31
|
+
|
|
32
|
+
If you genuinely believe the no-progress detection is wrong (e.g. the change was real but doesn't show in git status because it touched a file outside the repo), say so to the user explicitly and let them decide whether to `reset` and continue — don't silently route around the script.
|
|
33
|
+
|
|
34
|
+
## Reporting
|
|
35
|
+
|
|
36
|
+
After every iteration, give the user a one-line status: iteration N/cap, pass/fail, what changed. Don't go silent for multiple iterations and then dump a wall of history — that's how a stuck loop goes unnoticed.
|
|
37
|
+
|
|
38
|
+
## On success
|
|
39
|
+
|
|
40
|
+
Report the final iteration count and what changed across the run. Don't immediately suggest more changes unless asked — the loop's job was to hit the stated goal, not to keep improving indefinitely.
|
|
41
|
+
|
|
42
|
+
## On escalation
|
|
43
|
+
|
|
44
|
+
Don't soften it. State plainly: cap reached / no-progress detected, here's the last error, here's what was tried, your call on next steps. Offer concrete next steps (raise the cap and retry, change the verification method, manually intervene) rather than just reporting failure and stopping.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# `status` — report iteration history, read-only
|
|
2
|
+
|
|
3
|
+
Triggered by: `/loop status [path]`, "how's the loop going," "what happened last run."
|
|
4
|
+
|
|
5
|
+
## Flow
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
node <skill-dir>/scripts/run_loop.mjs status <path, default LOOP_SPEC.md>
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Report:
|
|
12
|
+
- `"no-history"` → no run has happened yet for this spec. Suggest `run` if they want to start one.
|
|
13
|
+
- Otherwise → completed iterations vs max, and a short read of the history: did it trend toward passing, or stall? Look at consecutive hashes in `history` — if the last two `hash` values match and the last iteration failed, the no-progress rule (2 consecutive identical results) has already fired and the loop was escalated at that `check`.
|
|
14
|
+
|
|
15
|
+
This command never executes the verifier or modifies state — it only reads `.loop/state.json`. If the user wants to clear history and start fresh, that's `node <skill-dir>/scripts/run_loop.mjs reset <path>`, not part of `status` — confirm with them before running `reset`, since it discards the record of what was already tried.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# `verify` — lint-only check, no execution
|
|
2
|
+
|
|
3
|
+
Triggered by: `/loop verify [path]`, "is this spec ready?", "check my loop spec."
|
|
4
|
+
|
|
5
|
+
## Flow
|
|
6
|
+
|
|
7
|
+
1. Run:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
node <skill-dir>/scripts/lint_spec.mjs <path, default LOOP_SPEC.md>
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
2. Report the result verbatim-ish to the user:
|
|
14
|
+
- **Exit 0**: spec is loop-ready. Tell the user plainly, and ask if they want to `run` it now.
|
|
15
|
+
- **Exit 1**: relay the itemized issue list exactly as the script reported it — don't summarize it into something vaguer. If the user wants it fixed now, switch to `reference/harden.md`; if they just wanted the check, stop here.
|
|
16
|
+
|
|
17
|
+
## What this command does NOT do
|
|
18
|
+
|
|
19
|
+
- Does not execute the verification command from the spec (that's `run`).
|
|
20
|
+
- Does not modify the spec file (that's `harden`).
|
|
21
|
+
- Does not touch `.loop/state.json`.
|
|
22
|
+
|
|
23
|
+
This is a read-only check. If the user wanted execution, they'll say so — don't assume `verify` implies `run`.
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// lint_spec.mjs — validates a LOOP_SPEC.md against the 5 required loop-engineering components.
|
|
3
|
+
// Exit 0 = spec is loop-ready. Exit 1 = spec is missing or incomplete, with specific errors.
|
|
4
|
+
//
|
|
5
|
+
// Usage: node lint_spec.mjs [path/to/LOOP_SPEC.md]
|
|
6
|
+
// Default path: ./LOOP_SPEC.md
|
|
7
|
+
|
|
8
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
9
|
+
import { resolve } from "node:path";
|
|
10
|
+
|
|
11
|
+
// Absolute path of the spec being linted; defaults to ./LOOP_SPEC.md when no CLI argument is given.
const target = resolve(process.argv[2] || "LOOP_SPEC.md");

// The five components every loop spec must contain. Each entry pairs the section
// name with a multi-line regex that recognises its "## <Name>" heading line.
const REQUIRED_SECTIONS = ["Goal", "Verification", "Termination", "Scope", "Escalation"].map(
  (key) => ({ key, header: new RegExp(`^##\\s+${key}\\s*$`, "m") })
);
|
|
20
|
+
|
|
21
|
+
// Detectors for sections that were never filled in. Every pattern is anchored at
// both ends, so it only fires when the ENTIRE trimmed section is the placeholder.
// A substring match would wrongly flag real commands that contain <args>,
// e.g. `playwright test <file>`.
const PLACEHOLDER_WHOLE_SECTION_PATTERNS = [
  /^\.\.\.$/,
  // A single unfilled template slot such as "<concrete, observable end state>".
  /^<[^>]{2,80}>$/,
  // Bare filler words, matched case-insensitively.
  ...["TBD", "TODO", "N\\/A"].map((word) => new RegExp(`^${word}$`, "i")),
];
|
|
31
|
+
|
|
32
|
+
/**
 * Reports whether a section body is missing or is still template filler.
 *
 * @param {string|null|undefined} text - Section body as extracted from the spec.
 * @returns {boolean} true when the body is absent, empty, whitespace-only, or
 *   consists entirely of one of PLACEHOLDER_WHOLE_SECTION_PATTERNS.
 */
function isPlaceholder(text) {
  if (!text) return true;
  const trimmed = text.trim();
  // A body made only of spaces/newlines carries no content either; without this
  // guard it would match none of the anchored patterns and count as substantive.
  if (trimmed.length === 0) return true;
  return PLACEHOLDER_WHOLE_SECTION_PATTERNS.some((pattern) => pattern.test(trimmed));
}
|
|
37
|
+
|
|
38
|
+
/**
 * Returns the trimmed body of the "## <key>" section of a spec.
 *
 * The body runs from the end of the heading line up to the next "## " heading
 * or the end of the file.
 *
 * @param {string} content - Full text of the spec file.
 * @param {string} key - Section name exactly as it appears after "## ".
 * @returns {string|null} The trimmed body ("" when the section is empty), or
 *   null when no such heading exists.
 */
function extractSection(content, key) {
  // The key is matched literally, so neutralise any regex metacharacters in it.
  const literalKey = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // The heading must not consume its own line break: only horizontal whitespace
  // (and a CR) may follow the key, and the newline is left for the body. If the
  // heading swallowed blank lines, the "\n## " boundary of the NEXT heading would
  // already be consumed and an empty section would capture its neighbour's text.
  // No "m" flag is used, so "$" means end-of-string rather than end-of-line.
  const re = new RegExp(
    `(?:^|\\n)##[^\\S\\n]+${literalKey}[^\\S\\n]*(?=\\n|$)([\\s\\S]*?)(?=\\n##\\s+|$)`
  );
  const m = content.match(re);
  return m ? m[1].trim() : null;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Validates the Verification section body.
 *
 * The section is acceptable when it contains something that looks like a command
 * (a fenced block or an inline backtick span), or when it openly declares an
 * LLM-judge / rubric based check.
 *
 * @param {string} text - Body of the Verification section.
 * @returns {string|null} An error message, or null when the section is acceptable.
 */
function checkVerification(text) {
  const commandShapes = [/```[\s\S]*?```/, /`[^`]+`/];
  const containsCommand = commandShapes.some((shape) => shape.test(text));
  const declaresJudge = /llm[\s-]?judge|llm[\s-]?as[\s-]?judge|rubric/i.test(text);

  if (containsCommand || declaresJudge) {
    return null;
  }
  return "Verification section has no command (in backticks/code fence) and does not explicitly name an LLM-judge rubric as fallback. A verifier must be either a runnable check or an explicitly-flagged LLM-judge.";
}
|
|
58
|
+
|
|
59
|
+
/**
 * Validates the Termination section body: it must mention a success condition,
 * an iteration cap, and a no-progress exit.
 *
 * @param {string} text - Body of the Termination section.
 * @returns {string|null} An error message listing what is absent, or null when
 *   all three are mentioned.
 */
function checkTermination(text) {
  // [label used in the error message, keyword pattern that satisfies it]
  const requirements = [
    ["success condition", /success/i],
    ["max iterations / iteration cap", /max\s+iterations?|iteration\s+cap/i],
    ["no-progress exit", /no-?progress/i],
  ];
  const absent = requirements
    .filter(([, pattern]) => !pattern.test(text))
    .map(([label]) => label);

  if (absent.length === 0) {
    return null;
  }
  return `Termination section is missing: ${absent.join(", ")}. All three are required — a loop without a no-progress exit can burn its entire budget repeating the same failure.`;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Validates the Scope section body: it must contain at least one prohibition keyword.
 *
 * @param {string} text - Body of the Scope section.
 * @returns {string|null} An error message, or null when a prohibition is present.
 */
function checkScope(text) {
  const namesProhibition = /forbidden|not\s+allowed|never|do\s+not/i.test(text);
  return namesProhibition
    ? null
    : "Scope section names no forbidden actions. Every loop spec must explicitly forbid at least one destructive shortcut (e.g. editing tests to force a pass, touching migrations, force-pushing).";
}
|
|
80
|
+
|
|
81
|
+
/**
 * Validates the Escalation section body: it must contain at least one keyword
 * describing a hand-off (stop, halt, wait, escalate, human, review).
 *
 * @param {string} text - Body of the Escalation section.
 * @returns {string|null} An error message, or null when a hand-off is described.
 */
function checkEscalation(text) {
  const describesHandOff = /stop|halt|wait|escalat|human|review/i.test(text);
  return describesHandOff
    ? null
    : "Escalation section does not describe what happens when the cap is hit. Must explicitly state the loop stops and waits for a human rather than retrying indefinitely.";
}
|
|
88
|
+
|
|
89
|
+
/**
 * Entry point of the linter: reads the spec at `target`, collects every problem
 * found, prints a verdict, and exits.
 *
 * Exit code 0 means the spec is loop-ready; exit code 1 means the file is missing
 * or at least one issue was reported on stderr.
 */
function main() {
  if (!existsSync(target)) {
    console.error(`FAIL: ${target} not found.`);
    console.error("No loop spec exists yet. Run the 'new' command to author one before verifying or running a loop.");
    process.exit(1);
  }

  const content = readFileSync(target, "utf-8");
  const errors = [];
  const sections = {};

  // Pass 1: each required heading must exist and have a non-placeholder body.
  for (const section of REQUIRED_SECTIONS) {
    if (section.header.test(content)) {
      const body = extractSection(content, section.key);
      sections[section.key] = body;
      if (isPlaceholder(body)) {
        errors.push(`Section "## ${section.key}" is empty or still contains placeholder/template text: ${JSON.stringify((body || "").slice(0, 80))}`);
      }
    } else {
      errors.push(`Missing required section: "## ${section.key}"`);
    }
  }

  // Pass 2: section-specific checks, run only for sections that exist and hold
  // real content. The order here fixes the order of the reported issues.
  const deepChecks = [
    ["Verification", checkVerification],
    ["Termination", checkTermination],
    ["Scope", checkScope],
    ["Escalation", checkEscalation],
  ];
  for (const [name, check] of deepChecks) {
    const body = sections[name];
    if (!body || isPlaceholder(body)) continue;
    const problem = check(body);
    if (problem) errors.push(`[${name}] ${problem}`);
  }

  if (errors.length > 0) {
    console.error(`FAIL: ${target} is not loop-ready (${errors.length} issue${errors.length > 1 ? "s" : ""}):`);
    for (const issue of errors) console.error(` - ${issue}`);
    process.exit(1);
  }

  console.log(`PASS: ${target} is loop-ready. All 5 sections present and substantive.`);
  process.exit(0);
}

main();
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// run_loop.mjs — runs one iteration of a loop spec's verification command, updates state,
|
|
3
|
+
// and reports whether the loop should continue, stop-success, or stop-escalate.
|
|
4
|
+
//
|
|
5
|
+
// This script enforces caps itself rather than trusting the calling agent to count correctly.
|
|
6
|
+
// It does NOT make code changes — that's the agent's job between iterations. This script's
|
|
7
|
+
// only responsibilities: run the verifier, hash the result, compare to history, decide status.
|
|
8
|
+
//
|
|
9
|
+
// Usage:
|
|
10
|
+
// node run_loop.mjs check [path/to/LOOP_SPEC.md] — run one verification pass, report status
|
|
11
|
+
// node run_loop.mjs reset [path/to/LOOP_SPEC.md] — clear iteration history for a fresh run
|
|
12
|
+
// node run_loop.mjs status [path/to/LOOP_SPEC.md] — print current state without running anything
|
|
13
|
+
//
|
|
14
|
+
// State file: .loop/state.json (relative to cwd), one entry per spec path.
|
|
15
|
+
|
|
16
|
+
import { readFileSync, readdirSync, existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
17
|
+
import { resolve, dirname, join } from "node:path";
|
|
18
|
+
import { execSync } from "node:child_process";
|
|
19
|
+
import { createHash } from "node:crypto";
|
|
20
|
+
|
|
21
|
+
// Loop state lives in <cwd>/.loop/state.json.
const STATE_DIR = resolve(process.cwd(), ".loop");
const STATE_FILE = join(STATE_DIR, "state.json");

// Iteration cap applied when the spec does not state one.
const DEFAULT_MAX_ITERATIONS = 10;
// Number of consecutive identical results that counts as "no progress".
const DEFAULT_NO_PROGRESS_LIMIT = 2;
|
|
26
|
+
|
|
27
|
+
/**
 * Reads the persisted loop state from STATE_FILE.
 *
 * This is deliberately best-effort: a missing, unreadable, or malformed state
 * file yields an empty state instead of an error. Falling back discards the
 * recorded iteration history, so a warning is written to stderr whenever an
 * existing file has to be ignored. Stdout is left untouched.
 *
 * @returns {object} The state object keyed by spec path, or {} when none is usable.
 */
function loadState() {
  if (!existsSync(STATE_FILE)) return {};

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(STATE_FILE, "utf-8"));
  } catch (err) {
    console.error(`WARN: ignoring unreadable state file ${STATE_FILE} (${err.message}); starting with empty history.`);
    return {};
  }

  // Valid JSON is not necessarily a usable state: `null`, arrays, and scalars
  // would break callers that index the state by spec path.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    console.error(`WARN: ignoring state file ${STATE_FILE}: expected a JSON object; starting with empty history.`);
    return {};
  }
  return parsed;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Persists the loop state to STATE_FILE as pretty-printed JSON, creating
 * STATE_DIR first when it does not exist yet.
 *
 * @param {object} state - State object keyed by spec path.
 */
function saveState(state) {
  const dirMissing = existsSync(STATE_DIR) === false;
  if (dirMissing) {
    mkdirSync(STATE_DIR, { recursive: true });
  }
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(STATE_FILE, serialized);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Pulls the runnable command out of a spec's "## Verification" section.
 *
 * The first fenced code block wins; otherwise the first inline backtick span is
 * used. Only the Verification section itself is searched.
 *
 * @param {string} specContent - Full text of the spec file.
 * @returns {string|null} The trimmed command, or null when the section is
 *   missing or contains no fenced/backtick command.
 */
function extractVerificationCommand(specContent) {
  // The heading leaves its own line break unconsumed, so an empty section
  // followed directly by the next "## " heading yields an empty body instead of
  // swallowing the next section (and any backtick span found in it).
  const sectionRe = /(?:^|\n)##[^\S\n]+Verification[^\S\n]*(?=\n|$)([\s\S]*?)(?=\n##\s+|$)/;
  const section = specContent.match(sectionRe);
  if (!section) return null;
  const body = section[1];

  // Optional info string after the opening fence: any case, digits, "+", "-",
  // "_" (bash, Bash, sh-session, c++), terminated by LF or CRLF. If the text
  // after the fence is not a bare tag, it is treated as part of the command.
  const fenced = body.match(/```(?:[\w+-]*\r?\n)?([\s\S]*?)```/);
  if (fenced) return fenced[1].trim();

  const inline = body.match(/`([^`]+)`/);
  return inline ? inline[1].trim() : null;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Reads the iteration cap from a spec's "## Termination" section.
 *
 * Recognises both phrasings the linter accepts ("max iterations" and
 * "iteration cap"), optionally wrapped in Markdown emphasis and followed by
 * ":" or "=", e.g. "Max iterations: 8", "**Max iterations**: 8",
 * "Iteration cap = 5".
 *
 * @param {string} specContent - Full text of the spec file.
 * @returns {number} The stated cap, or DEFAULT_MAX_ITERATIONS when the section
 *   is missing or states no number.
 */
function extractMaxIterations(specContent) {
  // The heading leaves its own line break unconsumed so that an empty
  // Termination section cannot pick up a number from the section after it.
  const sectionRe = /(?:^|\n)##[^\S\n]+Termination[^\S\n]*(?=\n|$)([\s\S]*?)(?=\n##\s+|$)/;
  const section = specContent.match(sectionRe);
  if (!section) return DEFAULT_MAX_ITERATIONS;

  // Between the label and the number allow only whitespace, emphasis markers and
  // a separator, so unrelated digits later in the sentence are not mistaken for
  // the cap.
  const cap = section[1].match(/(?:max\s+iterations?|iteration\s+cap)[\s*_:=]*(\d+)/i);
  return cap ? Number.parseInt(cap[1], 10) : DEFAULT_MAX_ITERATIONS;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Fingerprints one verifier run.
 *
 * @param {string} stdout - Captured standard output.
 * @param {string} stderr - Captured standard error.
 * @param {number} exitCode - Exit code of the verifier.
 * @returns {string} First 16 hex characters of the SHA-256 of
 *   "<exitCode>:<stdout>:<stderr>".
 */
function hashResult(stdout, stderr, exitCode) {
  const fingerprint = String(exitCode) + ":" + String(stdout) + ":" + String(stderr);
  const hasher = createHash("sha256");
  hasher.update(fingerprint);
  const hex = hasher.digest("hex");
  return hex.slice(0, 16);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Fingerprints the current contents of the working tree.
 *
 * Many verifiers print the same generic failure text whatever the cause, so the
 * verifier output alone cannot tell two different failed attempts apart. This
 * hash is combined with the result hash for no-progress detection.
 *
 * In a git repository the hash covers:
 *   - the full patch of tracked files against HEAD (not just `--stat`, whose
 *     file names and line counts are identical for two different same-size edits),
 *   - the porcelain status listing,
 *   - the path and contents of every untracked, non-ignored file.
 * The `.loop` state directory is excluded, because the state file is rewritten
 * on every check and would otherwise make each iteration look like progress
 * whenever it is tracked.
 *
 * Outside a git repository (or when git fails for any reason) the directory tree
 * under the cwd is walked instead, skipping .git, node_modules and .loop.
 *
 * @returns {string} A 16-hex-character digest, or "no-tree-signal" when neither
 *   strategy could produce one.
 */
function hashWorkingTree() {
  const shortDigest = (hasher) => hasher.digest("hex").slice(0, 16);

  try {
    const gitOptions = {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"],
      // Full patches can be much larger than the 1 MiB default buffer.
      maxBuffer: 256 * 1024 * 1024,
    };
    // ":/" keeps the whole repository in scope (as a bare `git diff HEAD` would);
    // the exclude is relative to the cwd, which is where the state directory lives.
    const pathspec = '-- ":/" ":(exclude).loop"';

    const patch = execSync(`git diff HEAD --binary ${pathspec}`, gitOptions);
    const status = execSync(`git status --porcelain ${pathspec}`, gitOptions);
    const untracked = execSync(`git ls-files --others --exclude-standard -z ${pathspec}`, gitOptions)
      .split("\0")
      .filter((relPath) => relPath.length > 0)
      .sort();

    // An empty diff in a real git repo is meaningful (truly unchanged) — report it
    // as such rather than falling through to the filesystem walk.
    if (patch.trim().length === 0 && status.trim().length === 0) {
      return shortDigest(createHash("sha256").update("git-clean"));
    }

    const hasher = createHash("sha256");
    hasher.update(patch);
    hasher.update("\0");
    hasher.update(status);
    for (const relPath of untracked) {
      hasher.update(`\0${relPath}\0`);
      try {
        hasher.update(readFileSync(relPath));
      } catch {
        // Best effort: the file vanished or is unreadable; its path is still hashed.
      }
    }
    return shortDigest(hasher);
  } catch {
    // Not a git repo (or git unavailable) — walk the directory tree with Node.js
    // fs APIs. Cross-platform; no shell commands needed.
    try {
      const hasher = createHash("sha256");
      const skipped = new Set([".git", "node_modules", ".loop"]);
      const walk = (dir) => {
        let entries;
        try {
          entries = readdirSync(dir, { withFileTypes: true });
        } catch {
          return; // unreadable directory: contributes nothing
        }
        // Sorted so the digest does not depend on directory enumeration order.
        entries.sort((a, b) => a.name.localeCompare(b.name));
        for (const entry of entries) {
          if (skipped.has(entry.name)) continue;
          const full = join(dir, entry.name);
          if (entry.isDirectory()) {
            walk(full);
            continue;
          }
          try {
            hasher.update(entry.name + ":");
            hasher.update(readFileSync(full));
          } catch {
            // Best effort: unreadable entries are skipped.
          }
        }
      };
      walk(process.cwd());
      return shortDigest(hasher);
    } catch {
      return "no-tree-signal";
    }
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Runs the spec's verification command through the shell and captures its outcome.
 *
 * @param {string} command - Shell command taken from the spec's Verification section.
 * @returns {{exitCode: number, stdout: string, stderr: string}} Exit code 0 with
 *   the captured stdout on success (stderr is reported as "" in that case);
 *   otherwise the child's exit status (1 when none is available, e.g. the child
 *   was killed by a signal) with whatever output was captured.
 */
function runVerification(command) {
  // execSync's default output buffer is 1 MiB; a verbose but PASSING test run
  // that exceeds it throws ENOBUFS and would be recorded as a failure. Allow far
  // more output than a verifier realistically prints.
  const maxBuffer = 256 * 1024 * 1024;
  try {
    const stdout = execSync(command, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
      maxBuffer,
    });
    return { exitCode: 0, stdout, stderr: "" };
  } catch (err) {
    return {
      exitCode: err.status ?? 1,
      stdout: err.stdout ? err.stdout.toString() : "",
      // When nothing was written to stderr, fall back to the error message so the
      // caller still gets a reason (e.g. command not found, buffer exceeded).
      stderr: err.stderr ? err.stderr.toString() : String(err.message || err),
    };
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * `check` subcommand: runs one verification pass for the spec, records it in the
 * state file, prints a JSON verdict on stdout, and exits.
 *
 * Verdicts and exit codes:
 *   - "success"       (exit 0): the verification command exited 0.
 *   - "continue"      (exit 1): it failed, but the cap is not reached and the
 *                               result differs from the previous attempt.
 *   - "stop-escalate" (exit 2): the iteration cap is reached, or the last
 *                               DEFAULT_NO_PROGRESS_LIMIT attempts have identical
 *                               result + working-tree hashes.
 * Exit 1 without a JSON verdict means the spec is missing or has no runnable command.
 *
 * Failure verdicts carry the tail of BOTH stderr and stdout: several common
 * verifiers (pytest, go test, tsc) print their failures on stdout, so reporting
 * stderr alone can leave the caller with nothing to act on.
 *
 * @param {string} specPath - Path to the loop spec.
 */
function cmdCheck(specPath) {
  if (!existsSync(specPath)) {
    console.error(`FAIL: ${specPath} not found. Author a spec first (run the 'new' command).`);
    process.exit(1);
  }
  const specContent = readFileSync(specPath, "utf-8");
  const command = extractVerificationCommand(specContent);
  if (!command) {
    console.error(`FAIL: could not extract a runnable command from the Verification section of ${specPath}.`);
    console.error("Either add a fenced/backtick command, or this spec is LLM-judge-based and must be checked manually, not via run_loop.mjs.");
    process.exit(1);
  }
  const maxIterations = extractMaxIterations(specContent);

  // Only the last part of each stream is reported, to keep the verdict readable.
  const tail = (text) => text.slice(-2000);
  const report = (payload, exitCode) => {
    console.log(JSON.stringify(payload, null, 2));
    process.exit(exitCode);
  };

  const state = loadState();
  const key = resolve(specPath);
  const stored = state[key];
  // A hand-edited or truncated entry without an iterations array is treated as
  // "no history" rather than crashing on `.length`.
  const entry = stored && Array.isArray(stored.iterations) ? stored : { iterations: [], maxIterations };
  entry.maxIterations = maxIterations; // always refresh from current spec in case it changed

  // The cap is enforced before running anything, so a caller cannot get extra
  // attempts by simply calling `check` again.
  const completedCount = entry.iterations.length;
  if (completedCount >= maxIterations) {
    report({
      status: "stop-escalate",
      reason: `Iteration cap reached (${completedCount}/${maxIterations}) without success.`,
      iterations: completedCount,
    }, 2);
  }

  console.error(`Running verification (iteration ${completedCount + 1}/${maxIterations}): ${command}`);
  const result = runVerification(command);
  const resultHash = hashResult(result.stdout, result.stderr, result.exitCode);
  const treeHash = hashWorkingTree();
  entry.iterations.push({
    n: completedCount + 1,
    exitCode: result.exitCode,
    hash: `${resultHash}:${treeHash}`,
    timestamp: new Date().toISOString(),
  });
  state[key] = entry;
  saveState(state);

  const iterations = entry.iterations.length;

  if (result.exitCode === 0) {
    report({
      status: "success",
      reason: "Verification command exited 0.",
      iterations,
      stdout: tail(result.stdout),
    }, 0);
  }

  // No-progress check: look at the last N hashes (including this one).
  const recentHashes = entry.iterations.slice(-DEFAULT_NO_PROGRESS_LIMIT).map((it) => it.hash);
  const allSame =
    recentHashes.length === DEFAULT_NO_PROGRESS_LIMIT &&
    recentHashes.every((h) => h === recentHashes[0]);
  if (allSame) {
    report({
      status: "stop-escalate",
      reason: `No-progress: last ${DEFAULT_NO_PROGRESS_LIMIT} iterations produced an identical result AND an unchanged working tree (no files were modified, or the change reverted to the same state). Stopping to avoid burning budget on a repeated failure.`,
      iterations,
      stderr: tail(result.stderr),
      stdout: tail(result.stdout),
    }, 2);
  }

  if (iterations >= maxIterations) {
    report({
      status: "stop-escalate",
      reason: `Iteration cap reached (${iterations}/${maxIterations}) without success.`,
      iterations,
      stderr: tail(result.stderr),
      stdout: tail(result.stdout),
    }, 2);
  }

  report({
    status: "continue",
    reason: "Verification failed but under cap and making progress (result differs from immediately preceding attempt). Make a change and check again.",
    iterations,
    maxIterations,
    stderr: tail(result.stderr),
    stdout: tail(result.stdout),
  }, 1);
}
|
|
204
|
+
|
|
205
|
+
/**
 * `reset` subcommand: drops the recorded iteration history for one spec and
 * writes the state file back. Other specs' entries are left alone.
 *
 * @param {string} specPath - Path to the loop spec whose history is cleared.
 */
function cmdReset(specPath) {
  const allHistory = loadState();
  const entryKey = resolve(specPath);
  delete allHistory[entryKey];
  saveState(allHistory);
  console.log(`Reset iteration history for ${specPath}.`);
}
|
|
212
|
+
|
|
213
|
+
/**
 * `status` subcommand: prints the recorded history for one spec as JSON.
 * Read-only — it neither runs the verifier nor writes the state file.
 *
 * Prints `{ status: "no-history", specPath }` when nothing is recorded for the
 * spec; otherwise the cap, the number of completed iterations, and the full
 * iteration list.
 *
 * @param {string} specPath - Path to the loop spec.
 */
function cmdStatus(specPath) {
  const entry = loadState()[resolve(specPath)];
  const summary = entry
    ? {
        specPath,
        maxIterations: entry.maxIterations,
        completed: entry.iterations.length,
        history: entry.iterations,
      }
    : { status: "no-history", specPath };
  console.log(JSON.stringify(summary, null, 2));
}
|
|
228
|
+
|
|
229
|
+
/**
 * CLI entry point: dispatches `check`, `reset` or `status` for the spec named by
 * the second argument (default: LOOP_SPEC.md in the cwd). Any other subcommand
 * prints the usage line on stderr and exits with code 1.
 */
function main() {
  const subcommand = process.argv[2];
  const specPath = resolve(process.argv[3] || "LOOP_SPEC.md");

  if (subcommand === "check") {
    cmdCheck(specPath);
  } else if (subcommand === "reset") {
    cmdReset(specPath);
  } else if (subcommand === "status") {
    cmdStatus(specPath);
  } else {
    console.error("Usage: node run_loop.mjs <check|reset|status> [path/to/LOOP_SPEC.md]");
    process.exit(1);
  }
}

main();
|