@jhlee0619/codexloop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +34 -0
- package/.claude-plugin/plugin.json +8 -0
- package/.codex-plugin/plugin.json +38 -0
- package/LICENSE +21 -0
- package/README.md +425 -0
- package/assets/banner.png +0 -0
- package/bin/cloop +45 -0
- package/commands/iterate.md +25 -0
- package/commands/model.md +33 -0
- package/commands/result.md +17 -0
- package/commands/start.md +188 -0
- package/commands/status.md +10 -0
- package/commands/stop.md +12 -0
- package/package.json +60 -0
- package/prompts/evaluate.md +91 -0
- package/prompts/rank.md +97 -0
- package/prompts/suggest.md +69 -0
- package/schemas/evaluation.schema.json +65 -0
- package/schemas/loop-state.schema.json +103 -0
- package/schemas/proposal.schema.json +74 -0
- package/schemas/ranking.schema.json +77 -0
- package/scripts/lib/apply.mjs +254 -0
- package/scripts/lib/args.mjs +202 -0
- package/scripts/lib/codex-exec.mjs +318 -0
- package/scripts/lib/convergence.mjs +153 -0
- package/scripts/lib/iteration.mjs +484 -0
- package/scripts/lib/process.mjs +164 -0
- package/scripts/lib/prompts.mjs +53 -0
- package/scripts/lib/rank.mjs +149 -0
- package/scripts/lib/render.mjs +240 -0
- package/scripts/lib/state.mjs +378 -0
- package/scripts/lib/validate.mjs +71 -0
- package/scripts/lib/workspace.mjs +49 -0
- package/scripts/loop-companion.mjs +849 -0
- package/skills/cloop/SKILL.md +177 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "cloop",
|
|
3
|
+
"owner": {
|
|
4
|
+
"name": "jhlee"
|
|
5
|
+
},
|
|
6
|
+
"metadata": {
|
|
7
|
+
"description": "CodexLoop — iterative improvement loop plugin that drives OpenAI Codex as a multi-role critic.",
|
|
8
|
+
"version": "0.1.0"
|
|
9
|
+
},
|
|
10
|
+
"plugins": [
|
|
11
|
+
{
|
|
12
|
+
"name": "cloop",
|
|
13
|
+
"source": "./",
|
|
14
|
+
"description": "Drive OpenAI Codex through evaluate \u2192 suggest \u2192 rank \u2192 apply \u2192 validate \u2192 record iterations until convergence. Ships six slash commands (/cloop:start, /cloop:iterate, /cloop:status, /cloop:stop, /cloop:result, /cloop:model) plus a stand-alone shell wrapper usable from Codex CLI.",
|
|
15
|
+
"version": "0.1.0",
|
|
16
|
+
"author": {
|
|
17
|
+
"name": "jhlee"
|
|
18
|
+
},
|
|
19
|
+
"repository": "https://github.com/jhlee0619/CodexLoop",
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"keywords": [
|
|
22
|
+
"codex",
|
|
23
|
+
"loop",
|
|
24
|
+
"iterative",
|
|
25
|
+
"refinement",
|
|
26
|
+
"review",
|
|
27
|
+
"adversarial",
|
|
28
|
+
"convergence",
|
|
29
|
+
"cloop"
|
|
30
|
+
],
|
|
31
|
+
"category": "automation"
|
|
32
|
+
}
|
|
33
|
+
]
|
|
34
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "cloop",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "CodexLoop: iterative improvement loop plugin that drives OpenAI Codex as a multi-role critic (evaluate \u2192 suggest \u2192 rank \u2192 apply \u2192 validate \u2192 record).",
|
|
5
|
+
"author": { "name": "jhlee" },
|
|
6
|
+
"keywords": ["codex", "loop", "iterative", "refinement", "review", "cloop"],
|
|
7
|
+
"license": "MIT"
|
|
8
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "cloop",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "CodexLoop: drives Codex through strict evaluate \u2192 suggest \u2192 rank \u2192 apply \u2192 validate \u2192 record iterations via the `cloop` shell command. Activates ONLY when the user explicitly mentions one of `cloop`, `codexloop`, or `CodexLoop` by name.",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "jhlee",
|
|
7
|
+
"url": "https://github.com/jhlee0619/CodexLoop"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://github.com/jhlee0619/CodexLoop",
|
|
10
|
+
"repository": "https://github.com/jhlee0619/CodexLoop",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"keywords": [
|
|
13
|
+
"cloop",
|
|
14
|
+
"codex",
|
|
15
|
+
"loop",
|
|
16
|
+
"iterative",
|
|
17
|
+
"refinement",
|
|
18
|
+
"review",
|
|
19
|
+
"convergence"
|
|
20
|
+
],
|
|
21
|
+
"skills": "./skills/",
|
|
22
|
+
"interface": {
|
|
23
|
+
"displayName": "CodexLoop",
|
|
24
|
+
"shortDescription": "Iterative Codex loops with convergence guarantees.",
|
|
25
|
+
"longDescription": "CodexLoop drives OpenAI Codex through a strict six-step iteration cycle (evaluate \u2192 suggest \u2192 rank \u2192 apply \u2192 validate \u2192 record) until a goal is met or the loop converges. It forces Codex to emit at least two proposals per iteration, ranks them deterministically on six dimensions, applies the winner via git apply + commit, runs your tests, and records a full forensic trace under .loop/. This plugin activates ONLY when the user explicitly mentions `cloop` or `CodexLoop` by name \u2014 use it for iterative tasks the user wants to drive through cloop, not as a default strategy for generic iterative work.",
|
|
26
|
+
"developerName": "jhlee",
|
|
27
|
+
"category": "Automation",
|
|
28
|
+
"capabilities": ["Interactive", "Write"],
|
|
29
|
+
"websiteURL": "https://github.com/jhlee0619/CodexLoop",
|
|
30
|
+
"defaultPrompt": [
|
|
31
|
+
"Use cloop to iteratively fix the failing tests in tests/",
|
|
32
|
+
"Run a CodexLoop on src/ to eliminate the type errors",
|
|
33
|
+
"Start a cloop with goal X and budget 5 iterations"
|
|
34
|
+
],
|
|
35
|
+
"brandColor": "#3B82F6",
|
|
36
|
+
"logo": "./assets/banner.png"
|
|
37
|
+
}
|
|
38
|
+
}
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 jhlee
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
# CodexLoop
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="assets/banner.png" alt="CodexLoop — iterative improvement loop that drives OpenAI Codex as a multi-role critic" width="640">
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
> Iterative improvement loop plugin that drives OpenAI Codex as a multi-role
|
|
8
|
+
> critic. Ships as a single repo that installs as both a **Claude Code
|
|
9
|
+
> plugin** (via `/plugin marketplace`) and a **Codex CLI plugin** (via the
|
|
10
|
+
> Codex marketplace), plus a standalone `cloop` shell binary for plain
|
|
11
|
+
> terminals and CI.
|
|
12
|
+
|
|
13
|
+
- **Repository**: <https://github.com/jhlee0619/CodexLoop>
|
|
14
|
+
- **npm**: [`@jhlee0619/codexloop`](https://www.npmjs.com/package/@jhlee0619/codexloop) — installs the `cloop` binary globally
|
|
15
|
+
- **Status**: MVP v0.1.0 — six slash commands + background workers + deterministic convergence. See [docs/future-work.md](docs/future-work.md) for the roadmap.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## What it does
|
|
20
|
+
|
|
21
|
+
Every iteration strictly follows six steps:
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
evaluate → suggest → rank → apply → validate → record
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Codex is used as a multi-role critic + improver (reviewer, adversarial
|
|
28
|
+
critic, solution generator, refactor advisor, test designer). Every
|
|
29
|
+
iteration is structurally forced to produce **at least two proposals**, a
|
|
30
|
+
single "judge" Codex call ranks them on six dimensions (correctness,
|
|
31
|
+
requirement satisfaction, simplicity, maintainability, riskInverse,
|
|
32
|
+
testability), the runtime deterministically re-computes the winner,
|
|
33
|
+
applies it with `git apply` + `git commit`, runs your configured
|
|
34
|
+
test/lint/type commands, and records the full forensic trace under
|
|
35
|
+
`<target-repo>/.loop/`.
|
|
36
|
+
|
|
37
|
+
Loops terminate on: **goal-met**, **negligible-improvement**
|
|
38
|
+
(convergence), **plateau**, **regression** / **divergence**, or a
|
|
39
|
+
**budget limit** (iterations / time / codex calls).
|
|
40
|
+
|
|
41
|
+
### Opinionated defaults
|
|
42
|
+
|
|
43
|
+
- **Model**: `gpt-5.4`
|
|
44
|
+
- **Reasoning effort**: `xhigh`
|
|
45
|
+
|
|
46
|
+
These are forwarded to `codex exec` via `--model gpt-5.4 -c
|
|
47
|
+
model_reasoning_effort=xhigh` and applied regardless of your
|
|
48
|
+
`~/.codex/config.toml`, so loops are reproducible across machines and
|
|
49
|
+
config changes. Override per loop with `/cloop:start --model <m>
|
|
50
|
+
--effort <level>` or permanently for the active loop with `/cloop:model
|
|
51
|
+
<m> --effort <level>`.
|
|
52
|
+
|
|
53
|
+
### Interview + approval (Claude Code and Codex CLI)
|
|
54
|
+
|
|
55
|
+
When you invoke CodexLoop through an LLM orchestrator (Claude Code or
|
|
56
|
+
Codex CLI), the plugin runs a **five-phase interview** before launching
|
|
57
|
+
the runtime:
|
|
58
|
+
|
|
59
|
+
1. **Pre-flight** — git repo check, clean working tree, `codex`/`cloop` availability
|
|
60
|
+
2. **Goal capture + clarification** — rewrite vague input into one concrete immutable sentence
|
|
61
|
+
3. **Plan assembly** — infer test/lint/type commands from `package.json` / `Makefile` / `pyproject.toml`, confirm budget, choose `--wait` vs `--background`
|
|
62
|
+
4. **Approval** — show the full plan and wait for explicit go/no-go
|
|
63
|
+
5. **Invoke** — call `cloop start …` with every confirmed flag
|
|
64
|
+
|
|
65
|
+
Direct shell callers (`cloop start …` in a plain terminal or CI job) skip
|
|
66
|
+
the interview and take flags as-is, so CodexLoop remains deterministic
|
|
67
|
+
and scriptable.
|
|
68
|
+
|
|
69
|
+
CodexLoop is a **peer** of the `openai-codex` plugin, not a replacement.
|
|
70
|
+
It uses the same `codex` CLI binary and expects you to have run
|
|
71
|
+
`/codex:setup` once (or `npm install -g @openai/codex`).
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Requirements
|
|
76
|
+
|
|
77
|
+
- **Node.js** 20 or newer.
|
|
78
|
+
- **git** 2.20+ with a clean working tree in the target repository.
|
|
79
|
+
- **`codex` CLI** installed and authenticated.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
CodexLoop installs on three platforms from the same repo. Pick whichever
|
|
86
|
+
matches your workflow — they compose freely and do not conflict.
|
|
87
|
+
|
|
88
|
+
### 1. One-line install script (universal — recommended)
|
|
89
|
+
|
|
90
|
+
Handles Claude Code + Codex CLI + plain shell in one go:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
curl -fsSL https://raw.githubusercontent.com/jhlee0619/CodexLoop/main/install.sh | sh
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The script:
|
|
97
|
+
|
|
98
|
+
1. Downloads the latest `main` tarball from GitHub into `~/.local/share/cloop/`
|
|
99
|
+
2. Symlinks `~/.local/bin/cloop` to the shell wrapper (plain-shell + Codex CLI + CI)
|
|
100
|
+
3. Symlinks `~/.claude/plugins/cloop/local/0.1.0/` to the install dir if `~/.claude/plugins/` exists (Claude Code plugin)
|
|
101
|
+
4. Symlinks `~/plugins/cloop` to the install dir and adds a `cloop` entry to `~/.agents/plugins/marketplace.json` if `codex` is on `$PATH` (Codex CLI plugin)
|
|
102
|
+
|
|
103
|
+
Override paths with env vars (`CLOOP_REPO`, `CLOOP_REF`, `CLOOP_INSTALL_DIR`,
|
|
104
|
+
`CLOOP_BIN_DIR`). The script is idempotent and safe to re-run for updates.
|
|
105
|
+
|
|
106
|
+
### 2. npm global install (shell binary only)
|
|
107
|
+
|
|
108
|
+
If you only need the `cloop` shell wrapper (for CI, Codex CLI, or plain
|
|
109
|
+
terminals) and do not want the Claude Code plugin:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
npm install -g @jhlee0619/codexloop
|
|
113
|
+
cloop --help
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
This installs `bin/cloop` onto your `$PATH` and carries the runtime
|
|
117
|
+
scripts + prompts + schemas along with it. It does NOT register with
|
|
118
|
+
Claude Code — use path 1 or path 3 for that.
|
|
119
|
+
|
|
120
|
+
> **Why `@jhlee0619/codexloop` and not `@jhlee0619/CodexLoop`?** npm
|
|
121
|
+
> requires all package names to be lowercase; scoped names cannot
|
|
122
|
+
> contain capital letters. The package name is the closest lowercase
|
|
123
|
+
> match to the GitHub repo name.
|
|
124
|
+
|
|
125
|
+
### 3. Claude Code plugin marketplace
|
|
126
|
+
|
|
127
|
+
If you only want `/cloop:*` slash commands inside Claude Code:
|
|
128
|
+
|
|
129
|
+
```text
|
|
130
|
+
/plugin marketplace add jhlee0619/CodexLoop
|
|
131
|
+
/plugin install cloop@cloop
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Claude Code clones `github.com/jhlee0619/CodexLoop` into
|
|
135
|
+
`~/.claude/plugins/marketplaces/cloop/`, reads
|
|
136
|
+
`.claude-plugin/marketplace.json`, and installs the single `cloop` plugin
|
|
137
|
+
it lists. The six slash commands appear under `/help` immediately.
|
|
138
|
+
|
|
139
|
+
Update later with:
|
|
140
|
+
|
|
141
|
+
```text
|
|
142
|
+
/plugin marketplace update cloop
|
|
143
|
+
/plugin install cloop@cloop
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### 4. Codex CLI plugin (manual)
|
|
147
|
+
|
|
148
|
+
If you want the Codex CLI plugin without running the one-line installer:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# Clone or symlink into Codex's home-local plugin dir:
|
|
152
|
+
git clone https://github.com/jhlee0619/CodexLoop.git ~/plugins/cloop
|
|
153
|
+
# or, if you already have the repo somewhere:
|
|
154
|
+
# ln -s /path/to/CodexLoop ~/plugins/cloop
|
|
155
|
+
|
|
156
|
+
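# Register cloop in your home-local Codex marketplace. NOTE: this overwrites any existing marketplace.json; if you already have one, add the cloop entry to its "plugins" array by hand instead: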
|
|
157
|
+
mkdir -p ~/.agents/plugins
|
|
158
|
+
cat > ~/.agents/plugins/marketplace.json <<'JSON'
|
|
159
|
+
{
|
|
160
|
+
"name": "home-local",
|
|
161
|
+
"interface": { "displayName": "Home-local plugins" },
|
|
162
|
+
"plugins": [
|
|
163
|
+
{
|
|
164
|
+
"name": "cloop",
|
|
165
|
+
"source": { "source": "local", "path": "./plugins/cloop" },
|
|
166
|
+
"policy": { "installation": "AVAILABLE", "authentication": "NONE" },
|
|
167
|
+
"category": "Automation"
|
|
168
|
+
}
|
|
169
|
+
]
|
|
170
|
+
}
|
|
171
|
+
JSON
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Restart any active Codex session. Codex will auto-load
|
|
175
|
+
`skills/cloop/SKILL.md`, which activates the skill **only** when you name
|
|
176
|
+
cloop by keyword (see "Codex activation gating" below).
|
|
177
|
+
|
|
178
|
+
### 5. Manual clone + symlink (for plugin development)
|
|
179
|
+
|
|
180
|
+
If you are hacking on CodexLoop itself:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
git clone https://github.com/jhlee0619/CodexLoop.git /path/to/CodexLoop
|
|
184
|
+
ln -s /path/to/CodexLoop ~/.claude/plugins/cloop/local/0.1.0
|
|
185
|
+
ln -s /path/to/CodexLoop ~/plugins/cloop
|
|
186
|
+
ln -s /path/to/CodexLoop/bin/cloop ~/.local/bin/cloop
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Edits under `/path/to/CodexLoop/` take effect immediately because the
|
|
190
|
+
plugin caches are symlinks.
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Quickstart (Claude Code)
|
|
195
|
+
|
|
196
|
+
```text
|
|
197
|
+
/codex:setup # one-time Codex install + auth (from openai-codex plugin)
|
|
198
|
+
|
|
199
|
+
# inside the target repo, with a clean working tree — just say what you want:
|
|
200
|
+
/cloop:start fix the failing add tests
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Claude Code walks you through:
|
|
204
|
+
|
|
205
|
+
1. **Goal** — reads `TASK.md` / `GOAL.md` / `PRD.md` / `AGENTS.md` if present, or asks you directly; rewrites your wording into one concrete sentence and confirms.
|
|
206
|
+
2. **Plan** — infers plausible `--test-cmd` / `--lint-cmd` / `--type-cmd` candidates from `package.json` / `Makefile` / `pyproject.toml` / `go.mod`; confirms the budget (iterations, time); defaults the model to `gpt-5.4` + `xhigh`.
|
|
207
|
+
3. **Approval** — shows the full plan and asks `Start loop with this plan?` before launching anything. You can edit the goal or plan details without re-typing.
|
|
208
|
+
4. **Run** — records the current HEAD as `seedCommit`, adds `.loop/` to `.gitignore`, and drives the six-step iteration cycle until the verdict is `goal-met` AND validation exits 0, OR the loop converges, OR a budget limit fires.
|
|
209
|
+
|
|
210
|
+
Then check `/cloop:result --diff` for the final state.
|
|
211
|
+
|
|
212
|
+
Power users can bypass the approval dialog by supplying every flag up front and adding `--yes`:
|
|
213
|
+
|
|
214
|
+
```text
|
|
215
|
+
/cloop:start --wait --yes \
|
|
216
|
+
--goal "fix the failing add test" \
|
|
217
|
+
--max-iter 3 \
|
|
218
|
+
--test-cmd "node --test tests/" \
|
|
219
|
+
--model gpt-5.4 --effort xhigh
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Commands
|
|
225
|
+
|
|
226
|
+
| Command | What it does |
|
|
227
|
+
|---|---|
|
|
228
|
+
| `/cloop:start` | Start a new loop. Runs the five-phase interview + approval first (skip with `--yes`), then launches the runtime. `--wait` for foreground, `--background` for OS-detached. |
|
|
229
|
+
| `/cloop:iterate` | Run exactly one iteration synchronously. `--model <m>` / `--effort <e>` override the loop's stored values for this iteration only. |
|
|
230
|
+
| `/cloop:status` | Show loop state, iteration history, budget consumption, convergence metrics. |
|
|
231
|
+
| `/cloop:stop` | SIGTERM a running background worker (graceful, waits 60 s). Use `--force` for SIGKILL. |
|
|
232
|
+
| `/cloop:result` | Dump the full iteration history with ranking breakdowns and validation output. |
|
|
233
|
+
| `/cloop:model` | Show or change the Codex model + reasoning effort used by the active loop. Defaults are `gpt-5.4` / `xhigh`. |
|
|
234
|
+
|
|
235
|
+
See [docs/command-spec.md](docs/command-spec.md) for every flag on every
|
|
236
|
+
command.
|
|
237
|
+
|
|
238
|
+
### `--wait` vs `--background`
|
|
239
|
+
|
|
240
|
+
| Flag | Mode | Use when |
|
|
241
|
+
|---|---|---|
|
|
242
|
+
| `--wait` | **Foreground** — `cloop start` blocks the tool session until the loop terminates. Claude streams every iteration transcript inline. | Short loops, `--dry-run`, watching reasoning live, budget sanity checks. |
|
|
243
|
+
| `--background` | **OS-detached** — runtime spawns a detached Node worker via `spawnDetached({ detached: true, stdio: "ignore" }).unref()`, writes `.loop/loop.pid`, and exits. The worker runs independently of Claude Code; closing the session does NOT kill it. | Real multi-minute loops. Poll progress with `/cloop:status`, stop with `/cloop:stop`. |
|
|
244
|
+
|
|
245
|
+
Never pass both flags. If you pass neither, the interview recommends one
|
|
246
|
+
based on `--max-iter` and `--dry-run`.
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## Codex activation gating
|
|
251
|
+
|
|
252
|
+
Codex auto-loads `skills/cloop/SKILL.md` on session start, but the skill's
|
|
253
|
+
`description` field gates activation on an **explicit keyword match**. The
|
|
254
|
+
skill only runs when your request contains one of these tokens
|
|
255
|
+
(case-insensitive):
|
|
256
|
+
|
|
257
|
+
- `cloop`
|
|
258
|
+
- `codexloop`
|
|
259
|
+
- `CodexLoop`
|
|
260
|
+
- `CODEXLOOP`
|
|
261
|
+
|
|
262
|
+
Activation examples:
|
|
263
|
+
|
|
264
|
+
| What you say to Codex | Skill activates? |
|
|
265
|
+
|---|---|
|
|
266
|
+
| "Use cloop to fix the failing auth tests" | **yes** |
|
|
267
|
+
| "codexloop this failing build" | **yes** |
|
|
268
|
+
| "Start a CodexLoop on src/add.js" | **yes** |
|
|
269
|
+
| "CODEXLOOP — fix the broken adds function" | **yes** |
|
|
270
|
+
| "iteratively fix the auth tests" | no — no cloop keyword |
|
|
271
|
+
| "keep trying until the tests pass" | no — no cloop keyword |
|
|
272
|
+
| "loop on this until it works" | no — no cloop keyword |
|
|
273
|
+
|
|
274
|
+
When the skill activates, it walks through the same five-phase interview
|
|
275
|
+
(pre-flight → goal → plan → approval → invoke) that `/cloop:start` does
|
|
276
|
+
in Claude Code, and only then invokes `cloop start …` as a shell command.
|
|
277
|
+
Defaults (`gpt-5.4`, `xhigh`) and the immutable goal hash apply
|
|
278
|
+
identically on both platforms.
|
|
279
|
+
|
|
280
|
+
Codex calls `cloop` as a regular shell command; because CodexLoop spawns
|
|
281
|
+
its own `codex exec` children, the running Codex session is used only to
|
|
282
|
+
orchestrate the wrapper — each loop iteration is an **independent**,
|
|
283
|
+
ephemeral Codex invocation, so the outer Codex session's context is
|
|
284
|
+
never burned by the loop.
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## Using `cloop` directly from a shell (no LLM)
|
|
289
|
+
|
|
290
|
+
When the `cloop` wrapper is on `$PATH` (via any install path above), you
|
|
291
|
+
can drive it directly from any terminal or script. There is **no
|
|
292
|
+
interview** on this path — the runtime takes your flags verbatim, so it
|
|
293
|
+
is safe for CI, scripted automation, and power-user shell usage.
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
# Full-flag form:
|
|
297
|
+
cloop start --wait --yes \
|
|
298
|
+
--goal "fix the failing add test" \
|
|
299
|
+
--test-cmd "node --test tests/" \
|
|
300
|
+
--max-iter 3
|
|
301
|
+
|
|
302
|
+
# Positional shortcut — the goal is the free-form text:
|
|
303
|
+
cloop start --wait --yes "fix the failing add test" \
|
|
304
|
+
--test-cmd "node --test tests/" --max-iter 3
|
|
305
|
+
|
|
306
|
+
cloop status
|
|
307
|
+
cloop result --diff
|
|
308
|
+
cloop stop
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
The interview only runs when an LLM orchestration layer (Claude via
|
|
312
|
+
`/cloop:start`, or Codex via the skill) sits above `cloop`. In a plain
|
|
313
|
+
shell, `cloop` is deterministic and trusts its flags.
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## Safety by default
|
|
318
|
+
|
|
319
|
+
- **Clean working tree required.** Every iteration starts from a clean state so `git reset --hard` is an unambiguous rollback path.
|
|
320
|
+
- **Atomic state writes.** Every write to `.loop/state.json` uses temp-file-plus-rename so a crash mid-write never corrupts the canonical state.
|
|
321
|
+
- **Reward-hacking guards.** The runtime rejects patches that delete test files, add `.skip` / `xit` / `.only` in test files, or modify tests without a substantive justification. Both the `suggest` and `rank` prompts forbid these patterns, and `apply.mjs` hard-rejects them if they slip through.
|
|
322
|
+
- **Deterministic ranking.** The judge's weighted sum is never trusted blindly — `rank.mjs::recomputeWinner` re-computes from the raw dimensional scores and uses the runtime's pick if the two disagree.
|
|
323
|
+
- **Budget caps.** `--max-iter`, `--max-time`, and `--max-calls` are enforced before every iteration starts (including dry-run).
|
|
324
|
+
- **Goal immutability.** `state.goal.goalHash = sha256(text + acceptanceCriteria + testCmd + lintCmd + typeCmd)` is asserted on every iteration. Any drift fails loudly.
|
|
325
|
+
- **Single loop per repo.** An advisory lock at `.loop/loop.lock` (keyed by worker PID) refuses a second concurrent loop in the same repo.
|
|
326
|
+
|
|
327
|
+
---
|
|
328
|
+
|
|
329
|
+
## Documentation
|
|
330
|
+
|
|
331
|
+
| File | Purpose |
|
|
332
|
+
|---|---|
|
|
333
|
+
| [docs/architecture.md](docs/architecture.md) | Design overview, directory layout, Codex invocation pattern |
|
|
334
|
+
| [docs/iteration-policy.md](docs/iteration-policy.md) | The six-step cycle, role definitions, proposal rules |
|
|
335
|
+
| [docs/state-schema.md](docs/state-schema.md) | The `.loop/state.json` shape + per-iteration dump shape |
|
|
336
|
+
| [docs/command-spec.md](docs/command-spec.md) | Every flag on every slash command |
|
|
337
|
+
| [docs/stopping-criteria.md](docs/stopping-criteria.md) | Quality score formula + convergence triggers |
|
|
338
|
+
| [docs/known-limitations.md](docs/known-limitations.md) | What MVP does NOT do (yet) |
|
|
339
|
+
| [docs/future-work.md](docs/future-work.md) | v0.5 / v1 roadmap |
|
|
340
|
+
| [docs/examples/01-bugfix-loop.md](docs/examples/01-bugfix-loop.md) | Step-by-step walk-through of a real bugfix loop |
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
## Tests
|
|
345
|
+
|
|
346
|
+
No framework dependency — every test is a standalone `node` script.
|
|
347
|
+
|
|
348
|
+
```bash
|
|
349
|
+
node tests/unit/state.test.mjs # 55 tests: defaults, round-trip, lock, migration, model/effort
|
|
350
|
+
node tests/unit/rank.test.mjs # 15 tests: weighted score, tiebreaker, reward-hacking floor
|
|
351
|
+
node tests/unit/convergence.test.mjs # 16 tests: quality score, goal-met, budget, negligible, plateau, divergence
|
|
352
|
+
node tests/integration/loop-smoke.test.mjs # 50 tests: end-to-end dry-run with mock codex
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
The integration smoke test sets up a scratch git repo in `/tmp`, points
|
|
356
|
+
the runtime at `tests/fixtures/mock-codex-exec.mjs` via
|
|
357
|
+
`CODEXLOOP_CODEX_BIN`, and runs a full dry-run loop that reaches
|
|
358
|
+
`goal-met`. It spends zero Codex budget.
|
|
359
|
+
|
|
360
|
+
**Total: 136 tests passing.**
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
364
|
+
## Plugin manifests
|
|
365
|
+
|
|
366
|
+
CodexLoop ships two platform manifests at the repo root:
|
|
367
|
+
|
|
368
|
+
- `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` — Claude Code plugin + marketplace entry
|
|
369
|
+
- `.codex-plugin/plugin.json` — Codex CLI plugin manifest (skills discovered from `skills/`)
|
|
370
|
+
|
|
371
|
+
The same Node runtime (`scripts/loop-companion.mjs`) and the same shell
|
|
372
|
+
wrapper (`bin/cloop`) back both plugins. No code is duplicated between
|
|
373
|
+
platforms.
|
|
374
|
+
|
|
375
|
+
---
|
|
376
|
+
|
|
377
|
+
## Acknowledgments
|
|
378
|
+
|
|
379
|
+
CodexLoop is an independent project but its design borrows generously
|
|
380
|
+
from two upstream projects. Neither is a dependency — every line of
|
|
381
|
+
CodexLoop is an original reinterpretation — but both deserve explicit
|
|
382
|
+
credit.
|
|
383
|
+
|
|
384
|
+
### [snarktank/ralph](https://github.com/snarktank/ralph)
|
|
385
|
+
|
|
386
|
+
Ralph (after Geoffrey Huntley's "Ralph Wiggum" pattern) is the
|
|
387
|
+
iterative-loop project that popularized the idea of **driving an LLM
|
|
388
|
+
through a stateless fresh-context loop with file-based state carrying
|
|
389
|
+
the only memory between iterations**. CodexLoop inherits that core
|
|
390
|
+
principle:
|
|
391
|
+
|
|
392
|
+
- Every Codex call runs `codex exec --ephemeral` — no session reuse, no conversational memory across iterations.
|
|
393
|
+
- All state lives in files under `<repo>/.loop/` — `state.json` is the canonical shape, per-iteration dumps are in `iterations/NNNN.json`, and an append-only `progress.log` captures the learning trajectory the way Ralph's `progress.txt` does.
|
|
394
|
+
- One story (one proposal) applied per iteration, scope-disciplined by design.
|
|
395
|
+
- An explicit "completion marker" decides when the loop terminates — in CodexLoop the reviewer emits `verdict: "goal-met"` and the runtime verifies via one more validation pass before accepting it.
|
|
396
|
+
|
|
397
|
+
Ralph's stateless-loop pattern is the mental model CodexLoop extends to
|
|
398
|
+
a multi-role critic system.
|
|
399
|
+
|
|
400
|
+
### [openai/codex-plugin-cc](https://github.com/openai/codex-plugin-cc)
|
|
401
|
+
|
|
402
|
+
The official Codex plugin for Claude Code is the reference architecture
|
|
403
|
+
for how a Claude Code plugin should wrap the `codex` CLI. CodexLoop
|
|
404
|
+
studies (but does not import) many of its structural conventions:
|
|
405
|
+
|
|
406
|
+
- Minimal `.claude-plugin/plugin.json` manifest with auto-discovery for `commands/`, `skills/`, `agents/`.
|
|
407
|
+
- Command file frontmatter with `description`, `argument-hint`, `disable-model-invocation`, `allowed-tools`, plus an inline `` !`node ...` `` body shell form for read-only commands.
|
|
408
|
+
- `$ARGUMENTS` passthrough + the foreground-vs-background UX pattern using `AskUserQuestion`.
|
|
409
|
+
- A Node companion script (`scripts/codex-companion.mjs`) that verb-dispatches, spawns detached background workers via `{ detached: true, stdio: "ignore" }` + `.unref()`, and manages workspace-scoped state directories.
|
|
410
|
+
- JSON-schema-driven Codex outputs via `codex exec --output-schema`, with XML-tagged prompt templates (`<role>`, `<task>`, `<structured_output_contract>`, `<grounding_rules>`, `<final_check>`).
|
|
411
|
+
- The `codex-rescue` subagent pattern — a thin forwarder that delegates to Codex via a skill-routed one-shot call.
|
|
412
|
+
|
|
413
|
+
CodexLoop's `scripts/loop-companion.mjs`, `commands/*.md`,
|
|
414
|
+
`prompts/*.md`, and `schemas/*.json` are original code written from
|
|
415
|
+
scratch — no file is copied — but they are line-by-line recognizable as
|
|
416
|
+
siblings of the openai-codex plugin's counterparts. A reader familiar
|
|
417
|
+
with that plugin can navigate CodexLoop with zero cognitive tax.
|
|
418
|
+
|
|
419
|
+
Thanks to both projects for publishing their work openly.
|
|
420
|
+
|
|
421
|
+
---
|
|
422
|
+
|
|
423
|
+
## License
|
|
424
|
+
|
|
425
|
+
MIT — see [LICENSE](LICENSE).
|
|
Binary file
|
package/bin/cloop
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env sh
|
|
2
|
+
# cloop — shell wrapper for CodexLoop's loop-companion.mjs.
|
|
3
|
+
#
|
|
4
|
+
# Works from any directory, including when this script is symlinked into
|
|
5
|
+
# /usr/local/bin or ~/.local/bin. It locates the companion script relative
|
|
6
|
+
# to its own physical path and execs `node` against it.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# cloop start [flags]
|
|
10
|
+
# cloop iterate [flags]
|
|
11
|
+
# cloop status [flags]
|
|
12
|
+
# cloop stop [flags]
|
|
13
|
+
# cloop result [flags]
|
|
14
|
+
#
|
|
15
|
+
# See `docs/command-spec.md` or `cloop --help` for every flag.
|
|
16
|
+
|
|
17
|
+
set -eu
|
|
18
|
+
|
|
19
|
+
# Resolve the physical path of this script, following symlinks manually so
|
|
20
|
+
# we do not depend on GNU `readlink -f` (macOS ships BSD readlink by default).
|
|
21
|
+
SCRIPT=$0
|
|
22
|
+
while [ -h "$SCRIPT" ]; do
|
|
23
|
+
DIR=$(cd -P "$(dirname "$SCRIPT")" && pwd)
|
|
24
|
+
LINK=$(readlink "$SCRIPT")
|
|
25
|
+
case "$LINK" in
|
|
26
|
+
/*) SCRIPT=$LINK ;;
|
|
27
|
+
*) SCRIPT="$DIR/$LINK" ;;
|
|
28
|
+
esac
|
|
29
|
+
done
|
|
30
|
+
SCRIPT_DIR=$(cd -P "$(dirname "$SCRIPT")" && pwd)
|
|
31
|
+
|
|
32
|
+
# bin/ lives at the plugin root; the companion is in scripts/.
|
|
33
|
+
COMPANION="$SCRIPT_DIR/../scripts/loop-companion.mjs"
|
|
34
|
+
|
|
35
|
+
if [ ! -f "$COMPANION" ]; then
|
|
36
|
+
printf 'cloop: cannot find loop-companion.mjs (looked at %s)\n' "$COMPANION" >&2
|
|
37
|
+
exit 1
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
if ! command -v node >/dev/null 2>&1; then
|
|
41
|
+
printf 'cloop: node is not on PATH. Install Node 20+ first.\n' >&2
|
|
42
|
+
exit 1
|
|
43
|
+
fi
|
|
44
|
+
|
|
45
|
+
exec node "$COMPANION" "$@"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Run exactly one iteration of the current CodexLoop (evaluate -> suggest -> rank -> apply -> validate -> record)
|
|
3
|
+
argument-hint: '[--dry-run] [--skip-apply]'
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
allowed-tools: Read, Bash(node:*), Bash(git:*)
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
Run exactly one iteration of the current CodexLoop. The iteration always runs synchronously in the foreground so the user sees the full per-iteration transcript (evaluation findings, ranked proposals, applied patch, validation summary, quality delta).
|
|
9
|
+
|
|
10
|
+
Raw slash-command arguments:
|
|
11
|
+
`$ARGUMENTS`
|
|
12
|
+
|
|
13
|
+
Core constraints:
|
|
14
|
+
- You do not generate proposals, apply patches, or run tests yourself.
|
|
15
|
+
- Return the companion script's stdout verbatim to the user. Do not paraphrase.
|
|
16
|
+
|
|
17
|
+
Run:
|
|
18
|
+
```bash
|
|
19
|
+
node "${CLAUDE_PLUGIN_ROOT}/scripts/loop-companion.mjs" iterate $ARGUMENTS
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Return the command stdout verbatim. Preserve the evaluation findings, proposal summaries, ranking breakdown, patch apply result, and validation output exactly as printed.
|
|
23
|
+
|
|
24
|
+
If the companion script reports "No active loop", tell the user to run `/cloop:start` first.
|
|
25
|
+
If the companion script reports `Codex CLI is not ready`, tell the user to run `/codex:setup` from the `openai-codex` plugin.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Show, set, or clear the Codex model and reasoning effort used by the current CodexLoop
|
|
3
|
+
argument-hint: '[<model>] [--effort <level>] [--list] [--clear] [--json]'
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
allowed-tools: Bash(node:*)
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
!`node "${CLAUDE_PLUGIN_ROOT}/scripts/loop-companion.mjs" model $ARGUMENTS`
|
|
9
|
+
|
|
10
|
+
Present the output verbatim to the user. Preserve the model name, the
|
|
11
|
+
reasoning effort, the previous/current labels, and any alias-expansion note
|
|
12
|
+
exactly as printed.
|
|
13
|
+
|
|
14
|
+
Usage summary:
|
|
15
|
+
- `/cloop:model` — show the current model and reasoning effort for the active loop
|
|
16
|
+
- `/cloop:model <name>` — set the model (e.g. `/cloop:model gpt-5.4-mini`, or the `spark` alias)
|
|
17
|
+
- `/cloop:model --effort <level>` — set the reasoning effort (one of `none`, `minimal`, `low`, `medium`, `high`, `xhigh`)
|
|
18
|
+
- `/cloop:model <name> --effort <level>` — set both at once
|
|
19
|
+
- `/cloop:model --clear` — reset BOTH model and effort to the Codex CLI defaults (from `~/.codex/config.toml`)
|
|
20
|
+
- `/cloop:model --list` — list known model aliases and valid reasoning effort values (repo-independent)
|
|
21
|
+
- `/cloop:model --json` — emit structured JSON instead of Markdown
|
|
22
|
+
|
|
23
|
+
**Defaults**: `cloop` ships with `gpt-5.4` as the default model and `xhigh`
|
|
24
|
+
as the default reasoning effort. These are applied at `/cloop:start` time
|
|
25
|
+
regardless of your `~/.codex/config.toml`, so the loop is reproducible even
|
|
26
|
+
if your Codex config changes.
|
|
27
|
+
|
|
28
|
+
If the command reports "No active loop", tell the user to run
|
|
29
|
+
`/cloop:start` first, or run `/cloop:model --list` which works without an
|
|
30
|
+
active loop.
|
|
31
|
+
|
|
32
|
+
If the command reports that the loop is in a terminal state, tell the user
|
|
33
|
+
to start a new loop with `/cloop:start --model <name> --effort <level>`.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Retrieve the full result of the current or completed CodexLoop (iteration history, ranking breakdowns, validation results)
|
|
3
|
+
argument-hint: '[--iteration N] [--json] [--diff]'
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
allowed-tools: Bash(node:*)
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
!`node "${CLAUDE_PLUGIN_ROOT}/scripts/loop-companion.mjs" result $ARGUMENTS`
|
|
9
|
+
|
|
10
|
+
Present the full command output verbatim. Preserve all details including:
|
|
11
|
+
- status table and iteration history
|
|
12
|
+
- per-iteration evaluation findings, proposals, ranking breakdown
|
|
13
|
+
- applied patch results, validation summaries, quality scores
|
|
14
|
+
- file paths and SHA hints exactly as printed
|
|
15
|
+
- any reported stopReason or error
|
|
16
|
+
|
|
17
|
+
Do not summarize or condense the output.
|