clean-room-skill 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/.codex-plugin/plugin.json +1 -1
- package/README.md +35 -8
- package/agents/clean-architect.md +7 -1
- package/agents/clean-implementer-verifier-shell.md +4 -0
- package/agents/clean-polish-reviewer.md +3 -0
- package/agents/clean-qa-editor.md +4 -0
- package/agents/contaminated-handoff-sanitizer.md +3 -0
- package/agents/contaminated-manager-verifier.md +10 -1
- package/agents/contaminated-source-analyst.md +8 -1
- package/bin/install.js +11 -1621
- package/docs/ARCHITECTURE.md +7 -1
- package/docs/HOOKS.md +14 -10
- package/docs/REFERENCE.md +31 -6
- package/examples/codex/.codex/agents/clean-architect.toml +7 -5
- package/examples/codex/.codex/agents/clean-polish-reviewer.toml +2 -2
- package/examples/codex/.codex/agents/clean-qa-editor.toml +3 -2
- package/examples/codex/.codex/agents/contaminated-handoff-sanitizer.toml +2 -2
- package/examples/codex/.codex/agents/contaminated-manager-verifier.toml +10 -4
- package/examples/codex/.codex/agents/contaminated-source-analyst.toml +7 -3
- package/hooks/validate-json-schema.py +14 -0
- package/lib/bootstrap.cjs +5 -1
- package/lib/doctor.cjs +157 -5
- package/lib/hooks.cjs +18 -0
- package/lib/install-artifacts.cjs +178 -4
- package/lib/install-claude-plugin.cjs +374 -0
- package/lib/install-cli.cjs +99 -0
- package/lib/install-operations.cjs +376 -0
- package/lib/install-options.cjs +149 -0
- package/lib/install-runtime-selection.cjs +180 -0
- package/lib/install-status.cjs +292 -0
- package/lib/install-tui.cjs +359 -0
- package/lib/preflight-bootstrap.cjs +39 -0
- package/lib/preflight-cli.cjs +95 -0
- package/lib/preflight-constants.cjs +25 -0
- package/lib/preflight-output.cjs +37 -0
- package/lib/preflight-paths.cjs +67 -0
- package/lib/preflight-template.cjs +103 -0
- package/lib/preflight-validation.cjs +276 -0
- package/lib/preflight.cjs +18 -461
- package/lib/run-clean-artifacts.cjs +276 -0
- package/lib/run-cli.cjs +90 -0
- package/lib/run-constants.cjs +171 -0
- package/lib/run-controller.cjs +247 -0
- package/lib/run-coverage.cjs +350 -0
- package/lib/run-hooks.cjs +96 -0
- package/lib/run-manifest.cjs +111 -0
- package/lib/run-progress.cjs +160 -0
- package/lib/run-results.cjs +433 -0
- package/lib/run-roots.cjs +230 -0
- package/lib/run-stages.cjs +409 -0
- package/lib/run.cjs +4 -1998
- package/lib/runtime-layout.cjs +12 -5
- package/package.json +8 -2
- package/plugin.json +1 -1
- package/skills/attended/SKILL.md +2 -0
- package/skills/clean-room/SKILL.md +6 -6
- package/skills/clean-room/assets/coverage-ledger.schema.json +95 -0
- package/skills/clean-room/assets/task-manifest.schema.json +25 -0
- package/skills/clean-room/examples/contaminated-side/task-manifest.json +14 -2
- package/skills/clean-room/references/CONTROLLER-LOOP.md +5 -0
- package/skills/clean-room/references/PROCESS.md +12 -4
- package/skills/clean-room/references/SPEC-SCHEMA.md +11 -2
- package/skills/refocus/SKILL.md +2 -0
- package/skills/unattended/SKILL.md +2 -0
package/docs/ARCHITECTURE.md
CHANGED
|
@@ -52,6 +52,7 @@ To assist in logical unit decomposition, the workflow supports an optional sourc
|
|
|
52
52
|
* **Execution Boundary**: This tooling runs exclusively in the contaminated domain before clean-room role sessions are initialized.
|
|
53
53
|
* **Traversal Bounds**: Source indexing enforces file count, per-file byte, total byte, batch token, and segment caps. It validates file size again after reading, skips files that change during read, records directory walk errors, and prunes traversal after global limits are exhausted with an aggregate skipped entry.
|
|
54
54
|
* **Agent 0 Use**: Agent 0 consumes `source-index.json` only to create neutral `task-manifest.json` units and per-unit `source_index_refs`. In visual fallback runs, Agent 0 consumes `visual-index.json` only to create neutral units and per-unit `visual_index_refs`. Both indexes stay contaminated-only and do not cross to Agent 1.5, Agent 2, Agent 3, Agent 4, or clean handoff packages.
|
|
55
|
+
* **Discovery Leads**: When Agent 1 detects an authorized related surface that cannot be analyzed inside the assigned unit, Agent 0 tracks it in contaminated `coverage-ledger.json` `discovery_leads`. High-priority leads must be resolved before the unit can be marked covered; the runner does not expand approved scope automatically.
|
|
55
56
|
* **Tool Trust Policy**: By default, tool discovery operates in `stat-only` mode and does not execute third-party binaries. It queries version strings only when explicitly invoked with `--probe-tools`. Tools discovered under `/opt/homebrew` or `/usr/local` remain stat-only unless `--allow-user-toolchain-probes` is also supplied. Project-local directories (such as `.bin` or `node_modules/.bin`) are ignored unless the environment variable `RE_SKILLS_TRUST_PROJECT_TOOLS=1` or the flag `--allow-working-project-tools` is supplied.
|
|
56
57
|
* **Local Tool Install Safety**: Explicit npm-backed helper installs are strict-version pinned and serialized with a cache-local lock before mutating `~/.cache/re-skills/clean-room-tools/npm`. Prefix creation failures, subprocess timeouts, and subprocess launch errors are returned as structured JSON facts instead of raw tracebacks.
|
|
57
58
|
|
|
@@ -177,6 +178,7 @@ The architecture delegates work across six distinct custom role agents to enforc
|
|
|
177
178
|
* Produces `clean-run-context.json` for Agent 2, Agent 3, and Agent 4 instead of handing over the full `task-manifest.json` or full `preflight-goal.json`.
|
|
178
179
|
* Influences Agent 2, Agent 3, and Agent 4 only through durable sanitized artifacts, never direct chat, progress feedback, implementation hints, or priority changes.
|
|
179
180
|
* Performs final verification of clean specification and implementation coverage against the source scope.
|
|
181
|
+
* Blocks handoff or coverage completion when high-priority contaminated discovery leads remain unresolved.
|
|
180
182
|
* Writes the inner-loop `clean-room-result.json` only after contaminated-side coverage verification.
|
|
181
183
|
* Consumes Agent 3 reports only after Agent 3 reaches a terminal state, and consumes Agent 4 reports only after the configured polish review reaches a terminal state, then sends only abstract delta tickets into a fresh clean artifact cycle.
|
|
182
184
|
|
|
@@ -187,6 +189,8 @@ The architecture delegates work across six distinct custom role agents to enforc
|
|
|
187
189
|
* Analyzes the authorized source code within assigned units or batches.
|
|
188
190
|
* Uses target stack and compatibility policy from preflight instead of inferring product goals from source.
|
|
189
191
|
* Writes neutral draft behavioral specifications based on observed behavior, public contracts, invariants, state transitions, and errors.
|
|
192
|
+
* Inventories the assigned unit's observable CLI, env, TUI, UI, protocol, config, command, and public behavior surfaces when relevant.
|
|
193
|
+
* Records authorized related surfaces that cannot be analyzed in the assigned context as contaminated `discovery_leads`, not clean spec fields.
|
|
190
194
|
* Generates evidence references pointing to contaminated ledgers instead of copying raw source code or comments.
|
|
191
195
|
* Flags suspected leakage but does not approve its own work for clean handoff.
|
|
192
196
|
|
|
@@ -251,10 +255,12 @@ Agent 3's terminal report is not enough to return. If configured, Agent 4 must p
|
|
|
251
255
|
* Locks the contaminated artifact root with `.clean-room-run.lock`.
|
|
252
256
|
* Reloads durable artifacts before each iteration.
|
|
253
257
|
* Selects at most one pending or gap unit inside `loop_context.approved_scope_refs`.
|
|
258
|
+
* Requires exactly one `unit_kind: "foundation"` unit, named by `loop_context.foundation_unit_ref`; behavior units cannot run or complete until that foundation unit is covered.
|
|
254
259
|
* Spawns configured role commands with `shell: false`, bounded output, and bounded timeout.
|
|
255
260
|
* In strict context-management mode, requires each configured stage to provide `context.fresh_session: true` and `context.brief_path`, then validates the session brief before spawn.
|
|
256
261
|
* Supports the optional `clean-polish-review` phase between `clean-implement-qc` and `contaminated-coverage-verify`.
|
|
257
262
|
* Validates schema, leakage, and handoff integrity before advancing state.
|
|
263
|
+
* Rejects `covered` coverage-ledger units that still have unresolved high-priority `discovery_leads`.
|
|
258
264
|
* Records controller memory in contaminated-side `controller-run-ledger.json`.
|
|
259
265
|
* Writes `clean-room-result.json` before returning to the outer spec loop.
|
|
260
266
|
|
|
@@ -282,7 +288,7 @@ Note: Even though clean and source-denied roles (such as Agent 1.5, 2, 3, and 4)
|
|
|
282
288
|
|
|
283
289
|
## Guardrails and Hooks
|
|
284
290
|
|
|
285
|
-
The architecture relies on agent/tool hook scaffolding located in `hooks/` to enforce boundary rules dynamically during agent sessions. Use installer-generated Codex or Claude hook configs with absolute wrapper paths. Static cwd-relative plugin hook declarations are not treated as an enforcement boundary. Use strict hooks for dedicated Codex or
|
|
291
|
+
The architecture relies on agent/tool hook scaffolding located in `hooks/` to enforce boundary rules dynamically during agent sessions. Use installer-generated Codex or Claude hook configs with absolute wrapper paths, or the generated OpenCode local plugin bridge. Static cwd-relative plugin hook declarations are not treated as an enforcement boundary. Use strict hooks for dedicated Codex, Claude, or OpenCode clean-room homes; safe hooks are compatibility-only between runs and begin enforcing when init/onboarding launches role sessions with clean-room environment variables.
|
|
286
292
|
|
|
287
293
|
Matcher coverage depends on the host runtime emitting hook events for the tool invocation. Hosts that do not emit a pre/post tool event for a file, terminal, or resource tool are not protected by adding that tool name to the generated hook config. Run `clean-room-skill doctor --runtime codex --hooks=strict --coverage`, or the Claude or OpenCode equivalent, after install.
|
|
288
294
|
|
package/docs/HOOKS.md
CHANGED
|
@@ -6,7 +6,7 @@ The hooks are engineering guardrails. They reduce accidental cross-domain reads
|
|
|
6
6
|
|
|
7
7
|
## Install Locations
|
|
8
8
|
|
|
9
|
-
The installer copies the Python hook files for every supported runtime layout. Runtime hook registration is verified
|
|
9
|
+
The installer copies the Python hook files for every supported runtime layout. Runtime hook registration is verified for Codex, Claude Code, and OpenCode.
|
|
10
10
|
|
|
11
11
|
| Runtime | Hook files copied to | Active hook config |
|
|
12
12
|
| --- | --- | --- |
|
|
@@ -14,7 +14,7 @@ The installer copies the Python hook files for every supported runtime layout. R
|
|
|
14
14
|
| Claude Code | `<targetRoot>/hooks/clean-room/*.py` | `<targetRoot>/settings.json` |
|
|
15
15
|
| Antigravity | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
|
|
16
16
|
| Gemini CLI | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
|
|
17
|
-
| OpenCode | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
|
|
17
|
+
| OpenCode | `<targetRoot>/hooks/clean-room/*.py` | `<targetRoot>/plugins/clean-room.ts` |
|
|
18
18
|
| Kilo | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
|
|
19
19
|
| Cursor | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
|
|
20
20
|
| GitHub Copilot | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
|
|
@@ -31,8 +31,8 @@ Codex uses `CODEX_HOME` or `~/.codex` for global installs. Claude Code uses `CLA
|
|
|
31
31
|
|
|
32
32
|
| Mode | Behavior |
|
|
33
33
|
| --- | --- |
|
|
34
|
-
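| `safe` | Default. Registers hooks for Codex or Claude, but `clean-room-hook.py` no-ops until a clean-room role environment is present or `CLEAN_ROOM_HOOK_ENFORCE` is truthy. |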
|
|
35
|
-
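| `strict` | Registers hooks for Codex or Claude and fails closed even without clean-room role environment. Use only in dedicated clean-room runtime homes. |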
|
|
34
|
+
| `safe` | Default. Registers hooks for Codex, Claude, or OpenCode, but `clean-room-hook.py` no-ops until a clean-room role environment is present or `CLEAN_ROOM_HOOK_ENFORCE` is truthy. |
|
|
35
|
+
| `strict` | Registers hooks for Codex, Claude, or OpenCode and fails closed even without clean-room role environment. Use only in dedicated clean-room runtime homes. |
|
|
36
36
|
| `copy-only` | Copies hook files without modifying runtime hook config. This is also the effective behavior for runtimes without verified hook registration support. |
|
|
37
37
|
|
|
38
38
|
`--no-hooks` is an alias for `--hooks=copy-only`.
|
|
@@ -41,6 +41,8 @@ Codex uses `CODEX_HOME` or `~/.codex` for global installs. Claude Code uses `CLA
|
|
|
41
41
|
|
|
42
42
|
When hook mode is `safe` or `strict`, the installer registers four managed hook entries for Codex and Claude. Each entry invokes the installed `clean-room-hook.py` wrapper with an absolute Python path, an absolute wrapper path, the requested hook mode, and one or more `--check` scripts.
|
|
43
43
|
|
|
44
|
+
For OpenCode, the installer writes a generated local plugin at `<targetRoot>/plugins/clean-room.ts`. OpenCode auto-loads that plugin from its config directory. The plugin subscribes to `tool.execute.before` and `tool.execute.after`, translates OpenCode tool payloads into the existing clean-room hook payload shape, and invokes the installed Python wrapper with `shell: false`. `copy-only` omits this plugin.
|
|
45
|
+
|
|
44
46
|
| Event | Matcher | Checks |
|
|
45
47
|
| --- | --- | --- |
|
|
46
48
|
| `PreToolUse` | <code>Bash|Shell|PowerShell|Monitor|exec_command|shell_command|write_stdin</code> | `require-clean-room-env.py`, `deny-clean-room-shell.py` |
|
|
@@ -205,26 +207,28 @@ The hook policy is deny-by-default during active clean-room role sessions.
|
|
|
205
207
|
|
|
206
208
|
## Verification
|
|
207
209
|
|
|
208
|
-
Use `doctor` after installing Codex or Claude hooks:
|
|
210
|
+
Use `doctor` after installing Codex, Claude, or OpenCode hooks:
|
|
209
211
|
|
|
210
212
|
```bash
|
|
211
213
|
clean-room-skill doctor --runtime codex --hooks=safe
|
|
212
214
|
clean-room-skill doctor --runtime codex --hooks=strict
|
|
213
215
|
clean-room-skill doctor --runtime codex --hooks=strict --coverage
|
|
214
216
|
clean-room-skill doctor --runtime claude --hooks=strict --coverage
|
|
217
|
+
clean-room-skill doctor --runtime opencode --hooks=strict --coverage
|
|
215
218
|
```
|
|
216
219
|
|
|
217
220
|
Add `--config-dir <path>` when checking a non-default runtime config root.
|
|
218
221
|
|
|
219
222
|
`doctor` verifies that:
|
|
220
223
|
|
|
221
|
-
- The hook config exists.
|
|
222
|
-
- Exactly four managed clean-room hook entries are present.
|
|
223
|
-
- Managed commands use absolute Python and wrapper paths.
|
|
224
|
+
- The hook config or OpenCode local plugin exists.
|
|
225
|
+
- Exactly four managed clean-room hook entries are present for Codex and Claude.
|
|
226
|
+
- Managed Codex and Claude commands use absolute Python and wrapper paths.
|
|
227
|
+
- The OpenCode plugin declares `tool.execute.before`, `tool.execute.after`, an absolute wrapper path, and `shell: false`.
|
|
224
228
|
- The requested safe or strict mode is configured.
|
|
225
229
|
- Safe mode no-ops without clean-room environment.
|
|
226
230
|
- Strict mode and enforced safe mode fail without required environment.
|
|
227
231
|
- Smoke payloads fail for source reads, source writes, shell bypasses, and malformed post-write JSON.
|
|
228
|
-
- `--coverage` prints matcher and check coverage for
|
|
232
|
+
- `--coverage` prints matcher and check coverage for generated hook config entries or OpenCode plugin coverage.
|
|
229
233
|
|
|
230
|
-
`doctor` is a smoke test. It does not prove host event coverage, legal sufficiency, or full runtime isolation.
|
|
234
|
+
`doctor` is a smoke test. It does not prove host event coverage, legal sufficiency, or full runtime isolation. For OpenCode, it verifies the generated plugin bridge and Python guardrail checks, not every OpenCode tool surface.
|
package/docs/REFERENCE.md
CHANGED
|
@@ -64,12 +64,12 @@ Verified:
|
|
|
64
64
|
|
|
65
65
|
- Codex
|
|
66
66
|
- Claude Code
|
|
67
|
+
- OpenCode
|
|
67
68
|
|
|
68
69
|
Layout-only or experimental:
|
|
69
70
|
|
|
70
71
|
- Antigravity
|
|
71
72
|
- Gemini CLI
|
|
72
|
-
- OpenCode
|
|
73
73
|
- Kilo
|
|
74
74
|
- Cursor
|
|
75
75
|
- GitHub Copilot
|
|
@@ -80,7 +80,18 @@ Layout-only or experimental:
|
|
|
80
80
|
- Hermes Agent
|
|
81
81
|
- CodeBuddy
|
|
82
82
|
|
|
83
|
-
Layout-only installs write files to expected runtime locations, but this repository does not verify that those hosts load the files or emit all hook events needed for clean-room enforcement.
|
|
83
|
+
Layout-only installs write files to expected runtime locations, but this repository does not verify that those hosts load the files or emit all hook events needed for clean-room enforcement. OpenCode installs are verified through a generated local plugin bridge at `plugins/clean-room.ts`; `doctor` verifies that bridge and the Python guardrails, not every OpenCode tool surface.
|
|
84
|
+
|
|
85
|
+
### Pi Package Compatibility
|
|
86
|
+
|
|
87
|
+
Pi can install this package and load the bundled skills from the package metadata:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pi install npm:clean-room-skill@latest
|
|
91
|
+
pi install https://github.com/whit3rabbit/clean-room-skill
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Pi invokes skills as `/skill:<name>`. Use `/skill:init` for the setup pass, `/skill:clean-room` for the startup wizard, `/skill:attended` for attended controller mode, and `/skill:unattended` for bounded unattended mode. Pi support is package compatibility only: it does not add a `--pi` installer target, does not participate in `--all`, and does not register clean-room hooks. Clean-room safety still depends on role separation, path isolation, schema validation, and supported hook runtimes.
|
|
84
95
|
|
|
85
96
|
Global install roots:
|
|
86
97
|
|
|
@@ -103,12 +114,20 @@ Global install roots:
|
|
|
103
114
|
|
|
104
115
|
Local installs use each runtime's project config directory. Antigravity local installs write `.agents/plugins/clean-room/`.
|
|
105
116
|
|
|
117
|
+
## Agent Metadata Compatibility
|
|
118
|
+
|
|
119
|
+
Runtime agent metadata is intentionally runtime-specific. Claude Code Markdown agents support documented `model`, `effort`, `color`, and optional `memory` frontmatter. Clean-room role agents use `model`, `effort`, and `color` only. They do not use persistent `memory`, because clean-room state must come from durable artifacts, role-session briefs, and fresh role sessions rather than runtime recall.
|
|
120
|
+
|
|
121
|
+
Codex TOML agents support documented session config fields such as `model`, `model_reasoning_effort`, `developer_instructions`, `sandbox_mode`, `mcp_servers`, and `skills.config`. Do not copy Claude aliases such as `sonnet` or `opus`, Claude `color`, or Claude `memory` fields into Codex TOML templates.
|
|
122
|
+
|
|
123
|
+
Codex hooks support `updatedInput`, but clean-room hook enforcement should stay fail-closed through exit status and explicit deny decisions. Do not rewrite clean-room tool calls in hooks; command mutation makes boundary behavior harder to review and test.
|
|
124
|
+
|
|
106
125
|
## Hook Modes And Doctor
|
|
107
126
|
|
|
108
127
|
Hook modes:
|
|
109
128
|
|
|
110
129
|
- `safe`: default. Copies hooks and registers a wrapper that no-ops until role sessions provide clean-room environment variables. `CLEAN_ROOM_HOOK_ENFORCE=1` remains available for explicit smoke tests.
|
|
111
|
-
- `strict`: fail-closed mode for dedicated Codex or Claude clean-room homes.
|
|
130
|
+
- `strict`: fail-closed mode for dedicated Codex, Claude, or OpenCode clean-room homes.
|
|
112
131
|
- `copy-only`: copies hook files without runtime hook registration.
|
|
113
132
|
|
|
114
133
|
Smoke test generated hook registration:
|
|
@@ -117,11 +136,12 @@ Smoke test generated hook registration:
|
|
|
117
136
|
clean-room-skill doctor --runtime codex --hooks=safe
|
|
118
137
|
clean-room-skill doctor --runtime codex --hooks=strict
|
|
119
138
|
clean-room-skill doctor --runtime codex --hooks=strict --coverage
|
|
139
|
+
clean-room-skill doctor --runtime opencode --hooks=strict --coverage
|
|
120
140
|
```
|
|
121
141
|
|
|
122
142
|
Use `--runtime claude` for Claude Code, and add `--config-dir <path>` when testing an alternate config root.
|
|
123
143
|
|
|
124
|
-
`doctor` checks that Codex or Claude hook config exists,
|
|
144
|
+
`doctor` checks that Codex or Claude hook config exists, or that the OpenCode local plugin exists. It verifies generated clean-room hooks or plugin wiring, absolute wrapper paths, the requested safe or strict mode, and smoke payload failures for missing environment, source reads, source writes, shell use, and malformed post-write JSON. Safe mode also verifies no-op behavior without clean-room env.
|
|
125
145
|
|
|
126
146
|
It does not prove legal sufficiency, full runtime hook event coverage, host-side feature enablement, or full JSON Schema conformance.
|
|
127
147
|
|
|
@@ -210,7 +230,11 @@ Options:
|
|
|
210
230
|
| `--schema-dir <path>` | Override bundled schema directory. |
|
|
211
231
|
| `--python <path>` | Python executable for validation hooks; default is `python3`. |
|
|
212
232
|
|
|
213
|
-
The task manifest must already include preflight references, the required handoff sequence, unattended controller policy, finite iteration bounds, and `loop_context.approved_scope_refs`.
|
|
233
|
+
The task manifest must already include preflight references, the required handoff sequence, unattended controller policy, finite iteration bounds, `loop_context.foundation_unit_ref`, and `loop_context.approved_scope_refs`.
|
|
234
|
+
|
|
235
|
+
Unattended code-development manifests must include exactly one `unit_kind: "foundation"` unit. The runner rejects non-foundation approved slices until that unit is covered.
|
|
236
|
+
|
|
237
|
+
`coverage-ledger.json` may record contaminated-only `source_units[].discovery_leads` for authorized related surfaces that were detected but not analyzed in the assigned unit. The runner rejects a `covered` unit while any high-priority discovery lead remains open or deferred. It does not add follow-up units or expand `loop_context.approved_scope_refs`; Agent 0 must return an abstract delta, mark coverage partial or blocked, or pause for attended approval.
|
|
214
238
|
|
|
215
239
|
Minimal agent command adapter shape for advisory or disabled context management:
|
|
216
240
|
|
|
@@ -323,7 +347,8 @@ The runner exports `CLEAN_ROOM_SESSION_BRIEF_PATH`, `CLEAN_ROOM_ROLE_SESSION_ID`
|
|
|
323
347
|
| `install lock is held` | Another install or uninstall is mutating the same target root | Wait for the other process to finish; stale locks are handled conservatively. |
|
|
324
348
|
| Hook config write failed after files copied | Partial installer state | Fix the filesystem error, then re-run the same installer command. |
|
|
325
349
|
| Install manifest remains `installing` | The previous install did not complete | Re-run the same installer command for that runtime and target root. |
|
|
326
|
-
| `clean-room run` rejects the manifest | Invalid or incomplete unattended loop metadata | Fix `controller_policy`, `loop_context`, and `approved_scope_refs`, then retry `--dry-run`. |
|
|
350
|
+
| `clean-room run` rejects the manifest | Invalid or incomplete unattended loop metadata | Fix `controller_policy`, `loop_context.foundation_unit_ref`, and `loop_context.approved_scope_refs`, then retry `--dry-run`. |
|
|
351
|
+
| `clean-room run` rejects a covered unit with `discovery_leads` | A high-priority contaminated discovery lead is still unresolved | Analyze the lead in an authorized follow-up unit, mark it resolved, or keep coverage partial/blocked and return an abstract delta. |
|
|
327
352
|
| `clean-room run` rejects an agent command stage in strict context mode | The stage is missing `context.fresh_session: true`, missing `context.brief_path`, or points the brief outside the allowed artifact root | Fix the stage context and regenerate the role-session brief for the selected unit. |
|
|
328
353
|
| `clean-room run` reports no progress | Configured stages exited without durable artifact changes | Check role command cwd/argv, selected unit, and artifact write roots. |
|
|
329
354
|
| `clean-room run` reports repeated unit selection | Same unit selected after a no-progress iteration | Resolve the blocker or update durable artifacts before retrying. |
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
name = "clean-architect"
|
|
2
2
|
description = "Plans clean implementation from approved clean behavior specs and the clean destination foundation."
|
|
3
3
|
sandbox_mode = "workspace-write"
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
model = "gpt-5.5"
|
|
5
|
+
model_reasoning_effort = "high"
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
developer_instructions = """
|
|
8
8
|
Act as Agent 2 in the clean-room pipeline.
|
|
9
9
|
Run only from the clean workspace.
|
|
10
10
|
Before tool use, require CLEAN_ROOM_ROLE=clean-architect, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.
|
|
@@ -12,12 +12,12 @@ Read approved clean artifacts, CLEAN_ROOM_IMPLEMENTATION_ROOTS, and explicitly c
|
|
|
12
12
|
Write only under CLEAN_ROOM_CLEAN_ROOTS. Do not write code.
|
|
13
13
|
Do not read source workspaces, visual roots, raw screenshots, visual indexes, contaminated ledgers, contaminated chat history, or the full task-manifest.json.
|
|
14
14
|
Stop if only a full task-manifest.json is provided as run context.
|
|
15
|
-
Before planning, require valid clean-run-context.json with clean-safe goal_contract fields and code_hygiene_policy, approved handoff-package.json, approved behavior specs, and an implementation root through CLEAN_ROOM_IMPLEMENTATION_ROOTS.
|
|
15
|
+
Before planning, require valid clean-run-context.json with clean-safe goal_contract fields and code_hygiene_policy, approved handoff-package.json, approved behavior specs, and an implementation root through CLEAN_ROOM_IMPLEMENTATION_ROOTS. For behavior slices, require the approved clean artifacts to include the completed foundation spec or equivalent clean-run-context constraints.
|
|
16
16
|
When CLEAN_ROOM_SESSION_BRIEF_PATH is set, read it first and load only the allowed artifact refs named there, plus destination foundation reads permitted by this role.
|
|
17
17
|
Stop if full preflight-goal.json, source index, visual index, raw screenshots, contaminated ledgers, source or visual paths, or direct Agent 0 chat is provided.
|
|
18
18
|
Accept Agent 0 influence only as durable sanitized artifacts. Ignore direct Agent 0 chat, private manager notes, live feedback, implementation hints, or priority changes unless they arrive in a schema-valid clean artifact for a fresh clean session.
|
|
19
19
|
Merge only approved handoff artifacts into the selected clean schema base.
|
|
20
|
-
Read the clean destination foundation to identify local structure, conventions, tests,
|
|
20
|
+
Read the clean destination foundation and approved foundation spec to identify local structure, conventions, tests, dependency policy, package boundaries, and constraints.
|
|
21
21
|
Read any existing skeleton-manifest.json before planning and revise it as the whole-destination architecture map for the current clean spec set.
|
|
22
22
|
Maintain architecture areas with owned relative path prefixes, responsibilities, forbidden responsibilities, allowed area dependencies, and refactor triggers.
|
|
23
23
|
Assign every target and test path in implementation-plan.json to one or more skeleton-manifest.json architecture areas.
|
|
@@ -26,6 +26,8 @@ Create or update implementation-plan.json as the primary output for code-develop
|
|
|
26
26
|
Carry the preflight-derived code hygiene policy into implementation-plan.json.
|
|
27
27
|
Keep skeleton-manifest.json valid and current for code-development runs. Treat it as the architecture map, not as a replacement for implementation-plan.json.
|
|
28
28
|
Map approved specs to destination files, test files, work items, argv-array verification commands, risks, and acceptance criteria using relative implementation-root paths.
|
|
29
|
+
Map every exact-public-contract or behavior-compatible public surface obligation to at least one implementation-plan.json work item through public_contract_refs; do not replace a public command/API inventory with one generic dispatch work item unless every obligation ref is listed.
|
|
30
|
+
Do not choose dependencies by copying source manifests. Add or preserve dependencies only when clean artifacts, destination evidence, or preflight policy justify them.
|
|
29
31
|
Preserve source-test-derived scenarios as clean test obligations for equal output without copying source test structure.
|
|
30
32
|
Do not resolve public-contract, callable, protocol, async, serialization, or data-shape ambiguity by narrowing semantics. Mark the work blocked or create an abstract delta when the approved clean specs do not decide it.
|
|
31
33
|
Stop if contaminated material appears in clean inputs.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
name = "clean-polish-reviewer"
|
|
2
2
|
description = "Performs final clean code polish, repo hygiene, verification review, and constrained implementation-root commit."
|
|
3
3
|
sandbox_mode = "workspace-write"
|
|
4
|
+
model = "gpt-5.4-mini"
|
|
4
5
|
model_reasoning_effort = "high"
|
|
5
|
-
enabled_skills = ["clean-room"]
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
developer_instructions = """
|
|
8
8
|
Act as Agent 4 in the clean-room pipeline.
|
|
9
9
|
Run only in the clean domain after Agent 3 has produced terminal implementation and QC reports.
|
|
10
10
|
Before tool use, require CLEAN_ROOM_ROLE=clean-polish-reviewer, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
name = "clean-qa-editor"
|
|
2
2
|
description = "Implements the clean plan, verifies clean destination code, and emits one terminal report for Agent 0."
|
|
3
3
|
sandbox_mode = "workspace-write"
|
|
4
|
+
model = "gpt-5.4-mini"
|
|
4
5
|
model_reasoning_effort = "high"
|
|
5
|
-
enabled_skills = ["clean-room"]
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
developer_instructions = """
|
|
8
8
|
Act as Agent 3 in the clean-room pipeline.
|
|
9
9
|
Run only in the clean domain.
|
|
10
10
|
Before tool use, require CLEAN_ROOM_ROLE=clean-qa-editor, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.
|
|
@@ -30,6 +30,7 @@ Do not report progress, ask Agent 0 for guidance, or send partial findings while
|
|
|
30
30
|
Record argv-array verification commands, optional clean-safe container metadata, implementation status, changed relative paths, verification results, blockers, and abstract delta tickets in CLEAN_ROOM_CLEAN_ROOTS/implementation-report.json.
|
|
31
31
|
Review leakage risk and record contamination incidents.
|
|
32
32
|
Flag missing source-test parity, missing equal-output assertions, and mismatches between specs, implementation plan, public contracts, and test obligations.
|
|
33
|
+
Verify public-surface inventory parity item by item. Every required public_surface:<spec_id>:<kind>:<name> ref must be covered by tests, mapped to a completed work item, and represented in terminal verification; passing test counts or broad command-dispatch coverage is not enough.
|
|
33
34
|
Require invariant-level tests for compatibility-critical behavior. Passing module coverage or API-name coverage is not sufficient when protocol, serialization, streaming, queueing, error-budget, async, or typed-data invariants are in scope.
|
|
34
35
|
Keep CLEAN_ROOM_CLEAN_ROOTS/qc-report.json updated when the run expects it.
|
|
35
36
|
Record code hygiene violations as code-hygiene findings in CLEAN_ROOM_CLEAN_ROOTS/qc-report.json.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
name = "contaminated-handoff-sanitizer"
|
|
2
2
|
description = "Reviews Agent 1 draft specs from a fresh source-denied contaminated context and approves only scrubbed handoff artifacts."
|
|
3
3
|
sandbox_mode = "workspace-write"
|
|
4
|
+
model = "gpt-5.4-mini"
|
|
4
5
|
model_reasoning_effort = "high"
|
|
5
|
-
enabled_skills = ["clean-room"]
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
developer_instructions = """
|
|
8
8
|
Act as Agent 1.5 in the clean-room pipeline.
|
|
9
9
|
Operate in the contaminated domain, but without source access or Agent 1 source-reading chat history.
|
|
10
10
|
Before tool use, require CLEAN_ROOM_ROLE=contaminated-handoff-sanitizer, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
name = "contaminated-manager-verifier"
|
|
2
2
|
description = "Consumes contaminated source indexes, tracks source coverage, and emits only abstract clean-room delta tickets."
|
|
3
3
|
sandbox_mode = "workspace-write"
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
model = "gpt-5.5"
|
|
5
|
+
model_reasoning_effort = "high"
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
developer_instructions = """
|
|
8
8
|
Act as Agent 0 in the clean-room pipeline.
|
|
9
9
|
Operate only in the contaminated domain.
|
|
10
10
|
Read authorized source and contaminated ledgers as needed.
|
|
@@ -12,16 +12,22 @@ Before source discovery or decomposition, require validated preflight-goal.json,
|
|
|
12
12
|
Do not infer target language, dependency policy, license policy, exactness policy, output directory, or feature add/remove policy from source.
|
|
13
13
|
When acting as agent zero/controller, define and pass CLEAN_ROOM_ROLE, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SCHEMA_DIR, and clean/source-denied CLEAN_ROOM_ALLOWED_READ_ROOTS into every new role session.
|
|
14
14
|
When context management is enabled, maintain contaminated-side controller-status.json and create one compact role-session-brief.json per role launch. In strict mode, launch each role from a fresh model session, profile, or thread.
|
|
15
|
-
Missing controller_policy means attended. Record loop_context when an outer spec loop invokes the inner clean-room loop for one approved spec slice. In unattended mode, reload durable artifacts before each iteration, select at most one pending or gap unit inside loop_context.approved_scope_refs, launch roles from fresh context, validate schema and leakage before advancing state, and stop on configured safety or ambiguity conditions.
|
|
15
|
+
Missing controller_policy means attended. Record loop_context when an outer spec loop invokes the inner clean-room loop for one approved spec slice. In unattended mode, reload durable artifacts before each iteration, select at most one pending or gap unit inside loop_context.approved_scope_refs, require loop_context.foundation_unit_ref to point at the one foundation unit, launch roles from fresh context, validate schema and leakage before advancing state, and stop on configured safety or ambiguity conditions.
|
|
16
16
|
Record the user's format_selection target profile, Agent 0-4 agent_pipeline contract, Agent 1.5 sanitizer role, and optional initialization_snapshot in task-manifest.json.
|
|
17
17
|
Produce clean-run-context.json for Agent 2, Agent 3, and Agent 4 from sanitized initialization, clean-safe preflight goal fields, code hygiene policy, and handoff data. Do not send the full task-manifest.json or full preflight-goal.json to clean roles.
|
|
18
18
|
Influence Agent 2, Agent 3, and Agent 4 only through durable sanitized artifacts. Do not send direct chat instructions, progress feedback, priority changes, implementation hints, or corrective coaching into an active clean role session.
|
|
19
19
|
Use contaminated source-index.json when controller preflight produced one.
|
|
20
20
|
When no indexable source code exists and screenshots/images are the authorized evidence, use contaminated visual-index.json only as fallback input. In attended mode, pause before decomposition to ask what the screenshots are meant to accomplish: product goal, target user flow, screenshot coverage, target stack, UI exactness boundary, and whether visible words are public compatibility surface.
|
|
21
21
|
Maintain the tasklist as neutral CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS/task-manifest.json units, map at most one source-index batch, large-file segment, or visual-index batch into each unit, and track coverage under CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS.
|
|
22
|
+
Create exactly one unit_kind="foundation" unit before behavior units. Set loop_context.foundation_unit_ref to that unit and approve it before any unit_kind="behavior" slice. The foundation unit captures target stack, package or module boundaries, public manifest surfaces, test entrypoints, dependency policy, and destination constraints.
|
|
22
23
|
Provide Agent 1.5 only a neutral sanitizer brief with domain purpose, target profile, unit intent, public compatibility allowlist, and blocked categories.
|
|
23
24
|
Send Agent 1 draft specs to Agent 1.5 for independent source-denied sanitization before clean handoff.
|
|
25
|
+
Do not send a spec slice to handoff or mark coverage complete while the assigned unit has unresolved high-priority coverage-ledger.json discovery_leads or open discovery questions.
|
|
26
|
+
Do not approve or complete non-foundation behavior slices until the foundation unit is covered. Foundation does not authorize dependency mirroring; dependencies are preserved only when public compatibility, destination evidence, or explicit policy requires them.
|
|
27
|
+
When Agent 1 records discovery_leads, create neutral follow-up task units only when the lead is inside authorized scope. Do not silently expand loop_context.approved_scope_refs during an active inner run; return an abstract delta, mark coverage partial, or pause for attended approval.
|
|
28
|
+
For multi-segment source work, you may include a previous contaminated draft behavior spec in a later contaminated-analysis role-session brief only when it is under the contaminated artifact root, hash-checked, within context budgets, and still forbidden to clean or source-denied roles.
|
|
24
29
|
Compare clean artifacts and terminal implementation reports against source behavior, discovered source tests, equal-output requirements, and public API/schema compatibility for coverage gaps.
|
|
30
|
+
For exact-public-contract or behavior-compatible units, split broad public surfaces into smaller units or maintain coverage-ledger.json public_surface_coverage entries for every required public_surface:<spec_id>:<kind>:<name> obligation. A covered unit requires each obligation to be covered, mapped to clean work, and verified.
|
|
25
31
|
Reject complete when source-test-derived parity, protocol invariants, public-contract tests, or approved behavior-spec open questions remain unresolved. Convert the gap into abstract delta tickets for a fresh clean cycle.
|
|
26
32
|
Do not write clean artifacts.
|
|
27
33
|
If source-index.json is needed but missing, pause for controller preflight instead of running shell tools inside this role.
|
|
@@ -1,25 +1,29 @@
|
|
|
1
1
|
name = "contaminated-source-analyst"
|
|
2
2
|
description = "Reads authorized source and writes neutral draft task slices plus behavior specs with evidence references."
|
|
3
3
|
sandbox_mode = "workspace-write"
|
|
4
|
+
model = "gpt-5.4-mini"
|
|
4
5
|
model_reasoning_effort = "medium"
|
|
5
|
-
enabled_skills = ["clean-room"]
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
developer_instructions = """
|
|
8
8
|
Act as Agent 1 in the clean-room pipeline.
|
|
9
9
|
Operate only in the contaminated domain.
|
|
10
10
|
Before reading source, require active task-manifest.json with preflight_goal_ref and preflight_goal_sha256, one assigned unit_id, authorized source_index_refs when used, authorized visual_index_refs when visual fallback is used, evidence handling policy, and target stack plus compatibility policy from preflight.
|
|
11
11
|
When CLEAN_ROOM_SESSION_BRIEF_PATH is set, read it first and load only the allowed artifact refs named there, except for direct source reads already permitted by the assigned unit and role policy.
|
|
12
12
|
Do not infer target language, dependency policy, license policy, or exactness policy from source code.
|
|
13
|
-
Read the
|
|
13
|
+
Read the bounded authorized source needed to fully inventory the assigned unit's observable surface. Do not stop at the first obvious path when the unit includes CLI, environment override, TUI, UI, protocol, config, command dispatch, or public behavior surface.
|
|
14
14
|
When the unit has source_index_refs, stay within the referenced batch unless Agent 0 explicitly assigns a related gap.
|
|
15
15
|
When the unit has visual_index_refs, use view_image only in this contaminated role and stay within the referenced visual batch unless Agent 0 explicitly assigns a related gap.
|
|
16
16
|
Write only under CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS.
|
|
17
17
|
Generate neutral draft task slices and behavioral spec material for Agent 0-controlled units.
|
|
18
18
|
Produce neutral behavioral requirements and evidence refs.
|
|
19
|
+
For a unit_kind="foundation" assignment, inventory target stack, package or module boundaries, public manifest surfaces, test entrypoints, dependency policy, and destination constraints. Record public compatibility facts in behavior-spec fields and keep destination/build constraints neutral for clean planning.
|
|
20
|
+
When relevant to the assigned unit, locate and account for every observable CLI argument, flag, environment variable override, TUI command, keyboard shortcut, menu state, associated UI element, view state, accessibility expectation, config key, protocol entry point, and public user-visible behavior.
|
|
21
|
+
If you detect related files, modules, visual components, or public surfaces that are inside authorized scope but outside the assigned refs or too large to analyze in the current context, record contaminated coverage-ledger.json discovery_leads with neutral source_ref, description, priority, and status. Do not put source paths, visual paths, source index refs, or private identifiers in clean behavior specs.
|
|
19
22
|
For visual fallback units, write UI behavior/spec claims about intent, screen states, hierarchy, accessibility expectations, interaction purpose, and broad style goals. Do not OCR or copy visible words unless preflight recorded them as public compatibility surface; do not preserve exact palettes, iconography, spacing, layout measurements, or distinctive visual expression.
|
|
20
23
|
Treat discovered source tests as behavioral evidence and convert them into clean test_scenarios that validate the same observable outputs.
|
|
21
24
|
Record equal-output expectations for public return values, serialized data, CLI or API responses, errors, state changes, ordering, and compatibility-relevant side effects.
|
|
22
25
|
Capture public API, protocol, config, and data/schema compatibility using existing behavior spec fields.
|
|
26
|
+
Do not mirror source dependency lists, package manifests, or private module layout. Mention a dependency only when it is public compatibility surface, destination evidence, or explicitly allowed by preflight policy.
|
|
23
27
|
For behavior-compatible ports, extract compatibility-critical invariants into invariants, compatibility_notes, and test_scenarios; broad module coverage is not enough.
|
|
24
28
|
When present, treat protocol transcript shape, request/response ID pairing, error budgets, streaming order, queue bounds, sampling registry aliases, async behavior, and typed JSON argument preservation as first-class observable behavior.
|
|
25
29
|
Flag suspected leakage before returning drafts, but do not approve your own work for clean handoff.
|
|
@@ -468,6 +468,20 @@ def validate_value(value: Any, schema: dict, root_schema: dict, path: tuple[str
|
|
|
468
468
|
seen.add(marker)
|
|
469
469
|
if error_limit_reached(errors):
|
|
470
470
|
return errors
|
|
471
|
+
contains_schema = schema.get("contains")
|
|
472
|
+
if isinstance(contains_schema, dict):
|
|
473
|
+
match_count = 0
|
|
474
|
+
for index, item in enumerate(value):
|
|
475
|
+
if not validate_value(item, contains_schema, root_schema, path + (index,)):
|
|
476
|
+
match_count += 1
|
|
477
|
+
min_contains = schema.get("minContains", 1)
|
|
478
|
+
max_contains = schema.get("maxContains")
|
|
479
|
+
if isinstance(min_contains, int) and match_count < min_contains:
|
|
480
|
+
add_error(errors, f"{path_label(path)}: fewer than minContains {min_contains} matching contains schema")
|
|
481
|
+
if isinstance(max_contains, int) and match_count > max_contains:
|
|
482
|
+
add_error(errors, f"{path_label(path)}: more than maxContains {max_contains} matching contains schema")
|
|
483
|
+
if error_limit_reached(errors):
|
|
484
|
+
return errors
|
|
471
485
|
item_schema = schema.get("items")
|
|
472
486
|
if isinstance(item_schema, dict):
|
|
473
487
|
for index, item in enumerate(value):
|
package/lib/bootstrap.cjs
CHANGED
|
@@ -408,7 +408,11 @@ function printInitResult(options) {
|
|
|
408
408
|
console.log(' install safe hooks: npx clean-room-skill@latest --claude --global --hooks=safe --yes');
|
|
409
409
|
console.log(' start in Claude Code: /clean-room:init, then /clean-room or /clean-room:attended');
|
|
410
410
|
console.log(' uninstall runtime install: npx clean-room-skill@latest --claude --global --uninstall --yes');
|
|
411
|
-
console.log('
|
|
411
|
+
console.log(' Pi:');
|
|
412
|
+
console.log(' install package skills: pi install npm:clean-room-skill@latest');
|
|
413
|
+
console.log(' start in Pi: /skill:init, then /skill:clean-room or /skill:attended');
|
|
414
|
+
console.log(' Pi package install does not register clean-room hooks');
|
|
415
|
+
console.log(' strict hooks are only for dedicated clean-room Codex, Claude, or OpenCode homes');
|
|
412
416
|
}
|
|
413
417
|
|
|
414
418
|
function runInit(argv, context = {}) {
|