npm - clean-room-skill - Versions diffs - 0.1.11 → 0.1.13 - Mend

clean-room-skill 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/.codex-plugin/plugin.json +1 -1
package/README.md +35 -8
package/agents/clean-architect.md +7 -1
package/agents/clean-implementer-verifier-shell.md +4 -0
package/agents/clean-polish-reviewer.md +3 -0
package/agents/clean-qa-editor.md +4 -0
package/agents/contaminated-handoff-sanitizer.md +3 -0
package/agents/contaminated-manager-verifier.md +10 -1
package/agents/contaminated-source-analyst.md +8 -1
package/bin/install.js +11 -1621
package/docs/ARCHITECTURE.md +7 -1
package/docs/HOOKS.md +14 -10
package/docs/REFERENCE.md +31 -6
package/examples/codex/.codex/agents/clean-architect.toml +7 -5
package/examples/codex/.codex/agents/clean-polish-reviewer.toml +2 -2
package/examples/codex/.codex/agents/clean-qa-editor.toml +3 -2
package/examples/codex/.codex/agents/contaminated-handoff-sanitizer.toml +2 -2
package/examples/codex/.codex/agents/contaminated-manager-verifier.toml +10 -4
package/examples/codex/.codex/agents/contaminated-source-analyst.toml +7 -3
package/hooks/validate-json-schema.py +14 -0
package/lib/bootstrap.cjs +5 -1
package/lib/doctor.cjs +157 -5
package/lib/hooks.cjs +18 -0
package/lib/install-artifacts.cjs +178 -4
package/lib/install-claude-plugin.cjs +374 -0
package/lib/install-cli.cjs +99 -0
package/lib/install-operations.cjs +376 -0
package/lib/install-options.cjs +149 -0
package/lib/install-runtime-selection.cjs +180 -0
package/lib/install-status.cjs +292 -0
package/lib/install-tui.cjs +359 -0
package/lib/preflight-bootstrap.cjs +39 -0
package/lib/preflight-cli.cjs +95 -0
package/lib/preflight-constants.cjs +25 -0
package/lib/preflight-output.cjs +37 -0
package/lib/preflight-paths.cjs +67 -0
package/lib/preflight-template.cjs +103 -0
package/lib/preflight-validation.cjs +276 -0
package/lib/preflight.cjs +18 -461
package/lib/run-clean-artifacts.cjs +276 -0
package/lib/run-cli.cjs +90 -0
package/lib/run-constants.cjs +171 -0
package/lib/run-controller.cjs +247 -0
package/lib/run-coverage.cjs +350 -0
package/lib/run-hooks.cjs +96 -0
package/lib/run-manifest.cjs +111 -0
package/lib/run-progress.cjs +160 -0
package/lib/run-results.cjs +433 -0
package/lib/run-roots.cjs +230 -0
package/lib/run-stages.cjs +409 -0
package/lib/run.cjs +4 -1998
package/lib/runtime-layout.cjs +12 -5
package/package.json +8 -2
package/plugin.json +1 -1
package/skills/attended/SKILL.md +2 -0
package/skills/clean-room/SKILL.md +6 -6
package/skills/clean-room/assets/coverage-ledger.schema.json +95 -0
package/skills/clean-room/assets/task-manifest.schema.json +25 -0
package/skills/clean-room/examples/contaminated-side/task-manifest.json +14 -2
package/skills/clean-room/references/CONTROLLER-LOOP.md +5 -0
package/skills/clean-room/references/PROCESS.md +12 -4
package/skills/clean-room/references/SPEC-SCHEMA.md +11 -2
package/skills/refocus/SKILL.md +2 -0
package/skills/unattended/SKILL.md +2 -0

package/docs/ARCHITECTURE.md CHANGED Viewed

@@ -52,6 +52,7 @@ To assist in logical unit decomposition, the workflow supports an optional sourc
 *   **Execution Boundary**: This tooling runs exclusively in the contaminated domain before clean-room role sessions are initialized.
 *   **Traversal Bounds**: Source indexing enforces file count, per-file byte, total byte, batch token, and segment caps. It validates file size again after reading, skips files that change during read, records directory walk errors, and, once global limits are exhausted, prunes further traversal and records a single aggregate skipped entry.
 *   **Agent 0 Use**: Agent 0 consumes `source-index.json` only to create neutral `task-manifest.json` units and per-unit `source_index_refs`. In visual fallback runs, Agent 0 consumes `visual-index.json` only to create neutral units and per-unit `visual_index_refs`. Both indexes stay contaminated-only and do not cross to Agent 1.5, Agent 2, Agent 3, Agent 4, or clean handoff packages.
+*   **Discovery Leads**: When Agent 1 detects an authorized related surface that cannot be analyzed inside the assigned unit, Agent 0 tracks it in contaminated `coverage-ledger.json` `discovery_leads`. High-priority leads must be resolved before the unit can be marked covered; the runner does not expand approved scope automatically.
 *   **Tool Trust Policy**: By default, tool discovery operates in `stat-only` mode and does not execute third-party binaries. It queries version strings only when explicitly invoked with `--probe-tools`. Tools discovered under `/opt/homebrew` or `/usr/local` remain stat-only unless `--allow-user-toolchain-probes` is also supplied. Project-local directories (such as `.bin` or `node_modules/.bin`) are ignored unless the environment variable `RE_SKILLS_TRUST_PROJECT_TOOLS=1` or the flag `--allow-working-project-tools` is supplied.
 *   **Local Tool Install Safety**: Explicit npm-backed helper installs are strict-version pinned and serialized with a cache-local lock before mutating `~/.cache/re-skills/clean-room-tools/npm`. Prefix creation failures, subprocess timeouts, and subprocess launch errors are returned as structured JSON facts instead of raw tracebacks.
@@ -177,6 +178,7 @@ The architecture delegates work across six distinct custom role agents to enforc
     *   Produces `clean-run-context.json` for Agent 2, Agent 3, and Agent 4 instead of handing over the full `task-manifest.json` or full `preflight-goal.json`.
     *   Influences Agent 2, Agent 3, and Agent 4 only through durable sanitized artifacts, never direct chat, progress feedback, implementation hints, or priority changes.
     *   Performs final verification of clean specification and implementation coverage against the source scope.
+    *   Blocks handoff or coverage completion when high-priority contaminated discovery leads remain unresolved.
     *   Writes the inner-loop `clean-room-result.json` only after contaminated-side coverage verification.
     *   Consumes Agent 3 reports only after Agent 3 reaches a terminal state, and consumes Agent 4 reports only after the configured polish review reaches a terminal state, then sends only abstract delta tickets into a fresh clean artifact cycle.
@@ -187,6 +189,8 @@ The architecture delegates work across six distinct custom role agents to enforc
     *   Analyzes the authorized source code within assigned units or batches.
     *   Uses target stack and compatibility policy from preflight instead of inferring product goals from source.
     *   Writes neutral draft behavioral specifications based on observed behavior, public contracts, invariants, state transitions, and errors.
+    *   Inventories the assigned unit's observable CLI, env, TUI, UI, protocol, config, command, and public behavior surfaces when relevant.
+    *   Records authorized related surfaces that cannot be analyzed in the assigned context as contaminated `discovery_leads`, not clean spec fields.
     *   Generates evidence references pointing to contaminated ledgers instead of copying raw source code or comments.
     *   Flags suspected leakage but does not approve its own work for clean handoff.
@@ -251,10 +255,12 @@ Agent 3's terminal report is not enough to return. If configured, Agent 4 must p
 *   Locks the contaminated artifact root with `.clean-room-run.lock`.
 *   Reloads durable artifacts before each iteration.
 *   Selects at most one pending or gap unit inside `loop_context.approved_scope_refs`.
+*   Requires exactly one `unit_kind: "foundation"` unit, named by `loop_context.foundation_unit_ref`; behavior units cannot run or complete until that foundation unit is covered.
 *   Spawns configured role commands with `shell: false`, bounded output, and bounded timeout.
 *   In strict context-management mode, requires each configured stage to provide `context.fresh_session: true` and `context.brief_path`, then validates the session brief before spawn.
 *   Supports the optional `clean-polish-review` phase between `clean-implement-qc` and `contaminated-coverage-verify`.
 *   Validates schema, leakage, and handoff integrity before advancing state.
+*   Rejects `covered` coverage-ledger units that still have unresolved high-priority `discovery_leads`.
 *   Records controller memory in contaminated-side `controller-run-ledger.json`.
 *   Writes `clean-room-result.json` before returning to the outer spec loop.
@@ -282,7 +288,7 @@ Note: Even though clean and source-denied roles (such as Agent 1.5, 2, 3, and 4)
 ## Guardrails and Hooks
-The architecture relies on agent/tool hook scaffolding located in `hooks/` to enforce boundary rules dynamically during agent sessions. Use installer-generated Codex or Claude hook configs with absolute wrapper paths. Static cwd-relative plugin hook declarations are not treated as an enforcement boundary. Use strict hooks for dedicated Codex or Claude clean-room homes; safe hooks are compatibility-only between runs and begin enforcing when init/onboarding launches role sessions with clean-room environment variables.
+The architecture relies on agent/tool hook scaffolding located in `hooks/` to enforce boundary rules dynamically during agent sessions. Use installer-generated Codex or Claude hook configs with absolute wrapper paths, or the generated OpenCode local plugin bridge. Static cwd-relative plugin hook declarations are not treated as an enforcement boundary. Use strict hooks for dedicated Codex, Claude, or OpenCode clean-room homes; safe hooks are compatibility-only between runs and begin enforcing when init/onboarding launches role sessions with clean-room environment variables.
 Matcher coverage depends on the host runtime emitting hook events for the tool invocation. Hosts that do not emit a pre/post tool event for a file, terminal, or resource tool are not protected by adding that tool name to the generated hook config. Run `clean-room-skill doctor --runtime codex --hooks=strict --coverage`, or the Claude or OpenCode equivalent, after install.

package/docs/HOOKS.md CHANGED Viewed

@@ -6,7 +6,7 @@ The hooks are engineering guardrails. They reduce accidental cross-domain reads
 ## Install Locations
-The installer copies the Python hook files for every supported runtime layout. Runtime hook registration is verified only for Codex and Claude Code.
+The installer copies the Python hook files for every supported runtime layout. Runtime hook registration is verified for Codex, Claude Code, and OpenCode.
 | Runtime | Hook files copied to | Active hook config |
 | --- | --- | --- |
@@ -14,7 +14,7 @@ The installer copies the Python hook files for every supported runtime layout. R
 | Claude Code | `<targetRoot>/hooks/clean-room/*.py` | `<targetRoot>/settings.json` |
 | Antigravity | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
 | Gemini CLI | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
-| OpenCode | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
+| OpenCode | `<targetRoot>/hooks/clean-room/*.py` | `<targetRoot>/plugins/clean-room.ts` |
 | Kilo | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
 | Cursor | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
 | GitHub Copilot | `<targetRoot>/hooks/clean-room/*.py` | Unsupported, copy only |
@@ -31,8 +31,8 @@ Codex uses `CODEX_HOME` or `~/.codex` for global installs. Claude Code uses `CLA
 | Mode | Behavior |
 | --- | --- |
-| `safe` | Default. Registers hooks for Codex or Claude, but `clean-room-hook.py` no-ops until a clean-room role environment is present or `CLEAN_ROOM_HOOK_ENFORCE` is truthy. |
-| `strict` | Registers hooks for Codex or Claude and fails closed even without clean-room role environment. Use only in dedicated clean-room runtime homes. |
+| `safe` | Default. Registers hooks for Codex, Claude, or OpenCode, but `clean-room-hook.py` no-ops until a clean-room role environment is present or `CLEAN_ROOM_HOOK_ENFORCE` is truthy. |
+| `strict` | Registers hooks for Codex, Claude, or OpenCode and fails closed even without clean-room role environment. Use only in dedicated clean-room runtime homes. |
 | `copy-only` | Copies hook files without modifying runtime hook config. This is also the effective behavior for runtimes without verified hook registration support. |
 `--no-hooks` is an alias for `--hooks=copy-only`.
@@ -41,6 +41,8 @@ Codex uses `CODEX_HOME` or `~/.codex` for global installs. Claude Code uses `CLA
 When hook mode is `safe` or `strict`, the installer registers four managed hook entries for Codex and Claude. Each entry invokes the installed `clean-room-hook.py` wrapper with an absolute Python path, an absolute wrapper path, the requested hook mode, and one or more `--check` scripts.
+For OpenCode, the installer writes a generated local plugin at `<targetRoot>/plugins/clean-room.ts`. OpenCode auto-loads that plugin from its config directory. The plugin subscribes to `tool.execute.before` and `tool.execute.after`, translates OpenCode tool payloads into the existing clean-room hook payload shape, and invokes the installed Python wrapper with `shell: false`. `copy-only` omits this plugin.
 | Event | Matcher | Checks |
 | --- | --- | --- |
 | `PreToolUse` | <code>Bash&#124;Shell&#124;PowerShell&#124;Monitor&#124;exec_command&#124;shell_command&#124;write_stdin</code> | `require-clean-room-env.py`, `deny-clean-room-shell.py` |
@@ -205,26 +207,28 @@ The hook policy is deny-by-default during active clean-room role sessions.
 ## Verification
-Use `doctor` after installing Codex or Claude hooks:
+Use `doctor` after installing Codex, Claude, or OpenCode hooks:
 ```bash
 clean-room-skill doctor --runtime codex --hooks=safe
 clean-room-skill doctor --runtime codex --hooks=strict
 clean-room-skill doctor --runtime codex --hooks=strict --coverage
 clean-room-skill doctor --runtime claude --hooks=strict --coverage
+clean-room-skill doctor --runtime opencode --hooks=strict --coverage
 ```
 Add `--config-dir <path>` when checking a non-default runtime config root.
 `doctor` verifies that:
-- The hook config exists.
-- Exactly four managed clean-room hook entries are present.
-- Managed commands use absolute Python and wrapper paths.
+- The hook config or OpenCode local plugin exists.
+- Exactly four managed clean-room hook entries are present for Codex and Claude.
+- Managed Codex and Claude commands use absolute Python and wrapper paths.
+- The OpenCode plugin declares `tool.execute.before`, `tool.execute.after`, an absolute wrapper path, and `shell: false`.
 - The requested safe or strict mode is configured.
 - Safe mode no-ops without clean-room environment.
 - Strict mode and enforced safe mode fail without required environment.
 - Smoke payloads fail for source reads, source writes, shell bypasses, and malformed post-write JSON.
-- `--coverage` prints matcher and check coverage for the generated entries.
+- `--coverage` prints matcher and check coverage for generated hook config entries or OpenCode plugin coverage.
-`doctor` is a smoke test. It does not prove host event coverage, legal sufficiency, or full runtime isolation.
+`doctor` is a smoke test. It does not prove host event coverage, legal sufficiency, or full runtime isolation. For OpenCode, it verifies the generated plugin bridge and Python guardrail checks, not every OpenCode tool surface.

package/docs/REFERENCE.md CHANGED Viewed

@@ -64,12 +64,12 @@ Verified:
 - Codex
 - Claude Code
+- OpenCode
 Layout-only or experimental:
 - Antigravity
 - Gemini CLI
-- OpenCode
 - Kilo
 - Cursor
 - GitHub Copilot
@@ -80,7 +80,18 @@ Layout-only or experimental:
 - Hermes Agent
 - CodeBuddy
-Layout-only installs write files to expected runtime locations, but this repository does not verify that those hosts load the files or emit all hook events needed for clean-room enforcement.
+Layout-only installs write files to expected runtime locations, but this repository does not verify that those hosts load the files or emit all hook events needed for clean-room enforcement. OpenCode installs are verified through a generated local plugin bridge at `plugins/clean-room.ts`; `doctor` verifies that bridge and the Python guardrails, not every OpenCode tool surface.
+### Pi Package Compatibility
+Pi can install this package and load the bundled skills from the package metadata:
+```bash
+pi install npm:clean-room-skill@latest
+pi install https://github.com/whit3rabbit/clean-room-skill
+```
+Pi invokes skills as `/skill:<name>`. Use `/skill:init` for the setup pass, `/skill:clean-room` for the startup wizard, `/skill:attended` for attended controller mode, and `/skill:unattended` for bounded unattended mode. Pi support is package compatibility only: it does not add a `--pi` installer target, does not participate in `--all`, and does not register clean-room hooks. Clean-room safety still depends on role separation, path isolation, schema validation, and supported hook runtimes.
 Global install roots:
@@ -103,12 +114,20 @@ Global install roots:
 Local installs use each runtime's project config directory. Antigravity local installs write `.agents/plugins/clean-room/`.
+## Agent Metadata Compatibility
+Runtime agent metadata is intentionally runtime-specific. Claude Code Markdown agents support documented `model`, `effort`, `color`, and optional `memory` frontmatter. Clean-room role agents use `model`, `effort`, and `color` only. They do not use persistent `memory`, because clean-room state must come from durable artifacts, role-session briefs, and fresh role sessions rather than runtime recall.
+Codex TOML agents support documented session config fields such as `model`, `model_reasoning_effort`, `developer_instructions`, `sandbox_mode`, `mcp_servers`, and `skills.config`. Do not copy Claude aliases such as `sonnet` or `opus`, Claude `color`, or Claude `memory` fields into Codex TOML templates.
+Codex hooks support `updatedInput`, but clean-room hook enforcement should stay fail-closed through exit status and explicit deny decisions. Do not rewrite clean-room tool calls in hooks; command mutation makes boundary behavior harder to review and test.
 ## Hook Modes And Doctor
 Hook modes:
 - `safe`: default. Copies hooks and registers a wrapper that no-ops until role sessions provide clean-room environment variables. `CLEAN_ROOM_HOOK_ENFORCE=1` remains available for explicit smoke tests.
-- `strict`: fail-closed mode for dedicated Codex or Claude clean-room homes.
+- `strict`: fail-closed mode for dedicated Codex, Claude, or OpenCode clean-room homes.
 - `copy-only`: copies hook files without runtime hook registration.
 Smoke test generated hook registration:
@@ -117,11 +136,12 @@ Smoke test generated hook registration:
 clean-room-skill doctor --runtime codex --hooks=safe
 clean-room-skill doctor --runtime codex --hooks=strict
 clean-room-skill doctor --runtime codex --hooks=strict --coverage
+clean-room-skill doctor --runtime opencode --hooks=strict --coverage
 ```
 Use `--runtime claude` for Claude Code, and add `--config-dir <path>` when testing an alternate config root.
-`doctor` checks that Codex or Claude hook config exists, contains generated clean-room hooks, uses absolute wrapper paths, uses the requested safe or strict mode, and that smoke payloads fail for missing environment, source reads, source writes, shell use, and malformed post-write JSON. Safe mode also verifies no-op behavior without clean-room env.
+`doctor` checks that Codex or Claude hook config exists, or that the OpenCode local plugin exists. It verifies generated clean-room hooks or plugin wiring, absolute wrapper paths, the requested safe or strict mode, and smoke payload failures for missing environment, source reads, source writes, shell use, and malformed post-write JSON. Safe mode also verifies no-op behavior without clean-room env.
 It does not prove legal sufficiency, full runtime hook event coverage, host-side feature enablement, or full JSON Schema conformance.
@@ -210,7 +230,11 @@ Options:
 | `--schema-dir <path>` | Override bundled schema directory. |
 | `--python <path>` | Python executable for validation hooks; default is `python3`. |
-The task manifest must already include preflight references, the required handoff sequence, unattended controller policy, finite iteration bounds, and `loop_context.approved_scope_refs`.
+The task manifest must already include preflight references, the required handoff sequence, unattended controller policy, finite iteration bounds, `loop_context.foundation_unit_ref`, and `loop_context.approved_scope_refs`.
+Unattended code-development manifests must include exactly one `unit_kind: "foundation"` unit. The runner rejects non-foundation approved slices until that unit is covered.
+`coverage-ledger.json` may record contaminated-only `source_units[].discovery_leads` for authorized related surfaces that were detected but not analyzed in the assigned unit. The runner rejects a `covered` unit while any high-priority discovery lead remains open or deferred. It does not add follow-up units or expand `loop_context.approved_scope_refs`; Agent 0 must return an abstract delta, mark coverage partial or blocked, or pause for attended approval.
 Minimal agent command adapter shape for advisory or disabled context management:
@@ -323,7 +347,8 @@ The runner exports `CLEAN_ROOM_SESSION_BRIEF_PATH`, `CLEAN_ROOM_ROLE_SESSION_ID`
 | `install lock is held` | Another install or uninstall is mutating the same target root | Wait for the other process to finish; stale locks are handled conservatively. |
 | Hook config write failed after files copied | Partial installer state | Fix the filesystem error, then re-run the same installer command. |
 | Install manifest remains `installing` | The previous install did not complete | Re-run the same installer command for that runtime and target root. |
-| `clean-room run` rejects the manifest | Invalid or incomplete unattended loop metadata | Fix `controller_policy`, `loop_context`, and `approved_scope_refs`, then retry `--dry-run`. |
+| `clean-room run` rejects the manifest | Invalid or incomplete unattended loop metadata | Fix `controller_policy`, `loop_context.foundation_unit_ref`, and `loop_context.approved_scope_refs`, then retry `--dry-run`. |
+| `clean-room run` rejects a covered unit with `discovery_leads` | A high-priority contaminated discovery lead is still unresolved | Analyze the lead in an authorized follow-up unit, mark it resolved, or keep coverage partial/blocked and return an abstract delta. |
 | `clean-room run` rejects an agent command stage in strict context mode | The stage is missing `context.fresh_session: true`, missing `context.brief_path`, or points the brief outside the allowed artifact root | Fix the stage context and regenerate the role-session brief for the selected unit. |
 | `clean-room run` reports no progress | Configured stages exited without durable artifact changes | Check role command cwd/argv, selected unit, and artifact write roots. |
 | `clean-room run` reports repeated unit selection | Same unit selected after a no-progress iteration | Resolve the blocker or update durable artifacts before retrying. |

package/examples/codex/.codex/agents/clean-architect.toml CHANGED Viewed

@@ -1,10 +1,10 @@
 name = "clean-architect"
 description = "Plans clean implementation from approved clean behavior specs and the clean destination foundation."
 sandbox_mode = "workspace-write"
-model_reasoning_effort = "medium"
-enabled_skills = ["clean-room"]
+model = "gpt-5.5"
+model_reasoning_effort = "high"
-instructions = """
+developer_instructions = """
 Act as Agent 2 in the clean-room pipeline.
 Run only from the clean workspace.
 Before tool use, require CLEAN_ROOM_ROLE=clean-architect, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.
@@ -12,12 +12,12 @@ Read approved clean artifacts, CLEAN_ROOM_IMPLEMENTATION_ROOTS, and explicitly c
 Write only under CLEAN_ROOM_CLEAN_ROOTS. Do not write code.
 Do not read source workspaces, visual roots, raw screenshots, visual indexes, contaminated ledgers, contaminated chat history, or the full task-manifest.json.
 Stop if only a full task-manifest.json is provided as run context.
-Before planning, require valid clean-run-context.json with clean-safe goal_contract fields and code_hygiene_policy, approved handoff-package.json, approved behavior specs, and an implementation root through CLEAN_ROOM_IMPLEMENTATION_ROOTS.
+Before planning, require valid clean-run-context.json with clean-safe goal_contract fields and code_hygiene_policy, approved handoff-package.json, approved behavior specs, and an implementation root through CLEAN_ROOM_IMPLEMENTATION_ROOTS. For behavior slices, require the approved clean artifacts to include the completed foundation spec or equivalent clean-run-context constraints.
 When CLEAN_ROOM_SESSION_BRIEF_PATH is set, read it first and load only the allowed artifact refs named there, plus destination foundation reads permitted by this role.
 Stop if full preflight-goal.json, source index, visual index, raw screenshots, contaminated ledgers, source or visual paths, or direct Agent 0 chat is provided.
 Accept Agent 0 influence only as durable sanitized artifacts. Ignore direct Agent 0 chat, private manager notes, live feedback, implementation hints, or priority changes unless they arrive in a schema-valid clean artifact for a fresh clean session.
 Merge only approved handoff artifacts into the selected clean schema base.
-Read the clean destination foundation to identify local structure, conventions, tests, dependencies, and constraints.
+Read the clean destination foundation and approved foundation spec to identify local structure, conventions, tests, dependency policy, package boundaries, and constraints.
 Read any existing skeleton-manifest.json before planning and revise it as the whole-destination architecture map for the current clean spec set.
 Maintain architecture areas with owned relative path prefixes, responsibilities, forbidden responsibilities, allowed area dependencies, and refactor triggers.
 Assign every target and test path in implementation-plan.json to one or more skeleton-manifest.json architecture areas.
@@ -26,6 +26,8 @@ Create or update implementation-plan.json as the primary output for code-develop
 Carry the preflight-derived code hygiene policy into implementation-plan.json.
 Keep skeleton-manifest.json valid and current for code-development runs. Treat it as the architecture map, not as a replacement for implementation-plan.json.
 Map approved specs to destination files, test files, work items, argv-array verification commands, risks, and acceptance criteria using relative implementation-root paths.
+Map every exact-public-contract or behavior-compatible public surface obligation to at least one implementation-plan.json work item through public_contract_refs; do not replace a public command/API inventory with one generic dispatch work item unless every obligation ref is listed.
+Do not choose dependencies by copying source manifests. Add or preserve dependencies only when clean artifacts, destination evidence, or preflight policy justify them.
 Preserve source-test-derived scenarios as clean test obligations for equal output without copying source test structure.
 Do not resolve public-contract, callable, protocol, async, serialization, or data-shape ambiguity by narrowing semantics. Mark the work blocked or create an abstract delta when the approved clean specs do not decide it.
 Stop if contaminated material appears in clean inputs.

package/examples/codex/.codex/agents/clean-polish-reviewer.toml CHANGED Viewed

@@ -1,10 +1,10 @@
 name = "clean-polish-reviewer"
 description = "Performs final clean code polish, repo hygiene, verification review, and constrained implementation-root commit."
 sandbox_mode = "workspace-write"
+model = "gpt-5.4-mini"
 model_reasoning_effort = "high"
-enabled_skills = ["clean-room"]
-instructions = """
+developer_instructions = """
 Act as Agent 4 in the clean-room pipeline.
 Run only in the clean domain after Agent 3 has produced terminal implementation and QC reports.
 Before tool use, require CLEAN_ROOM_ROLE=clean-polish-reviewer, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.

package/examples/codex/.codex/agents/clean-qa-editor.toml CHANGED Viewed

@@ -1,10 +1,10 @@
 name = "clean-qa-editor"
 description = "Implements the clean plan, verifies clean destination code, and emits one terminal report for Agent 0."
 sandbox_mode = "workspace-write"
+model = "gpt-5.4-mini"
 model_reasoning_effort = "high"
-enabled_skills = ["clean-room"]
-instructions = """
+developer_instructions = """
 Act as Agent 3 in the clean-room pipeline.
 Run only in the clean domain.
 Before tool use, require CLEAN_ROOM_ROLE=clean-qa-editor, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.
@@ -30,6 +30,7 @@ Do not report progress, ask Agent 0 for guidance, or send partial findings while
 Record argv-array verification commands, optional clean-safe container metadata, implementation status, changed relative paths, verification results, blockers, and abstract delta tickets in CLEAN_ROOM_CLEAN_ROOTS/implementation-report.json.
 Review leakage risk and record contamination incidents.
 Flag missing source-test parity, missing equal-output assertions, and mismatches between specs, implementation plan, public contracts, and test obligations.
+Verify public-surface inventory parity item by item. Every required public_surface:<spec_id>:<kind>:<name> ref must be covered by tests, mapped to a completed work item, and represented in terminal verification; passing test counts or broad command-dispatch coverage is not enough.
 Require invariant-level tests for compatibility-critical behavior. Passing module coverage or API-name coverage is not sufficient when protocol, serialization, streaming, queueing, error-budget, async, or typed-data invariants are in scope.
 Keep CLEAN_ROOM_CLEAN_ROOTS/qc-report.json updated when the run expects it.
 Record code hygiene violations as code-hygiene findings in CLEAN_ROOM_CLEAN_ROOTS/qc-report.json.

package/examples/codex/.codex/agents/contaminated-handoff-sanitizer.toml CHANGED Viewed

@@ -1,10 +1,10 @@
 name = "contaminated-handoff-sanitizer"
 description = "Reviews Agent 1 draft specs from a fresh source-denied contaminated context and approves only scrubbed handoff artifacts."
 sandbox_mode = "workspace-write"
+model = "gpt-5.4-mini"
 model_reasoning_effort = "high"
-enabled_skills = ["clean-room"]
-instructions = """
+developer_instructions = """
 Act as Agent 1.5 in the clean-room pipeline.
 Operate in the contaminated domain, but without source access or Agent 1 source-reading chat history.
 Before tool use, require CLEAN_ROOM_ROLE=contaminated-handoff-sanitizer, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_ALLOWED_READ_ROOTS, and CLEAN_ROOM_SCHEMA_DIR.

package/examples/codex/.codex/agents/contaminated-manager-verifier.toml CHANGED Viewed

@@ -1,10 +1,10 @@
 name = "contaminated-manager-verifier"
 description = "Consumes contaminated source indexes, tracks source coverage, and emits only abstract clean-room delta tickets."
 sandbox_mode = "workspace-write"
-model_reasoning_effort = "medium"
-enabled_skills = ["clean-room"]
+model = "gpt-5.5"
+model_reasoning_effort = "high"
-instructions = """
+developer_instructions = """
 Act as Agent 0 in the clean-room pipeline.
 Operate only in the contaminated domain.
 Read authorized source and contaminated ledgers as needed.
@@ -12,16 +12,22 @@ Before source discovery or decomposition, require validated preflight-goal.json,
 Do not infer target language, dependency policy, license policy, exactness policy, output directory, or feature add/remove policy from source.
 When acting as agent zero/controller, define and pass CLEAN_ROOM_ROLE, CLEAN_ROOM_SOURCE_ROOTS, CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS, CLEAN_ROOM_CLEAN_ROOTS, CLEAN_ROOM_IMPLEMENTATION_ROOTS, CLEAN_ROOM_SCHEMA_DIR, and clean/source-denied CLEAN_ROOM_ALLOWED_READ_ROOTS into every new role session.
 When context management is enabled, maintain contaminated-side controller-status.json and create one compact role-session-brief.json per role launch. In strict mode, launch each role from a fresh model session, profile, or thread.
-Missing controller_policy means attended. Record loop_context when an outer spec loop invokes the inner clean-room loop for one approved spec slice. In unattended mode, reload durable artifacts before each iteration, select at most one pending or gap unit inside loop_context.approved_scope_refs, launch roles from fresh context, validate schema and leakage before advancing state, and stop on configured safety or ambiguity conditions.
+Missing controller_policy means attended. Record loop_context when an outer spec loop invokes the inner clean-room loop for one approved spec slice. In unattended mode, reload durable artifacts before each iteration, select at most one pending or gap unit inside loop_context.approved_scope_refs, require loop_context.foundation_unit_ref to point at the one foundation unit, launch roles from fresh context, validate schema and leakage before advancing state, and stop on configured safety or ambiguity conditions.
 Record the user's format_selection target profile, Agent 0-4 agent_pipeline contract, Agent 1.5 sanitizer role, and optional initialization_snapshot in task-manifest.json.
 Produce clean-run-context.json for Agent 2, Agent 3, and Agent 4 from sanitized initialization, clean-safe preflight goal fields, code hygiene policy, and handoff data. Do not send the full task-manifest.json or full preflight-goal.json to clean roles.
 Influence Agent 2, Agent 3, and Agent 4 only through durable sanitized artifacts. Do not send direct chat instructions, progress feedback, priority changes, implementation hints, or corrective coaching into an active clean role session.
 Use contaminated source-index.json when controller preflight produced one.
 When no indexable source code exists and screenshots/images are the authorized evidence, use contaminated visual-index.json only as fallback input. In attended mode, pause before decomposition to ask what the screenshots are meant to accomplish: product goal, target user flow, screenshot coverage, target stack, UI exactness boundary, and whether visible words are public compatibility surface.
 Maintain the tasklist as neutral CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS/task-manifest.json units, map at most one source-index batch, large-file segment, or visual-index batch into each unit, and track coverage under CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS.
+Create exactly one unit_kind="foundation" unit before behavior units. Set loop_context.foundation_unit_ref to that unit and approve it before any unit_kind="behavior" slice. The foundation unit captures target stack, package or module boundaries, public manifest surfaces, test entrypoints, dependency policy, and destination constraints.
 Provide Agent 1.5 only a neutral sanitizer brief with domain purpose, target profile, unit intent, public compatibility allowlist, and blocked categories.
 Send Agent 1 draft specs to Agent 1.5 for independent source-denied sanitization before clean handoff.
+Do not send a spec slice to handoff or mark coverage complete while the assigned unit has unresolved high-priority coverage-ledger.json discovery_leads or open discovery questions.
+Do not approve or complete non-foundation behavior slices until the foundation unit is covered. Foundation does not authorize dependency mirroring; dependencies are preserved only when public compatibility, destination evidence, or explicit policy requires them.
+When Agent 1 records discovery_leads, create neutral follow-up task units only when the lead is inside authorized scope. Do not silently expand loop_context.approved_scope_refs during an active inner run; return an abstract delta, mark coverage partial, or pause for attended approval.
+For multi-segment source work, you may include a previous contaminated draft behavior spec in a later contaminated-analysis role-session brief only when it is under the contaminated artifact root, hash-checked, within context budgets, and still forbidden to clean or source-denied roles.
 Compare clean artifacts and terminal implementation reports against source behavior, discovered source tests, equal-output requirements, and public API/schema compatibility for coverage gaps.
+For exact-public-contract or behavior-compatible units, split broad public surfaces into smaller units or maintain coverage-ledger.json public_surface_coverage entries for every required public_surface:<spec_id>:<kind>:<name> obligation. A covered unit requires each obligation to be covered, mapped to clean work, and verified.
 Reject a complete status when source-test-derived parity, protocol invariants, public-contract tests, or approved behavior-spec open questions remain unresolved. Convert the gap into abstract delta tickets for a fresh clean cycle.
 Do not write clean artifacts.
 If source-index.json is needed but missing, pause for controller preflight instead of running shell tools inside this role.

package/examples/codex/.codex/agents/contaminated-source-analyst.toml CHANGED Viewed

@@ -1,25 +1,29 @@
 name = "contaminated-source-analyst"
 description = "Reads authorized source and writes neutral draft task slices plus behavior specs with evidence references."
 sandbox_mode = "workspace-write"
+model = "gpt-5.4-mini"
 model_reasoning_effort = "medium"
-enabled_skills = ["clean-room"]
-instructions = """
+developer_instructions = """
 Act as Agent 1 in the clean-room pipeline.
 Operate only in the contaminated domain.
 Before reading source, require active task-manifest.json with preflight_goal_ref and preflight_goal_sha256, one assigned unit_id, authorized source_index_refs when used, authorized visual_index_refs when visual fallback is used, evidence handling policy, and target stack plus compatibility policy from preflight.
 When CLEAN_ROOM_SESSION_BRIEF_PATH is set, read it first and load only the allowed artifact refs named there, except for direct source reads already permitted by the assigned unit and role policy.
 Do not infer target language, dependency policy, license policy, or exactness policy from source code.
-Read the minimum authorized source needed for the assigned unit.
+Read the bounded authorized source needed to fully inventory the assigned unit's observable surface. Do not stop at the first obvious path when the unit includes CLI, environment override, TUI, UI, protocol, config, command dispatch, or public behavior surface.
 When the unit has source_index_refs, stay within the referenced batch unless Agent 0 explicitly assigns a related gap.
 When the unit has visual_index_refs, use view_image only in this contaminated role and stay within the referenced visual batch unless Agent 0 explicitly assigns a related gap.
 Write only under CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS.
 Generate neutral draft task slices and behavioral spec material for Agent 0-controlled units.
 Produce neutral behavioral requirements and evidence refs.
+For a unit_kind="foundation" assignment, inventory target stack, package or module boundaries, public manifest surfaces, test entrypoints, dependency policy, and destination constraints. Record public compatibility facts in behavior-spec fields and keep destination/build constraints neutral for clean planning.
+When relevant to the assigned unit, locate and account for every observable CLI argument, flag, environment variable override, TUI command, keyboard shortcut, menu state, associated UI element, view state, accessibility expectation, config key, protocol entry point, and public user-visible behavior.
+If you detect related files, modules, visual components, or public surfaces that are inside authorized scope but outside the assigned refs or too large to analyze in the current context, record contaminated coverage-ledger.json discovery_leads with neutral source_ref, description, priority, and status. Do not put source paths, visual paths, source index refs, or private identifiers in clean behavior specs.
 For visual fallback units, write UI behavior/spec claims about intent, screen states, hierarchy, accessibility expectations, interaction purpose, and broad style goals. Do not OCR or copy visible words unless preflight recorded them as public compatibility surface; do not preserve exact palettes, iconography, spacing, layout measurements, or distinctive visual expression.
 Treat discovered source tests as behavioral evidence and convert them into clean test_scenarios that validate the same observable outputs.
 Record equal-output expectations for public return values, serialized data, CLI or API responses, errors, state changes, ordering, and compatibility-relevant side effects.
 Capture public API, protocol, config, and data/schema compatibility using existing behavior spec fields.
+Do not mirror source dependency lists, package manifests, or private module layout. Mention a dependency only when it is public compatibility surface, destination evidence, or explicitly allowed by preflight policy.
 For behavior-compatible ports, extract compatibility-critical invariants into invariants, compatibility_notes, and test_scenarios; broad module coverage is not enough.
 When present, treat protocol transcript shape, request/response ID pairing, error budgets, streaming order, queue bounds, sampling registry aliases, async behavior, and typed JSON argument preservation as first-class observable behavior.
 Flag suspected leakage before returning drafts, but do not approve your own work for clean handoff.

package/hooks/validate-json-schema.py CHANGED Viewed

@@ -468,6 +468,20 @@ def validate_value(value: Any, schema: dict, root_schema: dict, path: tuple[str
                 seen.add(marker)
             if error_limit_reached(errors):
                 return errors
+        contains_schema = schema.get("contains")
+        if isinstance(contains_schema, dict):
+            match_count = 0
+            for index, item in enumerate(value):
+                if not validate_value(item, contains_schema, root_schema, path + (index,)):
+                    match_count += 1
+            min_contains = schema.get("minContains", 1)
+            max_contains = schema.get("maxContains")
+            if isinstance(min_contains, int) and match_count < min_contains:
+                add_error(errors, f"{path_label(path)}: fewer than minContains {min_contains} matching contains schema")
+            if isinstance(max_contains, int) and match_count > max_contains:
+                add_error(errors, f"{path_label(path)}: more than maxContains {max_contains} matching contains schema")
+            if error_limit_reached(errors):
+                return errors
         item_schema = schema.get("items")
         if isinstance(item_schema, dict):
             for index, item in enumerate(value):

package/lib/bootstrap.cjs CHANGED Viewed

@@ -408,7 +408,11 @@ function printInitResult(options) {
   console.log('    install safe hooks: npx clean-room-skill@latest --claude --global --hooks=safe --yes');
   console.log('    start in Claude Code: /clean-room:init, then /clean-room or /clean-room:attended');
   console.log('    uninstall runtime install: npx clean-room-skill@latest --claude --global --uninstall --yes');
-  console.log('  strict hooks are only for dedicated clean-room Codex or Claude homes');
+  console.log('  Pi:');
+  console.log('    install package skills: pi install npm:clean-room-skill@latest');
+  console.log('    start in Pi: /skill:init, then /skill:clean-room or /skill:attended');
+  console.log('    Pi package install does not register clean-room hooks');
+  console.log('  strict hooks are only for dedicated clean-room Codex, Claude, or OpenCode homes');
 }
 function runInit(argv, context = {}) {