npm - codebyplan - Versions diffs - 1.13.53 → 1.13.55 - Mend

codebyplan 1.13.53 → 1.13.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

package/templates/hooks/cbp-test-hooks.sh CHANGED Viewed

@@ -537,18 +537,18 @@ if [ ! -f "$GUARD_HOOK" ]; then
   test_result "cbp-skill-context-guard.sh present" "passed" "missing"
 else
-  # Case 1: over-threshold + cbp-round-execute (heavy) → permissionDecision=deny
+  # Case 1: over-threshold + cbp-round-build (heavy) → permissionDecision=deny
   STDIN=$(jq -n \
     --arg t "$FIXTURES_GUARD/over-threshold.jsonl" \
-    --arg s "cbp-round-execute" \
+    --arg s "cbp-round-build" \
     '{transcript_path:$t,tool_input:{skill:$s}}')
   OUTPUT=$(echo "$STDIN" | CBP_CONTEXT_WARN_TOKENS=200000 bash "$GUARD_HOOK" 2>/dev/null)
   EXIT_CODE=$?
   if [ "$EXIT_CODE" = "0" ] \
      && echo "$OUTPUT" | jq -e '.hookSpecificOutput.permissionDecision == "deny"' >/dev/null 2>&1; then
-    test_result "cbp-skill-context-guard.sh over-threshold + cbp-round-execute → deny" "passed" "passed"
+    test_result "cbp-skill-context-guard.sh over-threshold + cbp-round-build → deny" "passed" "passed"
   else
-    test_result "cbp-skill-context-guard.sh over-threshold + cbp-round-execute → deny" "passed" "failed (exit=$EXIT_CODE output=$(echo "$OUTPUT" | head -c 80))"
+    test_result "cbp-skill-context-guard.sh over-threshold + cbp-round-build → deny" "passed" "failed (exit=$EXIT_CODE output=$(echo "$OUTPUT" | head -c 80))"
   fi
   # Case 2: over-threshold + cbp-clear-prep (exempt) → empty stdout, exit 0
@@ -577,17 +577,17 @@ else
     test_result "cbp-skill-context-guard.sh over-threshold + cbp-clear-continue (exempt) → empty stdout" "passed" "failed (exit=$EXIT_CODE)"
   fi
-  # Case 4: under-threshold + cbp-round-execute → empty stdout, exit 0
+  # Case 4: under-threshold + cbp-round-build → empty stdout, exit 0
   STDIN=$(jq -n \
     --arg t "$FIXTURES_GUARD/under-threshold.jsonl" \
-    --arg s "cbp-round-execute" \
+    --arg s "cbp-round-build" \
     '{transcript_path:$t,tool_input:{skill:$s}}')
   OUTPUT=$(echo "$STDIN" | CBP_CONTEXT_WARN_TOKENS=200000 bash "$GUARD_HOOK" 2>/dev/null)
   EXIT_CODE=$?
   if [ "$EXIT_CODE" = "0" ] && [ -z "$OUTPUT" ]; then
-    test_result "cbp-skill-context-guard.sh under-threshold + cbp-round-execute → empty stdout" "passed" "passed"
+    test_result "cbp-skill-context-guard.sh under-threshold + cbp-round-build → empty stdout" "passed" "passed"
   else
-    test_result "cbp-skill-context-guard.sh under-threshold + cbp-round-execute → empty stdout" "passed" "failed (exit=$EXIT_CODE)"
+    test_result "cbp-skill-context-guard.sh under-threshold + cbp-round-build → empty stdout" "passed" "failed (exit=$EXIT_CODE)"
   fi
   # Case 5: empty skill_name → empty stdout, exit 0
@@ -603,7 +603,7 @@ else
   fi
   # Case 6: missing transcript_path → empty stdout, exit 0 (fast-path)
-  STDIN=$(jq -n --arg s "cbp-round-execute" '{tool_input:{skill:$s}}')
+  STDIN=$(jq -n --arg s "cbp-round-build" '{tool_input:{skill:$s}}')
   OUTPUT=$(echo "$STDIN" | CBP_CONTEXT_WARN_TOKENS=200000 bash "$GUARD_HOOK" 2>/dev/null)
   EXIT_CODE=$?
   if [ "$EXIT_CODE" = "0" ] && [ -z "$OUTPUT" ]; then

package/templates/hooks/validate-structure-lengths.sh CHANGED Viewed

@@ -12,7 +12,7 @@ _get_limit() {
     # Documented exceptions — higher budget
     /docs/templates/*/task.md)                          echo "450 900"; return;;
     /.claude/docs/architecture/development.md)          echo "1200 2000"; return;;
-    /.claude/skills/cbp-round-start/SKILL.md)           echo "600 1000"; return;;
+    /.claude/skills/cbp-round-plan/SKILL.md)            echo "600 1000"; return;;
     /.claude/rules/development-workflow.md)             echo "250 400"; return;;
     # Unlimited files
     /CHANGELOG.md|*/CHANGELOG.md|*/user-input.md|/.claude/docs/research/*) echo ""; return;;

package/templates/hooks/validate-structure-patterns.sh CHANGED Viewed

@@ -39,7 +39,7 @@ if match_path '^/\.claude/docs/stack/' \
 fi
 # Notation consistency (warn-only): flag bare-colon command notation in .claude/ markdown
-# See: cbp-round-start Step 0 "CHK / TASK / ROUND Identifier Notation Vocabulary" — all command refs must use /cbp-* form
+# See: cbp-round-plan Step 0 "CHK / TASK / ROUND Identifier Notation Vocabulary" — all command refs must use /cbp-* form
 if match_path '^/\.claude/(rules|skills|agents)/' && match_path '\.md$'; then
   CONTENT=$(read_input_content)
   [ -z "$CONTENT" ] && [ -f "$FILE_PATH" ] && CONTENT=$(cat "$FILE_PATH" 2>/dev/null || true)

package/templates/rules/README.md CHANGED Viewed

@@ -34,7 +34,7 @@ The `install`/`update`/`uninstall` flow handles these files identically to how i
 ## Current status
-Nine rules are shipped:
+Eight rules are shipped:
 | Rule file | Scope | Summary |
 |---|---|---|
@@ -45,7 +45,6 @@ Nine rules are shipped:
 | `agent-claim-verification.md` | `org-shared` | Verify an agent's claimed outcomes against ground truth (git, filesystem, tool results) before trusting them |
 | `e2e-mandatory.md` | `org-shared` | E2E is opt-out: an eligible framework whose source changed in a round must run its specialist or record a valid skip |
 | `parallel-waves.md` | `org-shared` | Wave-dispatch contract for parallel round execution — topological ordering and per-wave testing |
-| `task-routing-recommendation.md` | `repo-only:codebyplan` | Two-family command surface (checkpoint-bound vs standalone) and identifier-format routing — installed only in codebyplan-family repos |
 | `cbp-operating-gotchas.md` | `org-shared` | Cross-repo CBP-tooling traps (ship/timeout/MCP-replace/worktree/lint-baseline/approval-reconcile) + behavioral prefs, inherited once by all consumers |
 ## Contributing a rule

package/templates/rules/agent-claim-verification.md CHANGED Viewed

@@ -10,7 +10,7 @@ paths:
 <!-- Delivery: subagents receive this rule via the [[agent-claim-verification]] pointer in each
      agent's .md file. The `paths:` frontmatter surfaces it when an agent file is being *edited*,
      not when the agent is *running* — both mechanisms are intentional, so don't drop the pointer.
-     Scope is agents-only by design (cbp-round-executor, cbp-research); skill files are out of
+     Scope is agents-only by design (cbp-round-builder, cbp-research); skill files are out of
      scope this round. -->
 Subagents routinely emit tool calls and explanatory text that depend on a named thing *existing* — a JSON config key, a schema field, an environment-variable name, an external API's request/response shape. When that name is recalled from memory instead of read from the source, it is often subtly wrong: a renamed field, a key that moved to a different file, an API shape from an older version. Those hallucinated names cost correction rounds.

package/templates/rules/context-file-loading.md CHANGED Viewed

@@ -12,20 +12,20 @@ paths:
 | Context File | Loaded By | Phase | Purpose |
 |--------------|-----------|-------|---------|
-| `context/testing/unit.md` | `cbp-round-executor` | Step 3.6 | Unit test patterns per framework |
+| `context/testing/unit.md` | `cbp-round-builder` | Step 3.6 | Unit test patterns per framework |
 | `context/testing/e2e.md` | `cbp-e2e-playwright`, `cbp-e2e-maestro`, `cbp-e2e-tauri`, `cbp-e2e-vscode`, `cbp-e2e-xcuitest` | Entry | Shared contract: Input/Output, preflight, failure classification, dispatch routing |
 | `context/testing/e2e.md` | `cbp-testing-qa-agent` | Preflight | Env var list per framework |
 | `context/testing/e2e.md` | `cbp-checkpoint-plan` | Step 4 | Discovery probe dispatch contract |
-| `context/testing/e2e.md` | `cbp-round-execute` | Step 5 | E2E specialist dispatch routing |
+| `context/testing/e2e.md` | `cbp-round-build` | Step 5 | E2E specialist dispatch routing |
 | `context/testing/e2e.md` | `cbp-checkpoint-check` | Step 5b | Whole-checkpoint e2e dispatch |
-| `context/testing/eslint.md` | `cbp-task-planner` | Phase 1.5 | ESLint Compliance Checklist |
-| `context/testing/eslint.md` | `cbp-improve-round` | Phase 1.5 | Config-file compliance audit |
-| `context/mcp-docs.md` | `cbp-task-planner` | Phase 2.6 | MCP library doc lookup contract — per-dependency consultation via DocsByPlan MCP tools (resolve_library_id → search_chunks/lookup_symbol → get_chunk) |
-| `context/mcp-docs.md` | `cbp-round-executor` | Step 3.4 | Library-specific reference — pre-write API verification via DocsByPlan MCP tools |
+| `context/testing/eslint.md` | `cbp-round-planner` | Phase 1.5 | ESLint Compliance Checklist |
+| `context/testing/eslint.md` | `cbp-verify-reviewer` | Config-File Mode | Config-file compliance audit |
+| `context/mcp-docs.md` | `cbp-round-planner` | Phase 2.6 | MCP library doc lookup contract — per-dependency consultation via DocsByPlan MCP tools (resolve_library_id → search_chunks/lookup_symbol → get_chunk) |
+| `context/mcp-docs.md` | `cbp-round-builder` | Step 3.4 | Library-specific reference — pre-write API verification via DocsByPlan MCP tools |
 | `context/architecture/arch-map-spec.md` | `cbp-map-architecture` | Entry | Canonical architecture-map artifact format — per-module frontmatter + sections, INDEX.md row format, dependency-graph format |
-| `context/architecture-map.md` | `cbp-task-planner` | Phase 3 | Architecture map consultation contract — when + how to read per-module maps before finalizing scope |
-| `context/architecture-map.md` | `cbp-round-executor` | Step 2.4 | Architecture map consultation contract — when + how to read per-module maps before editing files |
-| `rules/parallel-waves.md` | `cbp-task-planner` | Phase 5.6 | Wave schema, invariants (3..15 file-count), and the proximity-split algorithm (a `rules/` file, not `context/**`; listed here for consumer discoverability) |
+| `context/architecture-map.md` | `cbp-round-planner` | Phase 3 | Architecture map consultation contract — when + how to read per-module maps before finalizing scope |
+| `context/architecture-map.md` | `cbp-round-builder` | Step 2.4 | Architecture map consultation contract — when + how to read per-module maps before editing files |
+| `rules/parallel-waves.md` | `cbp-round-planner` | Phase 5.6 | Wave schema, invariants (3..15 file-count), and the proximity-split algorithm (a `rules/` file, not `context/**`; listed here for consumer discoverability) |
 New context files MUST be added here in the same change that introduces the consumer — or the file is orphan infrastructure.
@@ -37,7 +37,7 @@ New context files MUST be added here in the same change that introduces the cons
 ## Why Fail Loudly
-Silent fallback hides drift. A rename or deletion of `context/testing/unit.md` would let `cbp-round-executor` keep writing tests from memory, drifting from the canonical recipe. A failed agent surfaces the drift on the first invocation; silent fallback lets it compound.
+Silent fallback hides drift. A rename or deletion of `context/testing/unit.md` would let `cbp-round-builder` keep writing tests from memory, drifting from the canonical recipe. A failed agent surfaces the drift on the first invocation; silent fallback lets it compound.
 ## Path Convention

package/templates/rules/development-workflow.md ADDED Viewed

@@ -0,0 +1,73 @@
+---
+description: The full CodeByPlan development loop — session, planning, the round build/verify cycle, and checkpoint ship — as a concise skill map.
+paths:
+  - ".claude/skills/cbp-session-start/**"
+  - ".claude/skills/cbp-todo/**"
+  - ".claude/skills/cbp-round-plan/**"
+  - ".claude/skills/cbp-verify/**"
+  - ".claude/skills/cbp-finalize/**"
+---
+# Development Workflow
+The full pipeline, as a map (not a tutorial). Each arrow is an auto-trigger or a single `Next:`
+directive — never an A/B/C menu (`feedback-close-out-routing.md`). Skill names below are the
+canonical post-redesign names.
+## Session Frame
+```
+/cbp-session-start   →   ... work ...   →   /cbp-session-end
+```
+`/cbp-todo` answers "what do I work on next" inside a session.
+## Planning a Unit of Work
+```
+checkpoint-bound:  /cbp-checkpoint-create → /cbp-checkpoint-plan → /cbp-checkpoint-start
+standalone:        /cbp-standalone-task-create → /cbp-standalone-task-start
+```
+`cbp-checkpoint-create` is mechanical (checkpoint row + feat branch, zero tasks);
+`cbp-checkpoint-plan` does the deep planning (tasks as vertical slices); `cbp-checkpoint-start`
+activates + claims. Standalone work skips checkpoint shipment entirely.
+## The Round Cycle (per task)
+```
+/cbp-round-plan  →  /cbp-round-build  →  /cbp-verify (scope=round)  →  /cbp-round-complete
+       ↑                                                                      │
+       └──────────────── more work wanted on the task ───────────────────────┘
+```
+- **`/cbp-round-plan`** — round planning entry (round-1 planning + the folded-in round-input
+  deep-analysis role). Spawns `cbp-round-planner`.
+- **`/cbp-round-build`** — owns the plan-approval gate + per-wave execution. Spawns
+  `cbp-round-builder`; auto-triggers `/cbp-verify`.
+- **`/cbp-verify` (scope=round)** — unified verify stage: deterministic gates +
+  real-execution proof + fresh-context review (`cbp-verify-reviewer`). Any fail → `/cbp-round-plan`
+  fix round.
+- **`/cbp-round-complete`** — the separate `ask`-tier, `disable-model-invocation` human git-add
+  finalizer. The user stages approved files; it reconciles + completes the round.
+## Closing the Task → Checkpoint
+On the **last clean round** of a task, verify escalates to task scope and finalizes:
+```
+/cbp-verify (scope=task)  →  /cbp-finalize  →  /cbp-checkpoint-check  →  /cbp-checkpoint-end
+```
+- **`/cbp-verify` (scope=task)** — whole-repo `codebyplan check --scope task`, holistic reviewer,
+  one batched human walkthrough, writes `task.context.verify_verdict={verdict:'READY'}`.
+- **`/cbp-finalize`** — task-level ship finalizer.
+- **`/cbp-checkpoint-check`** then **`/cbp-checkpoint-end`** — checkpoint re-evaluation +
+  shipment (hardcore CI tier, `rules/two-tier-ci.md`).
+## Cross-References
+- `rules/two-tier-ci.md` — soft (round/task) vs hardcore (checkpoint) gate strictness.
+- `rules/execution-proof.md` — the committed-artifact obligation at every verify.
+- `rules/spawn-failure-is-gate-failure.md` — agent spawn failure is a STOP, not a self-grade.
+- `rules/task-routing-recommendation.md` — checkpoint-bound vs standalone command families.

package/templates/rules/e2e-mandatory.md CHANGED Viewed

@@ -14,7 +14,7 @@ A framework is **eligible** in a round when ALL hold:
 - The framework's `app` source path intersects the round's `files_changed` (repo root for
   single-app repos).
-When eligible, `/cbp-round-execute` Step 5 spawns the matching specialist in parallel with
+When eligible, `/cbp-round-build` Step 5 spawns the matching specialist in parallel with
 `cbp-testing-qa-agent`; `/cbp-checkpoint-check` Step 5b does the same against the aggregated
 file union with `whole_checkpoint_mode: true`.
@@ -28,8 +28,8 @@ no hard-fail fires.
 ## `e2e_eligible_skipped` Hard-Fail
 If a framework was eligible this round but no specialist ran AND no valid skip reason is
-recorded, the round **hard-fails** and `/cbp-round-execute` Step 6 auto-triggers
-`/cbp-round-input`. Silent skips are bugs, not conveniences — this is the enforcement behind
+recorded, the round **hard-fails** and `/cbp-round-build` Step 6 auto-triggers
+`/cbp-round-plan`. Silent skips are bugs, not conveniences — this is the enforcement behind
 the opt-out contract.
 **Valid skip reasons** (must be recorded in `round.context.e2e_outputs[framework]` or the
@@ -54,8 +54,8 @@ check. Pre-flight (`context/testing/e2e.md` Step 6.5.1) is the only mechanism fo
 env-conditional skipping.
 A spec that ran with `passed === 0 && skipped > 0` for any path touching `files_changed` is a
-**hard fail**, not a pass — `cbp-task-check` (`agents/cbp-task-check.md`) refuses a READY
-verdict on a zero-assertion e2e run and routes to a fix round per this rule.
+**hard fail**, not a pass — `cbp-verify-reviewer` (`agents/cbp-verify-reviewer.md`) refuses a
+READY verdict on a zero-assertion e2e run and routes to a fix round per this rule.
 ## Committed-Screenshot Enforcement
@@ -64,7 +64,7 @@ path it touched is a defect — not a valid pass. Every framework must write at
 PNG to its committed dir (per the table in `context/testing/e2e.md` § Committed-Screenshot
 Mandate) and `git add` it before reporting `status: 'completed'`.
-`cbp-task-check` refuses a READY verdict when `e2e_gallery[]` is empty AND the round
+`cbp-verify-reviewer` refuses a READY verdict when `e2e_gallery[]` is empty AND the round
 touched UI source paths for an eligible framework — sole exception: `vscode-test`-only
 rounds (SD-3, behavior-only extensions; see below). The fix path is the same as for a
 zero-assertion run: open a fix round that captures the missing committed screenshots.
@@ -77,7 +77,7 @@ has no visual output (behavior-only tests). Agents must still define the dir and
 - `context/testing/e2e.md` — Input/Output contract, pre-flight loop, failure classification,
   committed-screenshot mandate, auto-new/gated-changed model, and dispatch routing table.
-- `agents/cbp-task-check.md` — enforces the zero-assertion hard-fail and the empty
+- `agents/cbp-verify-reviewer.md` — enforces the zero-assertion hard-fail and the empty
   `e2e_gallery[]` hard-fail at verdict time.
-- `skills/cbp-round-execute/SKILL.md` Step 5/6, `skills/cbp-checkpoint-check/SKILL.md` Step 5b
+- `skills/cbp-round-build/SKILL.md` Step 5/6, `skills/cbp-checkpoint-check/SKILL.md` Step 5b
   — the config-driven dispatch and `e2e_eligible_skipped` gate implementations.

package/templates/rules/execution-proof.md ADDED Viewed

@@ -0,0 +1,70 @@
+---
+description: Real execution proof is a non-skippable verify obligation — tiered by what the round touched, every tier producing a COMMITTED artifact, never prose.
+paths:
+  - ".claude/skills/cbp-verify/**"
+  - ".claude/skills/cbp-round-build/**"
+  - ".claude/agents/cbp-verify-reviewer.md"
+  - ".claude/agents/cbp-e2e-playwright.md"
+  - ".claude/agents/cbp-e2e-maestro.md"
+  - ".claude/agents/cbp-e2e-tauri.md"
+  - ".claude/agents/cbp-e2e-vscode.md"
+  - ".claude/agents/cbp-e2e-xcuitest.md"
+---
+# Execution Proof
+"I verified the build" is not proof. Proof is a **committed artifact** that an auditor can
+re-inspect after the session ends. `cbp-verify` Phase 3 produces it; a passing verdict without
+it is invalid. The required artifact is **tiered by what the round's diff actually touched** —
+the tier is chosen from `files_changed`, not from a `has_ui_work` guess.
+## Tiers
+| Tier | Round touched | Proof obligation | Asserted by |
+|------|---------------|------------------|-------------|
+| **1** | A configured e2e framework's `app` source (`.codebyplan/e2e.json`) | `cbp-e2e-*` specialist runs the app and **commits screenshots** to the framework's committed dir | `codebyplan e2e verify-round` (non-empty gallery + non-zero assertions) |
+| **2** | UI source, but NO e2e framework configured for that app | **MANDATORY** dev-server run + at least one committed route screenshot or HTTP response trace for each changed route | manifest `artifacts[]` + `git ls-files --error-unmatch` |
+| **3** | Backend / API only (route handlers, server actions, endpoints) | Hit each changed endpoint; record an HTTP status trace (method, path, status, ms) committed to the round artifact dir | manifest `artifacts[]` |
+| **4** | `claude_only` / docs / config only (no app surface) | Proof IS the build/test commands — `codebyplan check --scope round\|task` (+ `bash -n` for touched hooks); profile-valid, no screenshot | manifest `gates[]` |
+A round can hit multiple tiers; satisfy each tier its diff touches.
+## Hard Rules
+- **Empty proof on a UI-touching diff is a GATE FAILURE.** A round whose `files_changed`
+  includes UI source but whose manifest carries zero committed screenshots/traces fails verify —
+  route to a fix round that captures the missing artifact. (Mirrors `e2e-mandatory.md`
+  Committed-Screenshot Enforcement; sole exception: `vscode-test`-only behavior rounds.)
+- **Screenshots must be committed, not `/tmp`.** Each artifact path is proven present with
+  `git ls-files --error-unmatch <path>` — an unstaged or `/tmp` file is not proof.
+- **Prose is never proof.** A narrative claim with no artifact path does not satisfy any tier.
+## Manifest Schema
+`cbp-verify` writes a `verify_manifest` into round/task context — the durable record of which
+gates ran and what proof exists:
+```yaml
+verify_manifest:
+  scope: round | task
+  gates:                       # deterministic gate results
+    - name: gate6 | lint | typecheck | tests | audit
+      exit_code: number
+      new_failures: string[]   # post-baseline-diff; [] = pass
+  proof:
+    tier: 1 | 2 | 3 | 4
+    artifacts:                 # committed proof, one per affected surface
+      - kind: screenshot | http_trace | command_log
+        path: string           # repo-relative; verified via git ls-files --error-unmatch
+        affected: string       # route / endpoint / file this proves
+    e2e_verify_round:          # present for Tier 1
+      pass: boolean
+      failed_checks: string[]  # e2e_eligible_skipped | zero_assertion_run | empty_gallery
+  decided_at: ISO8601
+```
+## Cross-References
+- `rules/e2e-mandatory.md` — Tier 1 opt-out contract + committed-screenshot mandate.
+- `rules/two-tier-ci.md` — how proof feeds the soft (round/task) vs hardcore (checkpoint) tiers.
+- `skills/cbp-verify/reference/deterministic-gates.md` — the gate command contracts + manifest write.

package/templates/rules/model-invocation-convention.md CHANGED Viewed

@@ -7,8 +7,8 @@ a skill is strictly user-only (i.e. it must never auto-trigger from another skil
 The absence of `disable-model-invocation` (or `disable-model-invocation: false`) is the normal
 state. It allows the skill to be auto-triggered via the Skill tool from within other skills —
-which is how the auto-trigger close-out flow works (e.g. `cbp-task-check` → `cbp-task-testing`,
-`cbp-task-testing` → `cbp-task-complete`).
+which is how the auto-trigger close-out flow works (e.g. `cbp-round-build` → `cbp-verify`,
+`cbp-verify` task scope → `cbp-finalize`).
 ## The sole exception: `cbp-round-complete`

package/templates/rules/parallel-waves.md CHANGED Viewed

@@ -1,24 +1,24 @@
 ---
 name: parallel-waves
-description: Wave schema, invariants, and proximity-split algorithm for cbp-task-planner Phase 5.6 wave decomposition.
+description: Wave schema, invariants, and proximity-split algorithm for cbp-round-planner Phase 5.6 wave decomposition.
 paths:
-  - .claude/agents/cbp-task-planner.md
+  - .claude/agents/cbp-round-planner.md
 ---
 # Parallel Waves
-Authoritative expansion of `cbp-task-planner` Phase 5.6. The planner reads this file at wave decomposition time.
+Authoritative expansion of `cbp-round-planner` Phase 5.6. The planner reads this file at wave decomposition time.
 ## Wave Schema
-Each entry in `plan.waves[]` carries these fields (source: `.claude/agents/cbp-task-planner.md` Phase 5.6 "Output" block):
+Each entry in `plan.waves[]` carries these fields (source: `.claude/agents/cbp-round-planner.md` Phase 5.6 "Output" block):
 ```yaml
 - name: string               # short identifier, e.g. "web-ui", "backend", "db"
-  agent_type: 'round-executor' | 'inline'
+  agent_type: 'round-builder' | 'inline'
   files: string[]            # repo-relative paths owned by this wave
   depends_on: string[]       # names of waves that must complete before this one starts
-  skill_preloads: string[]   # skills invoked by the executor before Step 3 (e.g. "frontend-design")
+  skill_preloads: string[]   # skills invoked by the builder before Step 3 (e.g. "frontend-design")
   note: string               # optional — required on continuation waves from an arbitrary-boundary split
 ```
@@ -31,9 +31,9 @@ Each entry in `plan.waves[]` carries these fields (source: `.claude/agents/cbp-t
 **(III) 3–15 files per wave** — every wave holds between 3 and 15 files (inclusive).
   - Below 3: merge into a sibling wave.
   - Above 15: apply the proximity-split algorithm below.
-  - Sole exception — trivially small plans are exempt from the lower bound: a plan with fewer than 3 total files uses one single wave, and a single-app plan with ≤5 total files MAY skip decomposition entirely (one wave, or `waves[]` omitted — see `cbp-task-planner` Phase 5.6). Zero waves (omitted `waves[]`) trivially satisfies this invariant.
+  - Sole exception — trivially small plans are exempt from the lower bound: a plan with fewer than 3 total files uses one single wave, and a single-app plan with ≤5 total files MAY skip decomposition entirely (one wave, or `waves[]` omitted — see `cbp-round-planner` Phase 5.6). Zero waves (omitted `waves[]`) trivially satisfies this invariant.
-**(IV) UI skill preloads** — for each wave whose `files[]` contains UI-bearing paths (`*.tsx`, `*.jsx`, `*.scss`, etc.), add `"frontend-design"` to `skill_preloads[]` (source: `.claude/agents/cbp-task-planner.md` Phase 5.6 step "Populate `skill_preloads[]`").
+**(IV) UI skill preloads** — for each wave whose `files[]` contains UI-bearing paths (`*.tsx`, `*.jsx`, `*.scss`, etc.), add `"frontend-design"` to `skill_preloads[]` (source: `.claude/agents/cbp-round-planner.md` Phase 5.6 step "Populate `skill_preloads[]`").
 ## Proximity-Split Algorithm
@@ -57,7 +57,7 @@ Invariants I (disjoint files), II (acyclic `depends_on` DAG), and III (3–15 fi
 ## Cross-References
-- `agents/cbp-task-planner.md` Phase 5.6 — consumer of this rule; steps 1–6 and the `validate-waves` verification call.
+- `agents/cbp-round-planner.md` Phase 5.6 — consumer of this rule; steps 1–6 and the `validate-waves` verification call.
 - `packages/codebyplan-package/src/lib/validate-waves.ts` — deterministic enforcement of invariants I–III.
-- `agents/cbp-round-executor.md` Step 2.6 — wave-mode skill preloads.
-- `skills/cbp-round-execute/SKILL.md` Step 3 — per-wave executor dispatch.
+- `agents/cbp-round-builder.md` Step 2.6 — wave-mode skill preloads.
+- `skills/cbp-round-build/SKILL.md` Step 3 — per-wave builder dispatch.

package/templates/rules/spawn-failure-is-gate-failure.md ADDED Viewed

@@ -0,0 +1,76 @@
+---
+description: A subagent spawn failure is a HARD GATE FAILURE — STOP and retry, never walk the agent's steps inline and self-certify.
+paths:
+  - ".claude/skills/cbp-verify/**"
+  - ".claude/skills/cbp-round-build/**"
+  - ".claude/skills/cbp-finalize/**"
+  - ".claude/agents/cbp-verify-reviewer.md"
+  - ".claude/agents/cbp-round-builder.md"
+---
+# Spawn Failure Is Gate Failure
+When a verify/execution stage delegates work to a subagent (e.g. `cbp-verify` spawning
+`cbp-verify-reviewer`, `cbp-round-build` spawning `cbp-round-builder`), the agent is the
+**fresh-context oracle**. If the agent cannot run, the orchestrator does NOT have an
+equivalent signal — and it must NEVER manufacture one.
+## The Rule
+A **spawn failure** — the agent could not run, or died on a terminal error before producing
+its output contract — is a **HARD GATE FAILURE**. The orchestrator STOPS and surfaces a retry
+directive. It does NOT walk the agent's phase checklist inline with its own tools and grade its
+own work. Self-certification by the orchestrator that spawned the agent is precisely the
+fresh-context blind spot the agent exists to remove; reproducing the agent's steps inline
+re-introduces it.
+Spawn-failure classes (non-exhaustive): provider 5xx, rate-limit / monthly-cap / billing block,
+context overflow at spawn, the agent process dying before emitting its output contract.
+**Retry directive shape** (surface verbatim, then STOP):
+```
+## Verify blocked — reviewer could not spawn
+The fresh-context reviewer (<agent>) failed to spawn: <class> — <verbatim error>.
+This is a hard gate failure, not a pass. Retry when capacity returns:
+  Next: /cbp-verify
+```
+Record `<scope>.context.verify.spawn_failure = { agent, class, error_message, decided_at }` so
+the retry is auditable and a verdict is never written on a missing review.
+## Spawn-Failed vs Spawn-Ran-And-Found-Problems
+These are different outcomes with opposite routes — do not conflate them:
+| Outcome | Meaning | Route |
+|---------|---------|-------|
+| **Spawn failed** | Agent never produced its output contract (terminal error). | HARD GATE FAILURE → STOP + retry directive. No verdict written. |
+| **Spawn ran, found problems** | Agent returned findings / `NOT_READY`. | Normal flow → in-scope mechanical fix or `/cbp-round-plan` fix round. |
+A returned `NOT_READY` is a *successful* review with a negative verdict — it is acted on, not
+retried. Only the absence of a contract is a spawn failure.
+## Carve-Out: The `claude_only` Profile Is Not Inline Fallback
+The `claude_only` profile (rounds with no app surface — `.claude/`-only edits, docs, config)
+has **no agent to spawn by design**. Its proof IS the deterministic command set:
+`codebyplan check --scope round|task` plus `bash -n <hook>` for any touched shell file. Running
+those inline is a **first-class deterministic verification path**, not a banned inline fallback —
+there was never a subagent to substitute for. This carve-out applies ONLY when the resolved
+profile is `claude_only`; for every other profile an agent is expected, and its spawn failure is
+a hard gate failure per above.
+## Why (Replaces Inline-Fallback Self-Certification)
+The retired `inline-fallback.md` procedures let an orchestrator that just failed to spawn an
+agent walk that agent's steps and pass its own work. That defeated the entire point of a
+fresh-context review and silently downgraded quality under sustained outages. This rule replaces
+those procedures: a missing review is a STOP, not a self-graded continue.
+## Cross-References
+- `skills/cbp-verify/SKILL.md` Phase 4 — the reviewer spawn + this hard-fail.
+- `agents/cbp-verify-reviewer.md` — the reviewer whose absence triggers this rule.
+- `rules/execution-proof.md` — the proof obligation a passing verdict still requires.

package/templates/rules/task-routing-recommendation.md CHANGED Viewed

@@ -12,7 +12,7 @@ CodeByPlan has two families of task commands since CHK-141:
 | Family | Commands | When to use |
 |--------|----------|-------------|
-| Checkpoint-bound | `/cbp-task-create`, `/cbp-task-start {chk}-{task}`, `/cbp-task-check`, `/cbp-task-testing`, `/cbp-task-complete` | Work that belongs to a CHK-NNN checkpoint |
+| Checkpoint-bound | `/cbp-task-create`, `/cbp-task-start {chk}-{task}`, `/cbp-verify`, `/cbp-finalize` | Work that belongs to a CHK-NNN checkpoint |
 | Standalone | `/cbp-standalone-task-create`, `/cbp-standalone-task-start {task}`, `/cbp-standalone-task-check`, `/cbp-standalone-task-testing`, `/cbp-standalone-task-complete` | Independent work not tied to any checkpoint |
 ## Round Commands (Both Families)

package/templates/rules/todo-backend.md CHANGED Viewed

@@ -62,8 +62,8 @@ The queue head (`get_todos` `rows[0]`) maps to one of these slash commands. The
 | State | Command | Required context |
 |-------|---------|------------------|
-| Round in progress | `/cbp-round-update` | `{checkpoint_id, task_id, round_id}` |
-| Round pending start | `/cbp-round-start` | `{checkpoint_id, task_id}` |
+| Round in progress | `/cbp-verify` | `{checkpoint_id, task_id, round_id}` |
+| Round pending start | `/cbp-round-plan` | `{checkpoint_id, task_id}` |
 | Task pending start | `/cbp-task-start` | `{checkpoint_id, task_id}` or `{task_id}` for standalone |
 | Checkpoint pending activation | `/cbp-checkpoint-update` | `{checkpoint_id}` |
 | Checkpoint done | `/cbp-checkpoint-check` | `{checkpoint_id}` |
@@ -118,4 +118,4 @@ CHK-111 shipped the original todos queue as Postgres triggers + a 583-LOC `regen
 4. Env vars (from `apps/todo-worker/.env.example`): `SUPABASE_URL`, `SUPABASE_SECRET_KEY` (an `sb_secret_...` key), `LOG_LEVEL`, `WORKER_POLL_MS`.
 5. Save the resulting `project_ref` to `.codebyplan.json` `shipment.surfaces.railway-todo-worker.project_ref`.
-Smoke after deploy: run `/cbp-task-complete` in any worktree → tail Railway logs → expect a `claim → apply` cycle within `WORKER_POLL_MS`.
+Smoke after deploy: run `/cbp-finalize` in any worktree → tail Railway logs → expect a `claim → apply` cycle within `WORKER_POLL_MS`.

package/templates/rules/two-tier-ci.md ADDED Viewed

@@ -0,0 +1,63 @@
+---
+description: Two CI tiers — soft (round/task → feat) is baseline-tolerant; hardcore (checkpoint → main) is whole-repo absolute green. Branch model is feat→main direct.
+paths:
+  - ".claude/skills/cbp-verify/**"
+  - ".claude/skills/cbp-checkpoint-check/**"
+  - ".claude/skills/cbp-checkpoint-end/**"
+  - ".claude/skills/cbp-ship-main/**"
+  - ".codebyplan/ci.json"
+---
+# Two-Tier CI
+CodeByPlan gates work at two strictness tiers. The tier is chosen by **what is being
+promoted**, not by preference.
+## Soft Tier — round / task → feat branch
+Runs at every `cbp-verify` (round scope) and the task-scope escalation. **Baseline-tolerant**:
+pre-existing red is non-blocking; only NEW per-package failures fail.
+- `codebyplan check --scope round|task` (NO `--no-baseline`). Each baselined check
+  (`lint` / `typecheck` / `tests` / `audit`) fails ONLY when its `new_failures[]` is non-empty
+  vs `.check-baseline.json`. `gate6` (sibling-identity parity) is **always hard** — never
+  baselined.
+- `codebyplan e2e verify-round --round-id <id> --task-id <id>` per round (Tier-1 e2e proof).
+- Fresh-context review via `cbp-verify-reviewer` (its spawn failure is a hard gate failure —
+  `rules/spawn-failure-is-gate-failure.md`).
+The soft tier keeps the inner loop fast: a feat branch may carry the repo's known baseline red
+forward without blocking, while guaranteeing the work being added is itself clean.
+## Hardcore Tier — checkpoint → main
+Runs at checkpoint close (`cbp-checkpoint-check` / `cbp-checkpoint-end` / ship). **Zero baseline
+forgiveness — whole-repo absolute green.**
+- `codebyplan check --scope merged --no-baseline` — every failing package and every GHSA id
+  counts; any red fails. (`gate6` unchanged — still always hard.)
+- Aggregate e2e proof across the whole checkpoint diff.
+- Every required `main` branch-protection PR check is green (repo-specific — read the repo's
+  configured required checks, never assume a single hardcoded check name).
+## Critical Constraint — feat→main DIRECT, main-only
+The branch model is **feat→main direct**; `.codebyplan/git.json` has `integration: null`,
+`production: "main"`. There is **NO intermediate integration branch** — the "checkpoint branch"
+IS the per-checkpoint feat branch. The hardcore tier runs against that feat branch's merged
+state before it lands on main; do not assume a staging/integration hop exists.
+## Report-Only Rollout
+The whole-repo hardcore CI **job** lands **report-only first** (`continue-on-error: true`) and is
+flipped to a required check ONLY after the `apps/web` baseline is burned down. Until then,
+`--scope merged --no-baseline` is advisory in CI — surfaced, not enforced — so a pre-existing
+`apps/web` red does not block a merge while the baseline is still being paid down. Locally,
+`cbp-verify` still runs and reports it.
+## Cross-References
+- `rules/execution-proof.md` — the committed-artifact obligation feeding both tiers.
+- `rules/spawn-failure-is-gate-failure.md` — fresh-context review is non-substitutable.
+- `skills/cbp-verify/reference/deterministic-gates.md` — exact gate commands + JSON contracts.
+- `.codebyplan/git.json` — authoritative branch model (`integration: null`, `production: "main"`).

package/templates/settings.project.base.json CHANGED Viewed

@@ -56,9 +56,9 @@
       "Skill(cbp-checkpoint-check)",
       "Skill(cbp-checkpoint-complete)",
       "Skill(cbp-round-complete)",
-      "Skill(cbp-round-execute)",
+      "Skill(cbp-round-build)",
       "Skill(cbp-session-end)",
-      "Skill(cbp-task-complete)",
+      "Skill(cbp-finalize)",
       "Skill(cbp-standalone-task-create)",
       "Skill(cbp-standalone-task-start)",
       "Skill(cbp-standalone-task-complete)",
@@ -126,12 +126,7 @@
       "Skill(cbp-map-architecture)",
       "Skill(cbp-merge-main)",
       "Skill(cbp-refresh-arch-map)",
-      "Skill(cbp-refresh-infra)",
-      "Skill(cbp-round-check)",
-      "Skill(cbp-round-end)",
-      "Skill(cbp-round-input)",
-      "Skill(cbp-round-start)",
-      "Skill(cbp-round-update)",
+      "Skill(cbp-round-plan)",
       "Skill(cbp-session-start)",
       "Skill(cbp-setup-cd)",
       "Skill(cbp-setup-ci)",
@@ -144,11 +139,10 @@
       "Skill(cbp-supabase-branch-check)",
       "Skill(cbp-supabase-migrate)",
       "Skill(cbp-supabase-setup)",
-      "Skill(cbp-task-check)",
       "Skill(cbp-task-create)",
       "Skill(cbp-task-start)",
-      "Skill(cbp-task-testing)",
       "Skill(cbp-todo)",
+      "Skill(cbp-verify)",
       "Skill(supabase)",
       "Skill(supabase-postgres-best-practices)",
       "mcp__codebyplan__get_checkpoints",
@@ -214,6 +208,8 @@
       "Bash(npx codebyplan ports:*)",
       "Bash(codebyplan tech-stack:*)",
       "Bash(npx codebyplan tech-stack:*)",
+      "Bash(codebyplan docs:*)",
+      "Bash(npx codebyplan docs:*)",
       "Bash(codebyplan eslint:*)",
       "Bash(npx codebyplan eslint:*)",
       "Bash(codebyplan lsp:*)",
@@ -228,6 +224,8 @@
       "Bash(npx codebyplan checkpoint:*)",
       "Bash(codebyplan task:*)",
       "Bash(npx codebyplan task:*)",
+      "Bash(codebyplan standalone-task:*)",
+      "Bash(npx codebyplan standalone-task:*)",
       "Bash(codebyplan session:*)",
       "Bash(npx codebyplan session:*)",
       "Bash(codebyplan help:*)",