create-claude-cabinet 0.30.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli.js +18 -1
- package/package.json +1 -1
- package/templates/README.md +4 -2
- package/templates/cabinet/checkpoint-protocol.md +113 -0
- package/templates/hooks/action-completion-gate.sh +70 -0
- package/templates/skills/cc-upgrade/SKILL.md +14 -0
- package/templates/skills/cc-upgrade/phases/execute-plans-rename-detect.md +77 -0
- package/templates/skills/execute/SKILL.md +30 -46
- package/templates/skills/execute-group/SKILL.md +183 -0
- package/templates/skills/{execute-plans → generate-plan-groups}/SKILL.md +72 -89
- package/templates/skills/plan/SKILL.md +2 -1
- package/templates/skills/validate/phases/validators.md +37 -0
- package/templates/workflows/execute-group.js +495 -0
- /package/templates/skills/{execute-plans → generate-plan-groups}/scripts/build-conflict-graph.js +0 -0
package/lib/cli.js
CHANGED
|
@@ -485,7 +485,7 @@ const MODULES = {
|
|
|
485
485
|
mandatory: false,
|
|
486
486
|
default: true,
|
|
487
487
|
lean: true,
|
|
488
|
-
templates: ['skills/plan', 'skills/execute', 'skills/execute-
|
|
488
|
+
templates: ['skills/plan', 'skills/execute', 'skills/generate-plan-groups', 'skills/execute-group', 'workflows/execute-group.js', 'skills/investigate', 'cabinet/checkpoint-protocol.md'],
|
|
489
489
|
},
|
|
490
490
|
'compliance': {
|
|
491
491
|
name: 'Compliance Stack (rules + enforcement)',
|
|
@@ -1279,6 +1279,23 @@ async function run() {
|
|
|
1279
1279
|
}
|
|
1280
1280
|
}
|
|
1281
1281
|
}
|
|
1282
|
+
// execute-plans/ → generate-plan-groups/ (the plan→parallel split).
|
|
1283
|
+
// Key-matched, not version-gated: if the old key is present it needs
|
|
1284
|
+
// migrating; if it isn't, this no-ops. Idempotent on re-run.
|
|
1285
|
+
for (const key of Object.keys(existingManifest)) {
|
|
1286
|
+
const match = key.match(/\.claude\/skills\/execute-plans\//);
|
|
1287
|
+
if (match) {
|
|
1288
|
+
const newKey = key.replace('skills/execute-plans/', 'skills/generate-plan-groups/');
|
|
1289
|
+
// Partial-state guard: if the project already tracks the new key
|
|
1290
|
+
// (a prior partial migration), keep its hash — don't clobber it with
|
|
1291
|
+
// the stale execute-plans hash, which would force a needless re-copy.
|
|
1292
|
+
if (!existingManifest[newKey]) {
|
|
1293
|
+
existingManifest[newKey] = existingManifest[key];
|
|
1294
|
+
}
|
|
1295
|
+
delete existingManifest[key];
|
|
1296
|
+
migrationCount++;
|
|
1297
|
+
}
|
|
1298
|
+
}
|
|
1282
1299
|
// Future manifest key migrations go here
|
|
1283
1300
|
if (migrationCount > 0) {
|
|
1284
1301
|
console.log(` 🔄 Migrated ${migrationCount} manifest key${migrationCount === 1 ? '' : 's'} for directory rename`);
|
package/package.json
CHANGED
package/templates/README.md
CHANGED
|
@@ -27,7 +27,7 @@ templates, see [EXTENSIONS.md](EXTENSIONS.md).
|
|
|
27
27
|
| `rules/enforcement-pipeline.md` | Generic enforcement pipeline: capture, classify, promote, encode, monitor. Describes the compliance stack and promotion criteria. |
|
|
28
28
|
| `rules/memory-capture.md` | When and how to capture memories via /cc-remember to the per-file curated layout at ~/.claude/projects/<slug>/memory/. What to capture, what not to, cadence guidance. |
|
|
29
29
|
|
|
30
|
-
### Skills (
|
|
30
|
+
### Skills (24 workflow + 31 cabinet members)
|
|
31
31
|
|
|
32
32
|
**Workflow Skills:**
|
|
33
33
|
|
|
@@ -39,7 +39,8 @@ templates, see [EXTENSIONS.md](EXTENSIONS.md).
|
|
|
39
39
|
| `skills/debrief/` | Session close. Inventory work, close items, run cabinet consultations, update state, persist, record lessons. 9 phase files. |
|
|
40
40
|
| `skills/debrief-quick/` | Quick debrief variant — core phases only, skip presentation. |
|
|
41
41
|
| `skills/execute/` | Execute a plan with cabinet member checkpoints. 3-checkpoint protocol (pre-implementation, per-file-group, pre-commit). 5 phase files. |
|
|
42
|
-
| `skills/
|
|
42
|
+
| `skills/generate-plan-groups/` | Scheduler: find plans with surface-area declarations, build a conflict graph, persist conflict-free parallel groups as pib-db `grp:` tags. Does not execute — hands each group to /execute-group. |
|
|
43
|
+
| `skills/execute-group/` | Runner: execute one generated group via the `execute-group.js` workflow — cabinet pre-review, parallel worktree implementation, sequential merge with per-plan review, integration, informed final review, completion report. |
|
|
43
44
|
| `skills/cc-extract/` | Analyze project artifacts and propose upstream extraction candidates for Claude Cabinet. |
|
|
44
45
|
| `skills/investigate/` | Structured codebase exploration: frame, observe, hypothesize, test, conclude. |
|
|
45
46
|
| `skills/cc-link/` | Set up local development linking for Claude Cabinet source repo work. |
|
|
@@ -103,6 +104,7 @@ mandates and scoped directives.
|
|
|
103
104
|
| `cabinet/eval-protocol.md` | Structured assessment framework for evaluating skill/cabinet member effectiveness. |
|
|
104
105
|
| `cabinet/lifecycle.md` | When to adopt, retire, and assess cabinet members. |
|
|
105
106
|
| `cabinet/output-contract.md` | How cabinet members produce structured findings for the audit system. |
|
|
107
|
+
| `cabinet/checkpoint-protocol.md` | The cabinet checkpoint mechanism (member selection, verdict schema, escalation) shared by /execute and /execute-group — read, not copied, so both stay in sync. |
|
|
106
108
|
| `cabinet/prompt-guide.md` | Craft knowledge for writing cabinet member prompts. 17 principles. |
|
|
107
109
|
|
|
108
110
|
### Scripts (12)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Cabinet Checkpoint Protocol
|
|
2
|
+
|
|
3
|
+
The single source of truth for how cabinet members review work in
|
|
4
|
+
progress. `/execute` and `/execute-group` both **read this file and
|
|
5
|
+
follow it** rather than copying the mechanism — so a change here flows to
|
|
6
|
+
every checkpoint, everywhere, with no copy-drift.
|
|
7
|
+
|
|
8
|
+
A checkpoint is a chance to stop before the cost of fixing goes up. The
|
|
9
|
+
mechanism is the same at every scale; only the **scope** of what's
|
|
10
|
+
reviewed changes.
|
|
11
|
+
|
|
12
|
+
## When you are told to "follow the checkpoint protocol scoped to X"
|
|
13
|
+
|
|
14
|
+
The caller names a scope. The scope determines what each spawned agent
|
|
15
|
+
reviews — everything else (how to spawn, what to collect, how to
|
|
16
|
+
escalate) is identical.
|
|
17
|
+
|
|
18
|
+
| Scope | Reviews | Runs |
|
|
19
|
+
|-------|---------|------|
|
|
20
|
+
| `pre-impl` | The plan text + the list of files it will change | Before any code is written |
|
|
21
|
+
| `this file group` | The git diff for one logical group of changed files | After each file group is implemented |
|
|
22
|
+
| `pre-commit` | The full git diff of all changes | After implementation, before commit |
|
|
23
|
+
| `this group's aggregate` *(group runs only)* | The combined diff of all plans in a parallel group | After a parallel group merges |
|
|
24
|
+
|
|
25
|
+
A *parallel group* (the last row) is `/execute-group`'s unit of work: a
|
|
26
|
+
set of conflict-free plans implemented concurrently in separate worktrees,
|
|
27
|
+
then merged together. `/execute` never exercises that scope — it runs one
|
|
28
|
+
plan at a time and uses only the first three.
|
|
29
|
+
|
|
30
|
+
## Step 1 — Select which members to spawn
|
|
31
|
+
|
|
32
|
+
Spawn one Agent per cabinet member that matches **either**:
|
|
33
|
+
|
|
34
|
+
- **Standing mandate** — `standingMandate` includes the current verb
|
|
35
|
+
(`execute`). Read `.claude/skills/_index.json` to find these members. They run
|
|
36
|
+
at every checkpoint regardless of surface area.
|
|
37
|
+
- **Surface area** — a file in the reviewed scope matches the member's
|
|
38
|
+
file patterns, or a keyword in the plan description matches the
|
|
39
|
+
member's topic keywords.
|
|
40
|
+
|
|
41
|
+
Fall back to reading `cabinet-*/SKILL.md` frontmatter if the index is
|
|
42
|
+
missing.
|
|
43
|
+
|
|
44
|
+
**Err toward inclusion.** A member that activates unnecessarily costs a
|
|
45
|
+
few seconds; one that stays silent when it was needed costs rework. For
|
|
46
|
+
`this file group` scope, narrow to members matching *that group's* files
|
|
47
|
+
— a member reviewing 3 changed files gives sharper feedback than one
|
|
48
|
+
reviewing 30.
|
|
49
|
+
|
|
50
|
+
If the project has no cabinet members, skip the checkpoint and proceed —
|
|
51
|
+
checkpoints add depth, not structure.
|
|
52
|
+
|
|
53
|
+
## Step 2 — Spawn the agents (concurrently)
|
|
54
|
+
|
|
55
|
+
Spawn the selected members concurrently — they don't depend on each
|
|
56
|
+
other. **How** you spawn depends on the caller:
|
|
57
|
+
|
|
58
|
+
- From `/execute` (main session): issue all Agent-tool calls in a single
|
|
59
|
+
message so they run in parallel.
|
|
60
|
+
- From `/execute-group` (workflow script): issue the spawns as `agent()`
|
|
61
|
+
calls inside a `parallel()` block. Worktree agents cannot spawn
|
|
62
|
+
reviewers themselves — the workflow orchestrator does it.
|
|
63
|
+
|
|
64
|
+
Either way, each spawned agent receives:
|
|
65
|
+
|
|
66
|
+
- The cabinet member's full `SKILL.md` content
|
|
67
|
+
- Essential project briefing from `.claude/cabinet/_briefing.md` (read it
|
|
68
|
+
once, reuse for every agent)
|
|
69
|
+
- The member's `directives.execute`, if present — paste it in to sharpen
|
|
70
|
+
the member's focus
|
|
71
|
+
- **The scoped material:** plan text + file list (`pre-impl`), or the
|
|
72
|
+
relevant git diff (`this file group`, `pre-commit`, `this group's aggregate`)
|
|
73
|
+
- An instruction to return the verdict object below
|
|
74
|
+
|
|
75
|
+
## Step 3 — Collect verdicts
|
|
76
|
+
|
|
77
|
+
Each agent returns exactly this shape:
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"cabinet_member": "name",
|
|
82
|
+
"verdict": "continue" | "pause" | "stop",
|
|
83
|
+
"concerns": [
|
|
84
|
+
{ "description": "...", "evidence": "...", "severity": "blocking" | "advisory" }
|
|
85
|
+
]
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Step 4 — Apply escalation
|
|
90
|
+
|
|
91
|
+
Collect every verdict, then:
|
|
92
|
+
|
|
93
|
+
- **Any `stop`** → halt. Show the concern. Require an explicit override
|
|
94
|
+
from the user before proceeding.
|
|
95
|
+
- **Any `pause`** (fewer than three) → show the concern with options: proceed / address /
|
|
96
|
+
abort.
|
|
97
|
+
- **3+ `pause`** → escalate to stop-equivalent (halt, require override).
|
|
98
|
+
- **All `continue`** → proceed with a brief one-line summary.
|
|
99
|
+
|
|
100
|
+
At `pre-commit` and `this group's aggregate` scopes, re-check earlier `continue`
|
|
101
|
+
concerns: a concern that was minor in one file group can become
|
|
102
|
+
significant once all changes are viewed together.
|
|
103
|
+
|
|
104
|
+
## Principles
|
|
105
|
+
|
|
106
|
+
- **Cabinet members are guardrails, not gates.** The user always has the
|
|
107
|
+
final say. A `stop` requires explicit override — it is not an automatic
|
|
108
|
+
rejection.
|
|
109
|
+
- **Scope tightly.** The narrower the diff a member reviews, the better
|
|
110
|
+
the feedback.
|
|
111
|
+
- **The pre-commit sweep catches emergent issues.** File groups that look
|
|
112
|
+
fine alone can create problems in combination — type mismatches across
|
|
113
|
+
boundaries, security gaps from API + frontend changes landing together.
|
|
@@ -42,4 +42,74 @@ if [ "$AC_VERIFIED" != "True" ]; then
|
|
|
42
42
|
exit 0
|
|
43
43
|
fi
|
|
44
44
|
|
|
45
|
+
# --- Group-plan gate (Piece 4) ---
|
|
46
|
+
# Plans run via /execute-group carry a grp:<label> tag. For these, the
|
|
47
|
+
# workflow's Completion Report is the proof that the checkpoint sequence ran.
|
|
48
|
+
# The report is the workflow's own execution record — it either ran the
|
|
49
|
+
# checkpoints or it didn't. (Honest ceiling: this proves the workflow ran all
|
|
50
|
+
# checkpoints and they returned continue, NOT that the right members reviewed
|
|
51
|
+
# or that the review was deep. Auto-upgrades when cabinet subagent identity
|
|
52
|
+
# becomes trustworthy.)
|
|
53
|
+
#
|
|
54
|
+
# Tag lookup is best-effort: if pib.db can't be read, GRP_LABEL is empty and
|
|
55
|
+
# this gate is skipped — the base breadcrumb gate above still applies.
|
|
56
|
+
DB_PATH="${PIB_DB_PATH:-pib.db}"
|
|
57
|
+
TAGS=$(python3 -c "
|
|
58
|
+
import sqlite3, sys
|
|
59
|
+
try:
|
|
60
|
+
c = sqlite3.connect('$DB_PATH')
|
|
61
|
+
r = c.execute('SELECT tags FROM actions WHERE fid=?', ('$FID',)).fetchone()
|
|
62
|
+
sys.stdout.write(r[0] if r and r[0] else '')
|
|
63
|
+
except Exception:
|
|
64
|
+
sys.stdout.write('')
|
|
65
|
+
" 2>/dev/null)
|
|
66
|
+
|
|
67
|
+
GRP_LABEL=$(printf '%s' "$TAGS" | tr ',' '\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | grep '^grp:' | head -1 | sed 's/^grp://')
|
|
68
|
+
# Sanitize: group labels are date-style tokens (A-Za-z0-9_-). Strip anything
|
|
69
|
+
# else before interpolating into a file path / python string — defends against
|
|
70
|
+
# path traversal (../) and quote-breaking from a malformed tag.
|
|
71
|
+
GRP_LABEL=$(printf '%s' "$GRP_LABEL" | tr -cd 'A-Za-z0-9_-')
|
|
72
|
+
|
|
73
|
+
if [ -n "$GRP_LABEL" ]; then
|
|
74
|
+
# grp plans must carry the scenarios_updated field (the worktree agent
|
|
75
|
+
# records it — an empty array is fine, but absence means the agent didn't
|
|
76
|
+
# run the feature-file step).
|
|
77
|
+
HAS_SCENARIOS=$(python3 -c "import json; d=json.load(open('$BREADCRUMB')); print('scenarios_updated' in d)" 2>/dev/null)
|
|
78
|
+
if [ "$HAS_SCENARIOS" != "True" ]; then
|
|
79
|
+
echo "{\"decision\":\"block\",\"reason\":\"Action $FID (grp:$GRP_LABEL) breadcrumb is missing the scenarios_updated field. The /execute-group worktree agent records it (empty array if no e2e/features files were affected). Re-run /execute-group $GRP_LABEL so the field is written.\"}"
|
|
80
|
+
exit 0
|
|
81
|
+
fi
|
|
82
|
+
|
|
83
|
+
REPORT="$VERIFY_DIR/group-$GRP_LABEL-report.json"
|
|
84
|
+
|
|
85
|
+
if [ ! -f "$REPORT" ]; then
|
|
86
|
+
echo "{\"decision\":\"block\",\"reason\":\"Action $FID carries grp:$GRP_LABEL but its Completion Report is missing ($REPORT). Grouped plans are completed by /execute-group, which writes the report after running cabinet checkpoints and marks merged plans done itself. To complete: run /execute-group $GRP_LABEL. If you are completing this plan outside the group flow, remove the grp:$GRP_LABEL tag from its tags first.\"}"
|
|
87
|
+
exit 0
|
|
88
|
+
fi
|
|
89
|
+
|
|
90
|
+
VERDICT=$(python3 -c "
|
|
91
|
+
import json
|
|
92
|
+
try:
|
|
93
|
+
d = json.load(open('$REPORT'))
|
|
94
|
+
pp = d.get('per_plan', [])
|
|
95
|
+
me = next((p for p in pp if isinstance(p, dict) and p.get('fid') == '$FID'), None)
|
|
96
|
+
cks = d.get('checkpoints', {}) or {}
|
|
97
|
+
integ = cks.get('integration', {}) or {}
|
|
98
|
+
cp3g = cks.get('cp3_group', '')
|
|
99
|
+
if me is None: print('NOT_IN_REPORT')
|
|
100
|
+
elif me.get('status') != 'merged': print('plan-status=' + str(me.get('status')))
|
|
101
|
+
elif cp3g not in ('continue', 'skipped', 'n/a'): print('cp3_group=' + str(cp3g))
|
|
102
|
+
elif integ.get('validate') != 'pass': print('integration.validate=' + str(integ.get('validate')))
|
|
103
|
+
elif integ.get('breadcrumbs') != 'valid': print('integration.breadcrumbs=' + str(integ.get('breadcrumbs')))
|
|
104
|
+
else: print('OK')
|
|
105
|
+
except Exception:
|
|
106
|
+
print('REPORT_UNREADABLE')
|
|
107
|
+
" 2>/dev/null)
|
|
108
|
+
|
|
109
|
+
if [ "$VERDICT" != "OK" ]; then
|
|
110
|
+
echo "{\"decision\":\"block\",\"reason\":\"Action $FID (grp:$GRP_LABEL) is not cleared by its Completion Report: $VERDICT. The report must show this plan with status=merged, checkpoints.cp3_group=continue, integration.validate=pass, and integration.breadcrumbs=valid. Inspect it: cat $REPORT . If the group run did not finish cleanly, re-run /execute-group $GRP_LABEL; do not force-complete a plan the workflow parked or that failed integration.\"}"
|
|
111
|
+
exit 0
|
|
112
|
+
fi
|
|
113
|
+
fi
|
|
114
|
+
|
|
45
115
|
exit 0
|
|
@@ -269,6 +269,20 @@ and correct path.
|
|
|
269
269
|
`~/.claude-cabinet/omega-venv/` exists OR `~/.claude/settings.json`
|
|
270
270
|
contains `omega-venv`, run `phases/omega-migration-detect.md`.
|
|
271
271
|
|
|
272
|
+
### 2.6. Directory Rename Cleanup
|
|
273
|
+
|
|
274
|
+
**Not version-gated — runs on any upgrade, keyed on disk presence.** When
|
|
275
|
+
CC renames a skill directory, the installer re-keys the manifest but
|
|
276
|
+
leaves the old directory on disk (the cleanup loop classifies it as a
|
|
277
|
+
non-template file and keeps it). These phases detect and remove such
|
|
278
|
+
orphans conversationally:
|
|
279
|
+
|
|
280
|
+
- **`execute-plans/` → `generate-plan-groups/` + `execute-group/`:** if
|
|
281
|
+
`.claude/skills/execute-plans/` exists, run
|
|
282
|
+
`phases/execute-plans-rename-detect.md`.
|
|
283
|
+
|
|
284
|
+
If the orphan directory isn't present, the phase skips silently.
|
|
285
|
+
|
|
272
286
|
### 3. Explain What Changed
|
|
273
287
|
|
|
274
288
|
Read `phases/explain-changes.md` for how to present changes.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# execute-plans → generate-plan-groups rename detection
|
|
2
|
+
|
|
3
|
+
In the plan→parallel-execution split, the all-in-one `/execute-plans`
|
|
4
|
+
skill was divided into `/generate-plan-groups` (scheduler) and
|
|
5
|
+
`/execute-group` (runner). The installer's manifest-key migration re-keys
|
|
6
|
+
the tracked files for hash continuity, but it does **not** delete the old
|
|
7
|
+
`.claude/skills/execute-plans/` directory on disk — the cleanup loop
|
|
8
|
+
classifies it as a non-template file and keeps it. So after a mechanical
|
|
9
|
+
upgrade, a project that had `execute-plans` ends up with the orphan
|
|
10
|
+
directory still present, and `/execute-plans` muscle-memory keeps invoking
|
|
11
|
+
the old checkpoint-dropping skill.
|
|
12
|
+
|
|
13
|
+
This phase detects and removes that orphan.
|
|
14
|
+
|
|
15
|
+
## Detection
|
|
16
|
+
|
|
17
|
+
This phase proceeds only if the orphan directory is actually on disk:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
test -d .claude/skills/execute-plans && echo "HAS_ORPHAN=1"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
If the directory is absent (fresh install, or already cleaned), skip this
|
|
24
|
+
phase silently — say nothing.
|
|
25
|
+
|
|
26
|
+
## What to explain to the user
|
|
27
|
+
|
|
28
|
+
When the orphan is present, explain the rename in plain terms:
|
|
29
|
+
|
|
30
|
+
> `/execute-plans` has been split into two skills:
|
|
31
|
+
> - **`/generate-plan-groups`** — finds plans that can run in parallel and
|
|
32
|
+
> tags them into conflict-free groups (the old Steps 1–4).
|
|
33
|
+
> - **`/execute-group <label>`** — runs one group: worktree implementation
|
|
34
|
+
> *plus* cabinet checkpoints (which the old skill claimed to run but
|
|
35
|
+
> couldn't, because worktree agents can't spawn reviewers).
|
|
36
|
+
>
|
|
37
|
+
> Both new skills are now installed. The old `execute-plans/` directory is
|
|
38
|
+
> left over from before the rename and should be removed so `/execute-plans`
|
|
39
|
+
> stops resolving to the obsolete skill.
|
|
40
|
+
|
|
41
|
+
## Removal
|
|
42
|
+
|
|
43
|
+
The orphan is only safe to remove once its **direct replacement** —
|
|
44
|
+
`generate-plan-groups` (the renamed scheduler half) — is on disk. The
|
|
45
|
+
runner half, `execute-group`, may or may not be present (it ships in a
|
|
46
|
+
later piece / may be deselected); its absence must NOT block removal,
|
|
47
|
+
because the scheduler is the rename of the old skill. This single guard
|
|
48
|
+
covers both cases:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
if [ -f .claude/skills/generate-plan-groups/SKILL.md ]; then
|
|
52
|
+
rm -rf .claude/skills/execute-plans
|
|
53
|
+
echo "Removed orphaned .claude/skills/execute-plans/"
|
|
54
|
+
if [ ! -f .claude/skills/execute-group/SKILL.md ]; then
|
|
55
|
+
echo "Note: /execute-group (the runner) is not installed — /generate-plan-groups"
|
|
56
|
+
echo "persists groups; add the runner to execute them, or run /execute per plan."
|
|
57
|
+
fi
|
|
58
|
+
else
|
|
59
|
+
echo "WARN: generate-plan-groups not found — leaving execute-plans/ in place"
|
|
60
|
+
fi
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Never remove the orphan if `generate-plan-groups/SKILL.md` is absent —
|
|
64
|
+
that would delete the only copy of the scheduler logic.
|
|
65
|
+
|
|
66
|
+
## Persisted-group note
|
|
67
|
+
|
|
68
|
+
If the project has actions tagged with `grp:` tokens (from a prior
|
|
69
|
+
`/generate-plan-groups` run), those tags remain valid — they reference
|
|
70
|
+
plans, not the skill directory. No migration of tags is needed.
|
|
71
|
+
|
|
72
|
+
## What this phase does NOT do
|
|
73
|
+
|
|
74
|
+
- It does not rewrite historical pib-db actions that mention
|
|
75
|
+
`execute-plans` in their notes — that's history, left as-is.
|
|
76
|
+
- It does not touch the skill index (`_index.json`) — the installer
|
|
77
|
+
regenerates that from the installed skills on every run.
|
|
@@ -9,6 +9,9 @@ description: |
|
|
|
9
9
|
related:
|
|
10
10
|
- type: skill
|
|
11
11
|
name: validate
|
|
12
|
+
- type: file
|
|
13
|
+
path: .claude/cabinet/checkpoint-protocol.md
|
|
14
|
+
role: "The cabinet checkpoint mechanism — read and followed at Checkpoints 1/2/3"
|
|
12
15
|
- type: file
|
|
13
16
|
path: .claude/skills/execute/phases/load-plan.md
|
|
14
17
|
role: "Project-specific: where plans live and how to read them"
|
|
@@ -162,31 +165,14 @@ If no cabinet members exist in the project, skip all checkpoint steps
|
|
|
162
165
|
(3, 4b, 5) and execute the plan directly. Checkpoints add depth, not
|
|
163
166
|
structure.
|
|
164
167
|
|
|
165
|
-
### 3. Checkpoint 1: Pre-Implementation Review
|
|
166
|
-
|
|
167
|
-
Before writing any code, **spawn one Agent per activated cabinet member**
|
|
168
|
-
in a single message. Each receives:
|
|
169
|
-
- The cabinet member's full SKILL.md content
|
|
170
|
-
- Essential project briefing from `.claude/cabinet/_briefing.md`
|
|
171
|
-
- The plan text and list of files that will change
|
|
172
|
-
- Instructions to evaluate whether the plan is safe to start
|
|
173
|
-
|
|
174
|
-
Each agent returns:
|
|
175
|
-
```json
|
|
176
|
-
{
|
|
177
|
-
"cabinet_member": "name",
|
|
178
|
-
"verdict": "continue" | "pause" | "stop",
|
|
179
|
-
"concerns": [
|
|
180
|
-
{ "description": "...", "evidence": "...", "severity": "blocking" | "advisory" }
|
|
181
|
-
]
|
|
182
|
-
}
|
|
183
|
-
```
|
|
168
|
+
### 3. Checkpoint 1: Pre-Implementation Review
|
|
184
169
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
170
|
+
Before writing any code, **read `.claude/cabinet/checkpoint-protocol.md`
|
|
171
|
+
and follow it, scoped to `pre-impl`.** The protocol covers which members
|
|
172
|
+
to spawn, what each receives, the verdict shape, and the escalation
|
|
173
|
+
rules. The reviewed material at this scope is the plan text and the list
|
|
174
|
+
of files that will change — the question each member answers is "is this
|
|
175
|
+
plan safe to start?"
|
|
190
176
|
|
|
191
177
|
### 4. Implement (File Group by File Group)
|
|
192
178
|
|
|
@@ -206,22 +192,23 @@ For each group:
|
|
|
206
192
|
versions — prop APIs change between major versions and guessing
|
|
207
193
|
wastes build cycles.
|
|
208
194
|
3. **Checkpoint 2: File Group Review** — if cabinet members are active,
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
195
|
+
**read `.claude/cabinet/checkpoint-protocol.md` and follow it, scoped
|
|
196
|
+
to `this file group`.** The reviewed material is the git diff for this
|
|
197
|
+
file group plus plan context; member selection narrows to those
|
|
198
|
+
matching the changed files.
|
|
212
199
|
4. If all continue, move to the next group
|
|
213
200
|
|
|
214
201
|
File-group granularity keeps reviews focused. A cabinet member reviewing
|
|
215
202
|
3 changed files gives better feedback than one reviewing 30.
|
|
216
203
|
|
|
217
|
-
### 5. Checkpoint 3: Pre-Commit Sweep
|
|
204
|
+
### 5. Checkpoint 3: Pre-Commit Sweep
|
|
218
205
|
|
|
219
|
-
After all implementation is complete, **
|
|
220
|
-
cabinet
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
206
|
+
After all implementation is complete, **read
|
|
207
|
+
`.claude/cabinet/checkpoint-protocol.md` and follow it, scoped to
|
|
208
|
+
`pre-commit`.** The reviewed material is the full git diff of all changes
|
|
209
|
+
plus plan context. As the protocol notes for this scope, earlier
|
|
210
|
+
"continue" concerns are re-checked — a concern that was minor in
|
|
211
|
+
isolation may be significant in the aggregate.
|
|
225
212
|
|
|
226
213
|
### 6. Validate and Commit
|
|
227
214
|
|
|
@@ -334,18 +321,15 @@ doesn't define. Execute them at their declared position.
|
|
|
334
321
|
|
|
335
322
|
## Principles
|
|
336
323
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
- **
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
groups may look fine but create problems in combination (type
|
|
347
|
-
mismatches across boundaries, security gaps from API + frontend
|
|
348
|
-
changes together).
|
|
324
|
+
The principles governing the checkpoints themselves — guardrails not
|
|
325
|
+
gates, err toward inclusion, tight scoping, the pre-commit sweep — live
|
|
326
|
+
in `.claude/cabinet/checkpoint-protocol.md` (the single source of truth
|
|
327
|
+
the checkpoints read). The principle specific to `/execute`:
|
|
328
|
+
|
|
329
|
+
- **Verify every acceptance criterion before marking work done.** The
|
|
330
|
+
checkpoints catch design and review issues; the QA gate (Step 7)
|
|
331
|
+
catches "looks complete but the AC was never actually run." Neither
|
|
332
|
+
substitutes for the other.
|
|
349
333
|
|
|
350
334
|
## Calibration
|
|
351
335
|
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: execute-group
|
|
3
|
+
description: |
|
|
4
|
+
Run one parallel plan group produced by /generate-plan-groups. Validates
|
|
5
|
+
the group hasn't drifted, then launches the execute-group workflow:
|
|
6
|
+
cabinet pre-review, parallel worktree implementation, sequential merge with
|
|
7
|
+
per-plan review, integration check, informed final review, and a completion
|
|
8
|
+
report. Use when: "execute group", "run group", "/execute-group".
|
|
9
|
+
disable-model-invocation: true
|
|
10
|
+
argument-hint: "group label — e.g., '2026-05-30-1'"
|
|
11
|
+
related:
|
|
12
|
+
- type: skill
|
|
13
|
+
name: generate-plan-groups
|
|
14
|
+
- type: skill
|
|
15
|
+
name: execute
|
|
16
|
+
- type: file
|
|
17
|
+
path: .claude/cabinet/checkpoint-protocol.md
|
|
18
|
+
role: "The cabinet checkpoint mechanism — the workflow's review agents read and follow it"
|
|
19
|
+
- type: file
|
|
20
|
+
path: .claude/workflows/execute-group.js
|
|
21
|
+
role: "The orchestrator this skill launches"
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
# /execute-group — Run a Generated Parallel Plan Group
|
|
25
|
+
|
|
26
|
+
## Purpose
|
|
27
|
+
|
|
28
|
+
`/generate-plan-groups` decides *what can run in parallel* and persists each
|
|
29
|
+
conflict-free group as pib-db `grp:` tags. This skill *runs one group*: it
|
|
30
|
+
re-checks the group is still safe, then hands off to a **workflow
|
|
31
|
+
orchestrator** (`execute-group.js`) that drives implementation and cabinet
|
|
32
|
+
review end to end.
|
|
33
|
+
|
|
34
|
+
**Why a workflow, not direct Agent-tool spawning:** worktree agents cannot
|
|
35
|
+
spawn sub-agents (no Agent-tool access — empirically verified). So a worktree
|
|
36
|
+
agent cannot run its own cabinet checkpoints. The workflow script solves this
|
|
37
|
+
by being the single orchestrator: it spawns worktree agents for
|
|
38
|
+
implementation AND cabinet agents for review as first-class parallel
|
|
39
|
+
participants. This is the capability the old all-in-one parallel-execution
|
|
40
|
+
skill could not provide.
|
|
41
|
+
|
|
42
|
+
## Prerequisites
|
|
43
|
+
|
|
44
|
+
- The group must have been produced by `/generate-plan-groups` (its plans
|
|
45
|
+
carry `grp:<label>`, `grp-generated:`, and `grp-hash:` tags).
|
|
46
|
+
- Plans must still have `## Surface Area` sections in their notes.
|
|
47
|
+
- The Workflow tool must be available (the orchestrator runs as a workflow).
|
|
48
|
+
|
|
49
|
+
## Honest ceiling — read before relying on this
|
|
50
|
+
|
|
51
|
+
The workflow runs the checkpoints; it does not guarantee the *review was
|
|
52
|
+
thorough*. Specifically:
|
|
53
|
+
|
|
54
|
+
- **No mid-implementation (CP2) review.** Worktree agents implement without
|
|
55
|
+
a reviewer looking over their shoulder. CP1 reviews before, CP3 reviews
|
|
56
|
+
after. For a plan whose diff is large or touches high-risk surface, run
|
|
57
|
+
`/execute <plan>` individually instead of via a group — full `/execute`
|
|
58
|
+
has the per-file-group checkpoint this path sacrifices for parallelism.
|
|
59
|
+
- **Surface area is intent, not reality.** Under-declared surface area can
|
|
60
|
+
hide a semantic conflict the conflict graph missed; only CP3 catches it.
|
|
61
|
+
- **Feature-file "affect" is heuristic.** Behavioral coupling not textually
|
|
62
|
+
referenced may be missed.
|
|
63
|
+
|
|
64
|
+
## Workflow
|
|
65
|
+
|
|
66
|
+
### Step 1 — Staleness guard (skill-level, BEFORE launching)
|
|
67
|
+
|
|
68
|
+
The persisted group is a hint, not a contract. Re-validate it against the
|
|
69
|
+
*current* state before running:
|
|
70
|
+
|
|
71
|
+
1. **Fetch the group's plans.** Query actions whose `tags` contain
|
|
72
|
+
`grp:<label>` (the argument). Use `pib_query` (or `node scripts/pib-db.mjs
|
|
73
|
+
query`):
|
|
74
|
+
```sql
|
|
75
|
+
SELECT a.fid, a.text, a.notes, a.tags
|
|
76
|
+
FROM actions a
|
|
77
|
+
WHERE a.completed = 0 AND a.deleted_at IS NULL
|
|
78
|
+
AND a.tags LIKE '%grp:<label>%'
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
2. **Drop plans that are no longer open or have lost their `## Surface Area` section.** Report
|
|
82
|
+
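each dropped plan and why. Also drop any row whose tag is not exactly `grp:<label>`: the `LIKE` filter is a substring match, so label `2026-05-30-1` would also return plans tagged `grp:2026-05-30-10`.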
|
|
83
|
+
|
|
84
|
+
3. **Recompute the surface-area hash and compare.** Recompute it **exactly
|
|
85
|
+
as `/generate-plan-groups` did**: for every still-open plan in the group,
|
|
86
|
+
parse its `## Surface Area` file/dir list, concatenate all entries across
|
|
87
|
+
the group, sort, and hash. Compare to the `grp-hash:` token stored on the
|
|
88
|
+
plans.
|
|
89
|
+
- **Hash matches** → the group is current. Proceed.
|
|
90
|
+
- **Hash differs** → a plan's surface area changed since grouping. **HALT:**
|
|
91
|
+
> Group `<label>` has drifted since it was generated (surface areas
|
|
92
|
+
> changed). Re-run `/generate-plan-groups` to regroup, then
|
|
93
|
+
> `/execute-group` again.
|
|
94
|
+
Do not run a stale group — the conflict-free guarantee no longer holds.
|
|
95
|
+
|
|
96
|
+
4. **Edge cases:**
|
|
97
|
+
- **0 plans survive** the filter → tell the user the group is empty
|
|
98
|
+
(all drifted/closed) and stop. Don't launch the workflow.
|
|
99
|
+
- **1 plan survives** → you may still launch (the workflow skips
|
|
100
|
+
group-level checkpoints for a single plan), or just suggest
|
|
101
|
+
`/execute <plan>` directly (strip its `grp:` tags first, or the completion gate will block it). Single-plan groups gain nothing from the
|
|
102
|
+
parallel machinery.
|
|
103
|
+
|
|
104
|
+
### Step 2 — Select cabinet members
|
|
105
|
+
|
|
106
|
+
Select the cabinet members the workflow's checkpoints will use. Use
|
|
107
|
+
`.claude/skills/_index.json`: members whose `standingMandate` includes
|
|
108
|
+
`execute`, plus any whose file patterns match the group's aggregate surface
|
|
109
|
+
area. For each, collect `{ key, agentType, path, directive }` (the
|
|
110
|
+
`agentType` is the registered `cabinet-<name>` subagent; `directive` is
|
|
111
|
+
`directives.execute` if present). The workflow's review agents each read
|
|
112
|
+
`.claude/cabinet/checkpoint-protocol.md` and follow it, scoped to the
|
|
113
|
+
checkpoint they run (group aggregate / pre-impl / post-merge).
|
|
114
|
+
|
|
115
|
+
If the project has no cabinet members, the workflow still runs — it just
|
|
116
|
+
skips the checkpoints (implementation + validate only). Say so.
|
|
117
|
+
|
|
118
|
+
### Step 3 — Launch the workflow
|
|
119
|
+
|
|
120
|
+
Invoke the Workflow tool with the orchestrator script and the assembled
|
|
121
|
+
arguments:
|
|
122
|
+
|
|
123
|
+
- **script:** `.claude/workflows/execute-group.js`
|
|
124
|
+
- **args:**
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"label": "<label>",
|
|
128
|
+
"plans": [{ "fid": "...", "text": "...", "notes": "...", "surfaceArea": "..." }],
|
|
129
|
+
"cabinetMembers": [{ "key": "...", "agentType": "cabinet-...", "path": "...", "directive": "..." }],
|
|
130
|
+
"checkpointProtocolPath": ".claude/cabinet/checkpoint-protocol.md",
|
|
131
|
+
"briefingPath": ".claude/cabinet/_briefing.md"
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Pass `plans` and `cabinetMembers` as real JSON arrays (not stringified).
|
|
136
|
+
|
|
137
|
+
### Step 4 — Present the Completion Report
|
|
138
|
+
|
|
139
|
+
The workflow returns a structured Completion Report. Present it plainly:
|
|
140
|
+
which plans merged, which parked/failed, the checkpoint verdicts, the
|
|
141
|
+
integration result, any new pib-db actions created for deferred manual ACs,
|
|
142
|
+
and the `loose_ends`. The report is also the evidence the completion gate
|
|
143
|
+
(`action-completion-gate.sh`) checks when a `grp:`-tagged plan is marked
|
|
144
|
+
done — don't discard it.
|
|
145
|
+
|
|
146
|
+
If the workflow halted early (a checkpoint returned `stop`, or integration
|
|
147
|
+
failed), report exactly where and why. Nothing was merged on a pre-merge
|
|
148
|
+
halt; on a post-merge CP3 stop, the offending plan was reverted.
|
|
149
|
+
|
|
150
|
+
#### Recovery steps for parked/failed plans
|
|
151
|
+
|
|
152
|
+
After a mixed result, present explicit next steps for each status:
|
|
153
|
+
|
|
154
|
+
- **Merged** — done. No action needed.
|
|
155
|
+
- **Parked** (a merge was reverted after CP3 rejection, or `/validate` failed
|
|
156
|
+
post-merge) — the worktree branch is preserved. To retry this plan
|
|
157
|
+
individually with full cabinet checkpoints (including the per-file-group
|
|
158
|
+
CP2 that the group path skips), **strip its `grp:` tags first** and then
|
|
159
|
+
run `/execute <plan>`. If you don't strip the tags, the completion gate
|
|
160
|
+
will block because the Completion Report shows this plan as "parked," not
|
|
161
|
+
"merged." Strip with: `pib_update_action --tags "<non-grp-tags-only>"`.
|
|
162
|
+
- **Failed implementation** — the worktree agent could not complete the
|
|
163
|
+
plan. Investigate the `deviations` in the report, fix the plan, then
|
|
164
|
+
strip the `grp:` tags and run `/execute <plan>` individually.
|
|
165
|
+
- **No result** — the worktree agent errored entirely. Same recovery:
|
|
166
|
+
strip tags, retry via `/execute`.
|
|
167
|
+
|
|
168
|
+
Re-running `/generate-plan-groups` automatically replaces stale `grp:` tags
|
|
169
|
+
on any plans it re-groups — but only for plans it selects. Plans you retry
|
|
170
|
+
individually should have their tags stripped before running `/execute`.
|
|
171
|
+
|
|
172
|
+
## Principles
|
|
173
|
+
|
|
174
|
+
- **The group is a hint, not a contract.** Always re-validate (Step 1)
|
|
175
|
+
before running. Regenerate freely.
|
|
176
|
+
- **The workflow is the single orchestrator.** Don't try to run the
|
|
177
|
+
checkpoints from this skill — the whole point is that the workflow can
|
|
178
|
+
spawn both implementors and reviewers, and a worktree agent cannot.
|
|
179
|
+
- **Sequential merges, parallel everything else.** Merges into main are
|
|
180
|
+
serialized with `/validate` between them; CP1, implementation, and
|
|
181
|
+
per-plan CP3 run in parallel.
|
|
182
|
+
- **Honest about the ceiling.** This runs the checkpoints; it does not prove
|
|
183
|
+
the review was deep. For high-risk plans, prefer individual `/execute`.
|