specpipe 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +111 -311
  2. package/package.json +2 -1
  3. package/src/cli.js +16 -6
  4. package/src/commands/diff.js +1 -1
  5. package/src/commands/init-agents.js +48 -20
  6. package/src/commands/init-global.js +104 -33
  7. package/src/commands/init-interactive.js +71 -0
  8. package/src/commands/init.js +68 -20
  9. package/src/commands/remove.js +159 -49
  10. package/src/commands/upgrade.js +21 -56
  11. package/src/lib/agent-guards.js +34 -78
  12. package/src/lib/agent-install.js +38 -25
  13. package/src/lib/agents.js +53 -11
  14. package/src/lib/claude-global.js +55 -77
  15. package/src/lib/hooks.js +203 -0
  16. package/src/lib/installer.js +104 -62
  17. package/src/lib/reconcile.js +13 -8
  18. package/templates/{.claude/hooks → hooks}/file-guard.js +26 -21
  19. package/templates/hooks/specpipe-read-guard.sh +94 -21
  20. package/templates/hooks/specpipe-shell-guard.sh +121 -29
  21. package/templates/rules/specpipe-rules.md +77 -0
  22. package/templates/skills/sp-build/SKILL.md +101 -1
  23. package/templates/skills/sp-build-behavior-matrix/SKILL.md +876 -0
  24. package/templates/skills/sp-challenge/SKILL.md +34 -0
  25. package/templates/skills/sp-challenge-behavior-matrix/SKILL.md +289 -0
  26. package/templates/skills/sp-explore/SKILL.md +132 -0
  27. package/templates/skills/sp-explore-behavior-matrix/SKILL.md +862 -0
  28. package/templates/skills/sp-fix/SKILL.md +73 -1
  29. package/templates/skills/sp-fix-behavior-matrix/SKILL.md +338 -0
  30. package/templates/skills/sp-investigate/SKILL.md +70 -0
  31. package/templates/skills/sp-investigate-behavior-matrix/SKILL.md +718 -0
  32. package/templates/skills/sp-plan/SKILL.md +90 -0
  33. package/templates/skills/sp-plan-behavior-matrix/SKILL.md +1037 -0
  34. package/templates/skills/sp-review/SKILL.md +29 -3
  35. package/templates/skills/sp-review-behavior-matrix/SKILL.md +294 -0
  36. package/templates/.claude/CLAUDE.md +0 -79
  37. package/templates/.claude/hooks/path-guard.sh +0 -118
  38. package/templates/.claude/hooks/self-review.sh +0 -27
  39. package/templates/.claude/hooks/sensitive-guard.sh +0 -227
  40. package/templates/.claude/settings.json +0 -68
  41. package/templates/docs/WORKFLOW.md +0 -325
  42. package/templates/docs/specs/.gitkeep +0 -0
  43. package/templates/rules/specpipe-guards.md +0 -40
  44. package/templates/scripts/test-hooks.sh +0 -66
  45. /package/templates/{.claude/hooks → hooks}/comment-guard.js +0 -0
  46. /package/templates/{.claude/hooks → hooks}/glob-guard.js +0 -0
@@ -1,42 +1,115 @@
1
1
  #!/usr/bin/env bash
2
2
  # specpipe-read-guard.sh — blocking pre-file-read hook (enforced guardrail).
3
3
  #
4
- # For agents whose pre-read payload puts the path at .file_path (Cursor
5
- # beforeReadFile) or .tool_input.file_path (Claude/Codex Read). Blocks (exit 2)
6
- # reads of secret files; allows *.example / *.sample / *.template.
4
+ # The single file-access guard for every agent. Reads the target path from
5
+ # whichever shape the agent's payload uses:
6
+ # .tool_input.file_path (Claude/Codex Read/Write/Edit) · .file_path (Cursor beforeReadFile)
7
+ #
8
+ # Blocks (exit 2) reads/writes of secret files: .env, private keys, credentials,
9
+ # tokens. Allows *.example / *.sample / *.template. Honors the first of .agentignore / .aiignore / .cursorignore found in the working directory.
7
10
  #
8
11
  # Exit codes: 0 = allow, 2 = block (reason on stderr).
12
+ # Env: SENSITIVE_GUARD_EXTRA — extra pipe-separated path patterns to block.
9
13
  set -euo pipefail
10
14
 
11
15
  INPUT=$(cat)
12
16
  [[ -z "$INPUT" ]] && exit 0
13
17
 
18
+ # Security guard: warn loudly if Node is missing rather than silently allowing.
19
+ if ! command -v node &>/dev/null; then
20
+ echo "WARNING: read-guard degraded — Node.js not found. Sensitive files are NOT fully protected." >&2
21
+ exit 0
22
+ fi
23
+
14
24
  extract_path() {
15
- if command -v node &>/dev/null; then
16
- printf '%s' "$1" | node -e "
17
- try {
18
- const d = JSON.parse(require('fs').readFileSync(0,'utf-8'));
19
- const p = d.file_path ?? d.tool_input?.file_path ?? d.path;
20
- if (typeof p === 'string') process.stdout.write(p);
21
- } catch {}
22
- " 2>/dev/null
23
- else
24
- printf '%s' "$1" | grep -oE '\"file_path\"[[:space:]]*:[[:space:]]*\"[^\"]*\"' | head -1 | sed -E 's/.*:[[:space:]]*\"//;s/\"$//'
25
+ printf '%s' "$1" | node -e "
26
+ try {
27
+ const d = JSON.parse(require('fs').readFileSync(0,'utf-8'));
28
+ const p = d.tool_input?.file_path ?? d.file_path ?? d.tool_input?.path ?? d.path;
29
+ if (typeof p === 'string') process.stdout.write(p);
30
+ } catch {}
31
+ " 2>/dev/null
32
+ }
33
+
34
+ FILE_PATH=$(extract_path "$INPUT") || exit 0
35
+ [[ -z "$FILE_PATH" ]] && exit 0
36
+
37
+ # ─── Fast-path: these extensions skip ALL checks below — NOTE(review): secrets.yaml / credentials.txt pass unblocked (json still checked) ─
38
+ fast_path_safe() {
39
+ local ext="${1##*.}"
40
+ case "$ext" in
41
+ md|ts|tsx|js|jsx|css|scss|html|svg|yaml|yml|toml|xml|txt|sh|py|rb|rs|go|java|kt|swift|c|cpp|h|hpp|cs|vue|svelte|astro)
42
+ return 0 ;;
43
+ esac
44
+ return 1
45
+ }
46
+
47
+ # ─── Sensitive filename detection ───────────────────────────────────
48
+ is_sensitive() {
49
+ local filepath="$1" basename
50
+ basename=$(basename "$filepath" 2>/dev/null) || return 1
51
+
52
+ case "$basename" in
53
+ .env|.env.local|.env.development|.env.production|.env.staging|.env.test) return 0 ;;
54
+ .npmrc|.pypirc|.netrc) return 0 ;;
55
+ id_rsa|id_ecdsa|id_ed25519|id_dsa) return 0 ;;
56
+ serviceAccountKey.json|service-account*.json) return 0 ;;
57
+ config.json) [[ "$filepath" == *".docker/config.json"* ]] && return 0 ;;
58
+ esac
59
+ case "$basename" in
60
+ *.pem|*.key|*.p12|*.pfx|*.jks|*.keystore|*.truststore) return 0 ;;
61
+ *_rsa|*_ecdsa|*_ed25519|*_dsa) return 0 ;;
62
+ esac
63
+ local lower
64
+ lower=$(echo "$basename" | tr '[:upper:]' '[:lower:]')
65
+ case "$lower" in
66
+ *credential*|*secret*|*private_key*|*privatekey*) return 0 ;;
67
+ firebase-adminsdk*) return 0 ;;
68
+ esac
69
+ if [[ "$basename" =~ ^\.env\. ]]; then
70
+ case "$basename" in
71
+ .env.example|.env.sample|.env.template) return 1 ;;
72
+ *) return 0 ;;
73
+ esac
25
74
  fi
75
+ if [[ -n "${SENSITIVE_GUARD_EXTRA:-}" ]] && printf '%s\n' "$filepath" | grep -qE "$SENSITIVE_GUARD_EXTRA"; then
76
+ return 0
77
+ fi
78
+ return 1
26
79
  }
27
80
 
28
- P=$(extract_path "$INPUT") || exit 0
29
- [[ -z "$P" ]] && exit 0
81
+ # ─── .agentignore / .aiignore / .cursorignore ───────────────────────
82
+ check_agentignore() {
83
+ local filepath="$1" ignorefile=""
84
+ for candidate in .agentignore .aiignore .cursorignore; do
85
+ [[ -f "$candidate" ]] && { ignorefile="$candidate"; break; }
86
+ done
87
+ [[ -z "$ignorefile" ]] && return 1
88
+
89
+ local normalized_fp normalized_pwd relpath
90
+ normalized_fp=$(printf '%s' "$filepath" | tr '\\' '/')
91
+ normalized_pwd=$(pwd | tr '\\' '/')
92
+ relpath=$(printf '%s' "$normalized_fp" | sed "s|^${normalized_pwd}/||") 2>/dev/null || relpath="$filepath"
30
93
 
31
- # Allow example/template variants.
32
- case "$P" in
94
+ while IFS= read -r pattern || [[ -n "$pattern" ]]; do
95
+ [[ -z "$pattern" || "$pattern" == \#* ]] && continue
96
+ if [[ "$relpath" == $pattern ]] || [[ "$(basename "$relpath")" == $pattern ]]; then
97
+ return 0
98
+ fi
99
+ done < "$ignorefile"
100
+ return 1
101
+ }
102
+
103
+ # ─── Allow example/template variants outright ───────────────────────
104
+ case "$FILE_PATH" in
33
105
  *.example|*.sample|*.template) exit 0 ;;
34
106
  esac
35
107
 
36
- SECRET="(\.env)($|\.[A-Za-z0-9]+$)|\.(pem|key|p12|pfx|keystore)$|id_(rsa|ed25519|ecdsa)$|(credentials|secrets?)\.(json|ya?ml|toml|txt)$"
37
- if printf '%s\n' "$P" | grep -qiE "$SECRET"; then
38
- echo "Blocked: '$P' is a secret file. Use its .example variant, or ask the user first." >&2
39
- exit 2
108
+ if ! fast_path_safe "$FILE_PATH"; then
109
+ if is_sensitive "$FILE_PATH" || check_agentignore "$FILE_PATH"; then
110
+ echo "Blocked: '$FILE_PATH' is a sensitive file (secrets, keys, or credentials). Use its .example variant, or ask the user first." >&2
111
+ exit 2
112
+ fi
40
113
  fi
41
114
 
42
115
  exit 0
@@ -1,29 +1,68 @@
1
1
  #!/usr/bin/env bash
2
2
  # specpipe-shell-guard.sh — blocking pre-shell/pre-tool hook (enforced guardrail).
3
3
  #
4
- # Portable across agents whose hook payload puts the shell command at either
5
- # .tool_input.command (Codex PreToolUse, Claude PreToolUse)
4
+ # The single shell guard for every agent. Reads the command from whichever shape
5
+ # the agent's hook payload uses:
6
+ # .tool_input.command (Claude PreToolUse, Codex PreToolUse)
6
7
  # .command (Cursor beforeShellExecution)
7
- # Blocks (exit 2) commands that explore wasteful directories or touch secrets.
8
+ #
9
+ # Two protections:
10
+ # 1. Secrets — commands that read/copy credential files (.env, keys, …).
11
+ # SECRET_POLICY=block (default) → exit 2; =warn → warn on stderr, exit 0
12
+ # (the approval flow: Claude asks the user, then may `cat .env`).
13
+ # 2. Wasteful dirs — exploring node_modules / build output / caches, which
14
+ # burns tokens. Always blocks (exit 2; JSON deny + exit 0 on Antigravity) when an exploration verb is present.
8
15
  #
9
16
  # Exit codes: 0 = allow, 2 = block (reason on stderr). Exit 2 is the portable
10
17
  # block primitive honored by Claude, Codex, and Cursor.
18
+ #
19
+ # Env:
20
+ # SECRET_POLICY block (default) | warn
21
+ # PATH_GUARD_EXTRA extra pipe-separated dir patterns to block
22
+ # SENSITIVE_GUARD_EXTRA extra pipe-separated secret patterns to block
11
23
  set -euo pipefail
12
24
 
13
25
  INPUT=$(cat)
14
26
  [[ -z "$INPUT" ]] && exit 0
27
+ POLICY="${SECRET_POLICY:-block}"
28
+
29
+ # Antigravity honors a stdout JSON decision ({"decision":"deny","reason":…}), NOT exit
30
+ # codes — a non-zero exit is logged as a hook failure and falls through to its native
31
+ # permission prompt. Detect its payload shape so block() emits the right thing.
32
+ IS_ANTIGRAVITY=0
33
+ printf '%s' "$INPUT" | grep -q '"toolCall"' && IS_ANTIGRAVITY=1
34
+
35
+ # Block primitive. Antigravity → stdout JSON deny (+ exit 0, clean). Everyone else →
36
+ # reason on stderr + exit 2 (honored by Claude/Codex directly, Cursor via failClosed).
37
+ block() {
38
+ local reason="$1"
39
+ if [[ "$IS_ANTIGRAVITY" == "1" ]]; then
40
+ local esc; esc=$(printf '%s' "$reason" | sed 's/\\/\\\\/g; s/"/\\"/g')
41
+ printf '{"decision":"deny","reason":"%s"}\n' "$esc"
42
+ exit 0
43
+ fi
44
+ echo "$reason" >&2
45
+ exit 2
46
+ }
15
47
 
48
+ # ─── Extract command (multi-payload) ────────────────────────────────
49
+ # Covers every agent's hook payload shape:
50
+ # .tool_input.command Claude / Codex (PreToolUse Bash)
51
+ # .command Cursor (beforeShellExecution)
52
+ # .tool_args.CommandLine / .toolCall.args.{CommandLine,Command,command} Antigravity (PreToolUse run_command) — verified 2026
16
53
  extract_command() {
17
54
  if command -v node &>/dev/null; then
18
55
  printf '%s' "$1" | node -e "
19
56
  try {
20
57
  const d = JSON.parse(require('fs').readFileSync(0,'utf-8'));
21
- const c = d.tool_input?.command ?? d.command;
58
+ const a = d.toolCall?.args ?? {}; // Antigravity 1.0.13: { toolCall: { args: { CommandLine } } }
59
+ const c = d.tool_input?.command ?? d.command ?? d.tool_args?.CommandLine
60
+ ?? a.CommandLine ?? a.Command ?? a.command;
22
61
  if (typeof c === 'string') process.stdout.write(c);
23
62
  } catch {}
24
63
  " 2>/dev/null
25
64
  else
26
- printf '%s' "$1" | grep -oE '\"command\"[[:space:]]*:[[:space:]]*\"[^\"]*\"' | head -1 | sed -E 's/.*:[[:space:]]*\"//;s/\"$//'
65
+ printf '%s' "$1" | grep -oE '"(command|CommandLine)"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed -E 's/.*:[[:space:]]*"//;s/"$//'
27
66
  fi
28
67
  }
29
68
 
@@ -32,34 +71,87 @@ COMMAND=$(extract_command "$INPUT") || exit 0
32
71
 
33
72
  SEP="[/\\\\]"
34
73
 
35
- # Secrets: block reading/copying credential files (allow *.example / *.sample).
36
- SECRET="(^|[ /\\\\\"'])(\.env)($|[ /\\\\\"'.])"
37
- SECRET+="|(^|[ /\\\\])\.env\.[A-Za-z0-9]+"
38
- SECRET+="|\.(pem|key|p12|pfx|keystore)(\b|$)"
39
- SECRET+="|(^|[ /\\\\])id_(rsa|ed25519|ecdsa)"
40
- SECRET+="|(credentials|secrets?)\.(json|ya?ml|toml|txt)"
41
- if printf '%s\n' "$COMMAND" | grep -qiE '(^|[ |;&`(])(cat|less|more|head|tail|bat|cp|nano|vi|vim|grep|rg|strings|xxd|od|base64)([ ])'; then
42
- CLEAN=$(printf '%s\n' "$COMMAND" | sed -E 's/\.env\.(example|sample|template)//g')
43
- if printf '%s\n' "$CLEAN" | grep -qiE "$SECRET"; then
44
- echo "Blocked: command accesses a secret file (.env / key / credentials). Use .env.example, or ask the user first." >&2
45
- exit 2
74
+ # ─── Secrets in the command ─────────────────────────────────────────
75
+ # handle_secret applies SECRET_POLICY to a matched secret reference: under
76
+ # "warn" it warns on stderr and allows (exit 0); otherwise it calls block().
77
+ handle_secret() {
78
+ local match="$1"
79
+ if [[ "$POLICY" == "warn" ]]; then
80
+ echo "Warning: '$match' is a sensitive file. If the user approved this access, proceed. Otherwise ask the user first before reading secrets." >&2
81
+ exit 0
46
82
  fi
47
- fi
83
+ block "Blocked: command accesses a secret file ('$match'). Use its .example variant, or ask the user first."
84
+ }
48
85
 
49
- # Wasteful directories: only when an exploration verb is present.
50
- EXPLORE="(^|[[:space:]|;&\`(])(ls|ll|la|find|cat|head|tail|less|more|wc|stat|du|tree|bat|od|xxd|hexdump|nl)([[:space:]]|$)"
51
- printf '%s\n' "$COMMAND" | grep -qE "$EXPLORE" || exit 0
86
+ # No verb gate here (matches the original sensitive-guard): a secret referenced
87
+ # anywhere in the command is flagged — `ssh -i id_rsa`, `openssl -in cert.pem`,
88
+ # `gcloud --key-file=…` included. The .example/.sample/.template strip avoids the
89
+ # obvious false positives.
90
+ CLEAN=$(printf '%s\n' "$COMMAND" | sed -E 's/\.env\.(example|sample|template)//g')
52
91
 
92
+ SENSITIVE_IN_CMD=$(printf '%s\n' "$CLEAN" | grep -oE '[\./[:alnum:]_-]*\.env([\.[:alnum:]_-]*)?' | head -5) || true
93
+ if [[ -n "$SENSITIVE_IN_CMD" ]]; then
94
+ while IFS= read -r m; do
95
+ [[ -z "$m" ]] && continue
96
+ case "$m" in *.example|*.sample|*.template) continue ;; esac
97
+ handle_secret "$m"
98
+ done <<< "$SENSITIVE_IN_CMD"
99
+ fi
100
+ KEY_IN_CMD=$(printf '%s\n' "$CLEAN" | grep -oE '[[:alnum:]_./-]*\.(pem|key|p12|pfx|jks|keystore)($|[^[:alnum:]])' | head -3) || true
101
+ [[ -n "$KEY_IN_CMD" ]] && handle_secret "$(printf '%s' "$KEY_IN_CMD" | head -1)"
102
+ NAME_IN_CMD=$(printf '%s\n' "$CLEAN" | grep -oiE '(id_rsa|id_ecdsa|id_ed25519|id_dsa|serviceAccountKey\.json|service-account[[:alnum:]_-]*\.json|\.npmrc|\.pypirc|\.netrc)' | head -3) || true
103
+ [[ -n "$NAME_IN_CMD" ]] && handle_secret "$(printf '%s' "$NAME_IN_CMD" | head -1)"
104
+ CRED_IN_CMD=$(printf '%s\n' "$CLEAN" | grep -oiE '[[:alnum:]_./-]*(credential|secret|private_key|privatekey)[[:alnum:]_./-]*' | head -3) || true
105
+ [[ -n "$CRED_IN_CMD" ]] && handle_secret "$(printf '%s' "$CRED_IN_CMD" | head -1)"
106
+ if [[ -n "${SENSITIVE_GUARD_EXTRA:-}" ]] && printf '%s\n' "$CLEAN" | grep -qE "$SENSITIVE_GUARD_EXTRA"; then
107
+ handle_secret "$(printf '%s\n' "$CLEAN" | grep -oE "$SENSITIVE_GUARD_EXTRA" | head -1)"
108
+ fi
109
+
110
+ # ─── Wasteful directories ───────────────────────────────────────────
53
111
  BLOCKED="(^|[ /\\\\])node_modules(${SEP}|$| )"
54
- BLOCKED+="|(__pycache__)|\.git${SEP}(objects|refs)"
55
- BLOCKED+="|(^|[ /\\\\])dist${SEP}|(^|[ /\\\\])build${SEP}|\.next${SEP}"
56
- BLOCKED+="|(^|[ /\\\\])vendor(${SEP}|$| )|(^|[ /\\\\])target${SEP}"
57
- BLOCKED+="|(^|[ /\\\\])\.venv${SEP}|(^|[ /\\\\])venv${SEP}|\.pytest_cache${SEP}|\.cache(${SEP}|$| )"
58
- CLEAN=$(printf '%s\n' "$COMMAND" | sed -E "s|node_modules[/\\]\.bin[/\\][^[:space:]]*||g")
59
- if printf '%s\n' "$CLEAN" | grep -qE "$BLOCKED"; then
60
- M=$(printf '%s\n' "$COMMAND" | grep -oE "$BLOCKED" | head -1)
61
- echo "Blocked: command explores '$M' — a large/generated directory. Use scoped paths or Grep." >&2
62
- exit 2
112
+ BLOCKED+="|(__pycache__)"
113
+ BLOCKED+="|\.git${SEP}(objects|refs)"
114
+ BLOCKED+="|(^|[ /\\\\])dist${SEP}"
115
+ BLOCKED+="|(^|[ /\\\\])build${SEP}"
116
+ BLOCKED+="|\.next${SEP}"
117
+ BLOCKED+="|(^|[ /\\\\])vendor(${SEP}|$| )"
118
+ BLOCKED+="|(^|[ /\\\\])Pods(${SEP}|$| )"
119
+ BLOCKED+="|\.build${SEP}"
120
+ BLOCKED+="|DerivedData"
121
+ BLOCKED+="|\.gradle${SEP}"
122
+ BLOCKED+="|(^|[ /\\\\])target${SEP}"
123
+ BLOCKED+="|\.nuget"
124
+ BLOCKED+="|\.cache(${SEP}|$| )"
125
+ BLOCKED+="|(^|[ /\\\\])\.venv${SEP}"
126
+ BLOCKED+="|(^|[ /\\\\])venv${SEP}"
127
+ BLOCKED+="|\.mypy_cache${SEP}"
128
+ BLOCKED+="|\.pytest_cache${SEP}"
129
+ BLOCKED+="|\.ruff_cache${SEP}"
130
+ BLOCKED+="|\.egg-info(${SEP}|$| )"
131
+ BLOCKED+="|(^|[ /\\\\])bin${SEP}(Debug|Release|net|x64|x86)"
132
+ BLOCKED+="|(^|[ /\\\\])obj${SEP}(Debug|Release|net)"
133
+ BLOCKED+="|\.nuxt${SEP}"
134
+ BLOCKED+="|\.svelte-kit${SEP}"
135
+ BLOCKED+="|\.parcel-cache${SEP}"
136
+ BLOCKED+="|\.turbo${SEP}"
137
+ BLOCKED+="|(^|[ /\\\\])out${SEP}(server|static|_next)"
138
+ BLOCKED+="|\.bundle${SEP}"
139
+
140
+ if [[ -n "${PATH_GUARD_EXTRA:-}" ]]; then
141
+ BLOCKED+="|$PATH_GUARD_EXTRA"
142
+ fi
143
+
144
+ EXPLORE_VERB_RE="(^|[[:space:]|;&\`(])(ls|ll|la|find|cat|head|tail|less|more|wc|stat|du|tree|bat|od|xxd|hexdump|nl)([[:space:]]|$)"
145
+ if ! printf '%s\n' "$COMMAND" | grep -qE "$EXPLORE_VERB_RE"; then
146
+ exit 0
147
+ fi
148
+
149
+ # Strip node_modules/.bin/<binary> — executing an installed binary isn't exploration.
150
+ COMMAND_FOR_CHECK=$(printf '%s\n' "$COMMAND" | sed -E "s|node_modules[/\\]\.bin[/\\][^[:space:]]*||g")
151
+
152
+ if printf '%s\n' "$COMMAND_FOR_CHECK" | grep -qE "$BLOCKED"; then
153
+ MATCHED=$(printf '%s\n' "$COMMAND" | grep -oE "$BLOCKED" | head -1)
154
+ block "Blocked: command references '$MATCHED' — this directory is typically large and exploring it wastes tokens. Use Glob or Grep tools instead."
63
155
  fi
64
156
 
65
157
  exit 0
@@ -0,0 +1,77 @@
1
+ These are the always-on operating rules for working in this repository with specpipe.
2
+ On Claude Code the guardrails are also enforced by blocking hooks; on every agent this
3
+ whole document is an always-on rule you must self-enforce.
4
+
5
+ ## Spec-first cycle
6
+
7
+ Every change follows: **SPEC (with acceptance scenarios) → CODE + TESTS → BUILD PASS**.
8
+
9
+ - Specs live in `docs/specs/<feature>/<feature>.md`; acceptance scenarios (Given/When/Then)
10
+ are embedded under `## Stories`.
11
+ - Never write code before the spec exists. Never auto-modify a spec from code.
12
+ - The spec is the source of truth — if code contradicts it, the code is wrong.
13
+
14
+ ## Workflow
15
+
16
+ | Trigger | Commands |
17
+ |---------|----------|
18
+ | New project (no codebase yet) | `/sp-explore` → `/sp-scaffold` → `/sp-plan` → `/sp-build` |
19
+ | Feature unclear / complex | `/sp-explore` → `/sp-plan` |
20
+ | New feature | `/sp-plan` → `/sp-challenge` (optional) → code in chunks → `/sp-build` each chunk |
21
+ | Update feature | `/sp-plan <spec-path> "changes"` → code → `/sp-build` (do NOT hand-edit the spec first) |
22
+ | Bug (complex/outage) | `/sp-investigate "<bug>"` → `/sp-fix <investigation-file>` |
23
+ | Bug fix | `/sp-fix "<bug>"` (test-first: failing test → fix → green) |
24
+ | Remove feature | `/sp-plan <spec-path> "remove stories"` → delete code + tests → build pass |
25
+ | Pre-merge check | `/sp-review` |
26
+ | Commit | `/sp-commit` (secret scan + conventional message) |
27
+ | Render spec / markdown as HTML | `/sp-spec-render <feature>` · `/sp-md-render <file.md>` |
28
+ | Multi-LLM review / humanize | `/sp-voices [target]` · `/sp-humanize [text]` |
29
+
30
+ ## Guardrails
31
+
32
+ - **Don't explore large directories.** Never grep/list/read inside `node_modules/`,
33
+ build/dist artifacts, or `.git/` internals — scope to specific paths.
34
+ - **Never touch secrets.** Do not read or write `.env*`, private keys, credentials, or
35
+ token stores. Respect any `.agentignore` patterns.
36
+ - **Never drop real code.** Don't replace implementation with placeholder comments like
37
+ `// ... existing code ...`. Reproduce the full code when editing.
38
+ - **Avoid broad globs.** No `**/*.ts` at the project root; scope globs to a directory.
39
+ - **Keep files focused.** Don't let a source file grow past a few hundred lines — split.
40
+
41
+ ## Testing
42
+
43
+ - Run the project's native test command (`npx vitest run`, `pytest`, `cargo test`,
44
+ `go test ./...`, `swift test`, …). Compile/typecheck before running tests.
45
+ - Max 3 fix loops on a failure, then stop and report.
46
+ - **Never edit production code to make a test pass** — ask first.
47
+ - No mocks/fakes/stubs to pass builds; real implementations only. Test doubles are for
48
+ external services (APIs, DBs) that can't run locally.
49
+
50
+ ## Conventions
51
+
52
+ - Commits: conventional — `type(scope): description` (`feat`, `fix`, `docs`, `refactor`,
53
+ `test`, `chore`, `perf`, `build`, `ci`).
54
+ - File names: kebab-case, descriptive enough to understand purpose from the path.
55
+ - Dates in filenames: `$(date +%Y-%m-%d)` — never guess dates.
56
+ - Specs: kebab-case feature dir `docs/specs/<feature>/<feature>.md` (2–3 words, no prefix).
57
+ - Never `git push --force` to `main`/`master`; never commit `.env`, certs, or keys.
58
+ - Self-review before finishing: tests pass, no secrets, no debug code, matches the spec.
59
+
60
+ ## Forbidden
61
+
62
+ - `any`/`Any` without a justifying comment; force unwrap/cast without a preceding guard.
63
+ - Hardcoded secrets, API keys, tokens, or credentials in source.
64
+ - Mocks or fake data used solely to pass tests.
65
+ - Editing generated files, vendor dirs, or lock files; ignoring compiler/linter warnings.
66
+ - Replacing real code with placeholder comments; renaming params to `_param` to dodge
67
+ unused-warnings instead of fixing them.
68
+ - Reading/writing `.env`, `.pem`, `.key`, or other sensitive files (use `.env.example`).
69
+
70
+ ## Project Info
71
+
72
+ > Auto-detected on install; verify and edit if wrong.
73
+
74
+ - **Language:** [CUSTOMIZE]
75
+ - **Test framework:** [CUSTOMIZE]
76
+ - **Source directory:** [CUSTOMIZE]
77
+ - **Test directory:** [CUSTOMIZE]
@@ -84,6 +84,7 @@ The prompt MUST contain:
84
84
  2. **The dispatched-subagent contract** (paste verbatim — this is what keeps the controller the single owner of cross-story state):
85
85
  - Build only your assigned story; the Phase 2 loop runs exactly once.
86
86
  - Name every test with the `AS-NNN` it covers (`AS-NNN: <scenario>`), one test node per primary AS — the controller's Spec Coverage Gate (Phase 3.5) counts coverage by that ID, so an untagged test is invisible to it.
87
+ - If your pasted checklist contains `BM.<AS-NNN>.<surface>` lines, treat each as a cell-level test obligation. Your test evidence must include the AS id and exercise that exact surface/source/timing cell. Do not satisfy a BM line with a mock of the boundary named by the cell.
87
88
  - Do NOT write `.build-progress` or `.build-checklist` — the controller owns them. Report your checklist ticks in the contract instead.
88
89
  - Do NOT run Phase 3 (full-suite), Phase 4.5 (cross-story checklist review), or Phase 5 (summary/cleanup) — those are the controller's job. Run only your story's filtered tests.
89
90
  - Do NOT surface a spec signal to the user or edit the spec — return it in the `Spec signal` field.
@@ -109,6 +110,7 @@ Files changed: [...]
109
110
  Tests added: [exact test names]
110
111
  Checklist: [lines ticked]
111
112
  Edge compliance: [the 8-row table for this story — each ✓ or N/A+reason] (depth forcing-function; the controller aggregates these into Phase 5)
113
+ Behavior Matrix evidence: [BM lines covered with test refs; BM lines partial with destinations; mocked-boundary concerns]
112
114
  Spec signal: none | S1 <gap> | S2 <conflict> | S3 <added guard>
113
115
  ```
114
116
 
@@ -266,10 +268,15 @@ Derive a checklist from the spec — each "promise" in this build's scope become
266
268
  **Sources (all in `docs/specs/<feature>/<feature>.md`) — anchor on IDENTITY, not nouns:**
267
269
  - **Each `AS-NNN` → at least one line carrying that ID** (`AS-NNN`, or `AS-NNN.Tk` when one AS needs several assertions). This is the primary anchor: the checklist is keyed on the spec's case IDs, not on text it happens to mention. A Then with several fields/effects becomes several `AS-NNN.Tk` lines — but they all carry the same AS-NNN, so the AS is never lost.
268
270
  - Each Constraint → one `C-NNN` line.
271
+ - Each `## Behavior Matrix` cell with `Coverage = AS-NNN` → one `BM.<AS-NNN>.<surface>` line carrying the referenced `AS-NNN`. This is a stricter obligation than the generic AS line: it preserves the exact state/viewer/surface cell that QA will test.
272
+ - Each `## Behavior Matrix` cell with `Coverage = GAP-NNN` → a normal `GAP-NNN` checklist line annotated `BM cell unresolved: <state>/<viewer>/<surface>`. It is visible, but it is NOT a BM test obligation.
273
+ - Each `## Behavior Matrix` cell with `Coverage = N/A: <reason>` → one `[N/A] BM.NA.<surface>` line with the reason.
269
274
  - Each open `GAP-NNN` (status not `resolved`) → one `[ ]` line tagged `GAP` (so a parked gap is visible, not silently dropped — see Spec Coverage Gate).
270
275
  - Each Not-in-Scope row → one `[N/A]` line (prevents accidental ticking).
276
+ - Each matching project-local invariant entry under `docs/invariants/INV-*.md` with `status: enforced` and a `test_ref` → one `INV-###` checklist line carrying the invariant id and test ref. `candidate` and `confirmed` entries are visible risks/spec obligations but not build gates unless the spec turned them into AS/GAP/BM lines. Use the invariant registry README/schema as base knowledge; README examples are not runtime entries.
277
+ - Each `## Sibling Surface Map` confirmed surface is checked through its AS/GAP/BM coverage, not by a separate checklist line. Candidate rows with `GAP-NNN` or `ignore(reason)` are visible context only. A candidate row with missing disposition is a spec signal, not a build requirement to implement.
271
278
 
272
- **Completeness invariant (checked, not hoped):** every `AS-NNN` and `C-NNN` in the spec's `## Stories`/Constraints MUST appear on ≥1 checklist line. An AS with no line = the checklist is wrong (re-derive), not the spec. Deriving from Then-nouns alone silently drops AS whose Then is verb-shaped ("retries", "must not send") or whose nouns collide with another AS — anchoring on the ID closes that.
279
+ **Completeness invariant (checked, not hoped):** every `AS-NNN` and `C-NNN` in the spec's `## Stories`/Constraints MUST appear on ≥1 checklist line. Every `Coverage = AS-NNN` Behavior Matrix cell MUST appear on exactly one `BM.AS-*` checklist line; every `Coverage = GAP-NNN` Behavior Matrix cell MUST appear on exactly one `GAP-* — BM cell unresolved` checklist line; every `Coverage = N/A` Behavior Matrix cell MUST appear on one `[N/A] BM.NA.*` checklist line. An AS with no line = the checklist is wrong (re-derive), not the spec. A Behavior Matrix cell with no line = QA coverage was dropped. Deriving from Then-nouns alone silently drops AS whose Then is verb-shaped ("retries", "must not send") or whose nouns collide with another AS — anchoring on the ID closes that.
273
280
 
274
281
  **Granularity rule (so two devs produce the same checklist):**
275
282
  - 1 line per **observable output field** (appears in Then result, independently assertable)
@@ -287,6 +294,9 @@ Example: Then "returns sorted list of {file, confidence, edges}" → 3 lines (on
287
294
  [ ] AS-012.T2 — affected_tests includes TESTED_BY edges | owner: S-003
288
295
  [ ] AS-012.T3 — output sorted by confidence | owner: S-004
289
296
  [ ] C-003 — query completes under 50ms | owner: S-005
297
+ [ ] BM.AS-012.appointment-list — Confirmed/trainer/list row parity | owner: S-003
298
+ [ ] GAP-004 — BM cell unresolved: Requested/booker/reschedule action | owner: GAP
299
+ [N/A] BM.NA.cancelled-client-calendar — terminal state has no invite | owner: —
290
300
  [N/A] AS-015 — out of scope (M3) | owner: —
291
301
  ```
292
302
 
@@ -374,6 +384,22 @@ Before writing tests, trace all paths and draw a diagram to see gaps upfront —
374
384
 
375
385
  **Step 2 — Trace user flows:** For multi-step features, trace the user journey. Edge cases: double-click/rapid resubmit, navigate away mid-op, submit stale data (session expired), slow connection, concurrent actions (2 tabs open).
376
386
 
387
+ **Step 2b — Trace Behavior Matrix cells:** If the spec has `## Behavior Matrix`, copy every non-N/A cell into the Coverage Map before drawing the diagram. Each cell is a QA-visible obligation, not just prose. Classify the test level by the cell's surface:
388
+ - Same-process logic/state cell → unit or integration is acceptable if it observes the public boundary.
389
+ - Cross-surface parity cell (list/detail/feed/API/dashboard/worklist) → integration/E2E over the real producer/consumer path.
390
+ - External/provider surface (calendar/email/payment/identity) → contract/integration against the verified boundary or a provider fake that is itself verified; never a pure mock of the boundary under test.
391
+ - Timing/source cell (`realtime`, `refresh-required`, `persisted+served`, `transient`) → the test must assert that lifecycle point, not just the immediate action response.
392
+
393
+ In the diagram, prefix these rows with `[BM]` and keep the AS id visible:
394
+
395
+ ```
396
+ BEHAVIOR MATRIX COVERAGE
397
+ ========================
398
+ [BM] Confirmed × trainer × worklist — AS-004
399
+ ├── [GAP] [→E2E] assigned trainer sees updated item
400
+ └── [GAP] [→E2E] non-runner does not see item
401
+ ```
402
+
377
403
  **Step 3 — Draw the diagram:**
378
404
 
379
405
  ```
@@ -446,6 +472,20 @@ If every path is already covered, the diagram will have zero `[GAP]` rows — th
446
472
 
447
473
  **LINKED-FIELD SEAM RULE:** If the spec has a `## Linked Fields` block (it is one side of a producer/consumer split), each linked field's **seam AS is a real-integration test** — run it against the ACTUAL producer (build the producer side first; the consumer spec's seam tests run against it), **never a mocked consumer**. A mocked seam is a vacuous test: the mismatch it exists to catch — field on the wrong surface (list vs single-get) or wrong lifecycle (transient-in-response vs persisted+served) — is exactly what the mock hides. Do NOT mark a consumer story `done` on a mocked seam. **Auto-Mode:** a single-side subagent cannot see the seam — the controller runs the cross-spec seam tests after both sibling specs are built (A4b/A7).
448
474
 
475
+ **BEHAVIOR MATRIX NO-VACUOUS-MOCK RULE:** If a `## Behavior Matrix` cell names a surface/source/timing boundary, the test for that cell must not mock the boundary it is meant to verify. Examples:
476
+ - API list vs single-get parity → do not assert against fabricated response fixtures only; exercise the real read serializers/handlers or an integration path that uses them.
477
+ - Calendar/email parity → do not mock both email and calendar recipients from the same test fixture; verify the mapping into each outbound contract or a verified provider fake.
478
+ - Worklist/feed/dashboard cascade → do not mock the read model that should be updated; verify the persisted/read-model result after the state transition.
479
+
480
+ A mocked dependency is allowed only outside the boundary under test. If you cannot test the boundary with the current harness, mark the BM checklist line `[~]` with a concrete destination (`/sp-scaffold` E2E harness, contract-test story, or Known-Gap). Do not mark it `[x]`.
481
+
482
+ **TERMINAL LIFECYCLE CASCADE TEST RULE:** If a Behavior Matrix cell or AS describes a terminal lifecycle action (`done`, `completed`, `cancelled`, `declined`, `log outcome`, `terminal outcome`, `overdue outcome`, `reschedule creates replacement`) that removes, cancels, replaces, or advances pending work, the owning checklist MUST include a named cascade test. The test name or evidence must contain the word `cascade` and assert the downstream work surface changed, not only the primary record. Examples:
483
+ - Pending matchup terminal outcome → assert pending matchup queue/assignment affordance is removed or cancelled.
484
+ - Reschedule/next appointment → assert old and new appointment state, runner/owner carry-forward, and affected calendar/worklist surfaces.
485
+ - Outcome-created tracker/todo/FNA/production records → assert the downstream record is visible to the required viewer(s) with the expected role fields.
486
+
487
+ Do not satisfy this rule with a unit test that only checks `is_done`, status text, or a returned DTO. If the downstream surface cannot be exercised in the current harness, mark the BM line `[~]` with a concrete cascade-test destination. A terminal lifecycle BM line without a cascade test is not DONE.
488
+
449
489
  ---
450
490
 
451
491
  ## Test Command
@@ -551,9 +591,15 @@ Record for the Phase 5 summary: `S-00X added N tests: <list exact test names>`.
551
591
  ```
552
592
  [x] AS-012.T1 — covered by affected_tests_test.rs:test_convention_match
553
593
  [~] AS-012.T2 — PARTIAL: query wired, emit deferred → M3 S-008
594
+ [x] BM.AS-012.appointment-list — covered by appointment_reschedule_e2e.ts:AS-012 list row parity
554
595
  ```
555
596
  For `[x]`, record `file:test-name`. For `[~]`, record the destination.
556
597
 
598
+ If a checklist line starts with `BM.`:
599
+ - The `file:test-name` must include the AS id from the BM line.
600
+ - The test must assert the exact cell surface/source/timing from the Behavior Matrix row.
601
+ - Before ticking `[x]`, apply the Behavior Matrix No-Vacuous-Mock Rule. If the test mocks the named boundary, mark `[~]` with a concrete destination instead.
602
+
557
603
  **Carve-out scan on the story diff:**
558
604
  ```
559
605
  git diff <story-files> | grep -nE "TODO|FIXME|XXX|HACK"
@@ -647,6 +693,57 @@ comm -23 /tmp/spec-ids.txt /tmp/covered-ids.txt
647
693
 
648
694
  **Auto-mode:** the controller runs this gate at A7 (finish) over the whole spec, and may run the per-story slice after each story's gates (A4). Dispatched subagents are told (A2 contract) to embed `AS-NNN` in every test name so the gate can see their work.
649
695
 
696
+ ### Behavior Matrix Coverage Gate
697
+
698
+ Run this in addition to the AS/C identity gate when the spec contains `## Behavior Matrix`.
699
+
700
+ The AS/C gate proves every acceptance scenario has at least one test. It does not prove every state/viewer/surface cell was tested, because several cells can share one AS id. The Behavior Matrix gate is the stricter cell-level check.
701
+
702
+ Required evidence for each matrix cell:
703
+ - For `Coverage = AS-NNN`: a `.build-checklist` line named `BM.<AS-NNN>.<surface>` is `[x]`.
704
+ - That line includes a concrete test reference (`file:test-name`) and the test name contains the cell's `AS-NNN`.
705
+ - The referenced test exercises the named surface/source/timing boundary, not a mock of that boundary.
706
+ - If the cell is a terminal lifecycle/cascade cell, the referenced evidence includes a named cascade test per the Terminal Lifecycle Cascade Test Rule.
707
+ - For `Coverage = GAP-NNN`: a normal `GAP-NNN — BM cell unresolved: ...` checklist line exists. It stays visible in the open-gaps summary and is not a test obligation until `/sp-plan` resolves it into an AS.
708
+ - For `Coverage = N/A`: an `[N/A] BM.NA.<surface>` checklist line exists with a concrete reason.
709
+
710
+ Minimal shell check (identity only; still review the checklist line for surface/boundary evidence):
711
+
712
+ ```bash
713
+ CHECKLIST=docs/specs/<feature>/.build-checklist
714
+
715
+ # Any BM.AS line not ticked is an uncovered matrix test obligation.
716
+ grep -E '^\[[ ~]\] BM\.AS' "$CHECKLIST" || true
717
+
718
+ # Any ticked BM.AS line without a test reference is suspicious and must be reviewed.
719
+ grep '^\[x\] BM\.AS' "$CHECKLIST" | grep -vE '[[:alnum:]_./-]+:[[:alnum:]_ -]+' || true
720
+ ```
721
+
722
+ - **Any `[ ] BM.AS...` line → BLOCKED.** The QA cell has no test.
723
+ - **Any `[~] BM.AS...` line → DONE_WITH_CONCERNS**, unless the destination is a concrete future story/Known-Gap and the feature is not release-critical.
724
+ - **Any `[x] BM.AS...` line with only mocked-boundary evidence → treat as `[~]`**, not done.
725
+ - **Any terminal lifecycle BM line without a named cascade test → treat as `[~]` or BLOCKED**, depending on release criticality.
726
+ - **Any `GAP-NNN — BM cell unresolved` line → visible open gap, not a build test failure.** Do not implement or test that cell until `/sp-plan` resolves it.
727
+
728
+ ### Sibling Surface Map Gate
729
+
730
+ Run this when the spec contains `## Sibling Surface Map`.
731
+
732
+ - Confirmed sibling surfaces must be covered indirectly by AS/GAP/BM lines. If a confirmed surface has no AS/GAP/BM coverage, emit a Spec Signal and mark DONE_WITH_CONCERNS or BLOCKED depending on release criticality.
733
+ - Candidate rows are not build gates. Do not implement a candidate solely because it was discovered.
734
+ - A high/medium candidate without `cover`, `GAP-NNN`, or `ignore(reason)` means `/sp-plan` did not finish disposition. Emit a Spec Signal; do not silently choose behavior in code.
735
+
736
+ ### Invariant Registry Gate
737
+
738
+ Use the invariant registry README/schema as base knowledge; README examples are not runtime entries. Run this when the current project has `docs/invariants/INV-*.md`.
739
+
740
+ - `status: enforced` with `test_ref` → the referenced test, or an equivalent regression named in the build summary, must be present and run. Missing evidence → BLOCKED.
741
+ - `status: confirmed` → not a hard build gate by itself; verify the spec has AS/GAP/BM coverage when this build touches the component.
742
+ - `status: candidate` → advisory only; do not invent requirements. If the build confirms it, emit a Spec Signal to promote it via `/sp-plan` or `/sp-fix`.
743
+ - `status: retired` → ignore unless this build revives the component.
744
+
745
+ This gate prevents "memory exists but is never enforced" while avoiding false requirements from noisy candidate entries.
746
+
650
747
  ---
651
748
 
652
749
  ## Phase 4: Fix Loop
@@ -722,6 +819,9 @@ Stories: [AS-001 ✓, AS-002 ✓, AS-005 new]
722
819
  TDD evidence: [S-001: RED (paste 1st failing assertion raw) → GREEN ✓ | tests added: <names>, S-002: RED (raw output) → GREEN ✓ | tests added: <names>]
723
820
  Checklist: X/Y [x], A/Y [~] (destinations: <story-id list or Known-Gap refs>), B/Y [ ] (reasons), C/Y [N/A]
724
821
  Coverage gate (Phase 3.5): PASS — all AS/C carry a test | BLOCKED — uncovered: <AS/C ids> (breadth)
822
+ Behavior Matrix gate: PASS — all BM.AS cells [x] (GAP/N/A cells listed separately) | BLOCKED — uncovered: <BM lines> | CONCERNS — partial/mocked-boundary: <BM lines> | N/A — spec has no Behavior Matrix
823
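+ Sibling Surface Map gate: PASS — confirmed surfaces covered | BLOCKED — release-critical confirmed surface uncovered: <surface ids> | CONCERNS — uncovered confirmed surface or candidate disposition/spec signal: <surface/candidate ids> | N/A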
824
+ Invariant gate: PASS — enforced invariants covered | BLOCKED — missing enforced invariant tests: <INV ids> | N/A — no enforced invariants touched
725
825
  Edge Case Compliance: [per-story table — every row ✓ or N/A+reason] (depth)
726
826
  Open gaps: [GAP-NNN not yet resolved, or "none"]
727
827
  E2E: [authored + green: <test names> | deferred non-critical (with reason): <flows> | none]. A critical [→E2E] left unwritten → status is DONE_WITH_CONCERNS, not DONE.