voidforge-build 23.18.0 → 23.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/.claude/agents/celebrimbor-forge-artist.md +1 -0
  2. package/dist/.claude/agents/ducem-token-economics.md +1 -0
  3. package/dist/.claude/agents/galadriel-frontend.md +1 -0
  4. package/dist/.claude/agents/romanoff-integrations.md +4 -0
  5. package/dist/.claude/agents/silver-surfer-herald.md +19 -4
  6. package/dist/.claude/commands/architect.md +4 -3
  7. package/dist/.claude/commands/assemble.md +12 -0
  8. package/dist/.claude/commands/assess.md +1 -0
  9. package/dist/.claude/commands/build.md +8 -0
  10. package/dist/.claude/commands/contextmeter.md +56 -0
  11. package/dist/.claude/commands/debrief.md +10 -0
  12. package/dist/.claude/commands/engage.md +5 -0
  13. package/dist/.claude/commands/git.md +13 -1
  14. package/dist/.claude/commands/imagine.md +1 -1
  15. package/dist/.claude/commands/seal.md +80 -0
  16. package/dist/.claude/commands/ux.md +13 -0
  17. package/dist/.claude/workflows/assemble-review.workflow.js +26 -6
  18. package/dist/.claude/workflows/gauntlet.workflow.js +59 -12
  19. package/dist/CHANGELOG.md +73 -0
  20. package/dist/CLAUDE.md +9 -1
  21. package/dist/HOLOCRON.md +16 -2
  22. package/dist/VERSION.md +3 -1
  23. package/dist/docs/methods/AI_INTELLIGENCE.md +3 -0
  24. package/dist/docs/methods/ASSEMBLER.md +12 -0
  25. package/dist/docs/methods/BUILD_PROTOCOL.md +7 -0
  26. package/dist/docs/methods/CAMPAIGN.md +11 -0
  27. package/dist/docs/methods/DEVOPS_ENGINEER.md +56 -0
  28. package/dist/docs/methods/FIELD_MEDIC.md +1 -0
  29. package/dist/docs/methods/FORGE_ARTIST.md +3 -4
  30. package/dist/docs/methods/GAUNTLET.md +6 -0
  31. package/dist/docs/methods/MUSTER.md +2 -0
  32. package/dist/docs/methods/PRODUCT_DESIGN_FRONTEND.md +18 -0
  33. package/dist/docs/methods/QA_ENGINEER.md +17 -1
  34. package/dist/docs/methods/RELEASE_MANAGER.md +27 -0
  35. package/dist/docs/methods/SECURITY_AUDITOR.md +11 -1
  36. package/dist/docs/methods/SUB_AGENTS.md +31 -0
  37. package/dist/docs/methods/SYSTEMS_ARCHITECT.md +15 -0
  38. package/dist/docs/methods/TESTING.md +2 -0
  39. package/dist/docs/methods/TROUBLESHOOTING.md +2 -2
  40. package/dist/docs/methods/WORKFLOWS.md +18 -2
  41. package/dist/docs/patterns/ai-prompt-safety.ts +85 -0
  42. package/dist/docs/patterns/data-pipeline.ts +59 -1
  43. package/dist/docs/patterns/exclusion-set-invariant.md +62 -0
  44. package/dist/docs/patterns/multi-tenant-property-test.ts +64 -0
  45. package/dist/docs/patterns/oauth-token-lifecycle.ts +21 -0
  46. package/dist/scripts/statusline/README.md +38 -0
  47. package/dist/scripts/statusline/context-awareness-hook.sh +53 -0
  48. package/dist/scripts/statusline/settings-snippet.json +17 -0
  49. package/dist/scripts/statusline/voidforge-statusline.sh +91 -0
  50. package/dist/scripts/voidforge.js +69 -6
  51. package/dist/wizard/lib/claude-md-strategy.d.ts +87 -0
  52. package/dist/wizard/lib/claude-md-strategy.js +198 -0
  53. package/dist/wizard/lib/marker.d.ts +48 -1
  54. package/dist/wizard/lib/marker.js +58 -2
  55. package/dist/wizard/lib/patterns/oauth-token-lifecycle.d.ts +14 -0
  56. package/dist/wizard/lib/patterns/oauth-token-lifecycle.js +21 -0
  57. package/dist/wizard/lib/project-init.js +77 -0
  58. package/dist/wizard/lib/updater.d.ts +19 -0
  59. package/dist/wizard/lib/updater.js +91 -33
  60. package/package.json +2 -2
@@ -28,7 +28,14 @@ export const meta = {
28
28
  ],
29
29
  }
30
30
 
31
- const input = typeof args === 'string' ? JSON.parse(args) : (args || {})
31
+ // Guarded parse: a malformed or empty `args` string must NOT throw and abort the
32
+ // entire run before phase 1 (field report) — fall back to defaults instead.
33
+ let input = {}
34
+ try {
35
+ input = typeof args === 'string' ? (args.trim() ? JSON.parse(args) : {}) : (args || {})
36
+ } catch (_e) {
37
+ input = {}
38
+ }
32
39
  const scope = input.scope || 'the working tree / full codebase per gauntlet.md'
33
40
  // Surfer-selected specialists (gate-recorded upstream). Fall back to the canonical
34
41
  // core leads if no roster was passed (e.g. --light).
@@ -74,33 +81,58 @@ const VERDICT = {
74
81
  },
75
82
  }
76
83
 
77
- const key = (f) => `${(f.file || '').toLowerCase()}::${(f.title || '').toLowerCase().slice(0, 60)}`
84
+ // Normalize the file path before keying so the SAME finding reported by two agents —
85
+ // one with an absolute `/Users/.../repo/src/x.ts:42`, one with a relative `src/x.ts:42` —
86
+ // dedupes instead of surviving as two "distinct" claims (#366 F6). Strip the repo-root
87
+ // prefix (the launch cwd, passed via args; fall back to env) and any leading `./`.
88
+ // Workflows forbid argless `new Date()`/`Math.random()` but env reads are fine.
89
+ const REPO_ROOT = (input.repoRoot || (typeof process !== 'undefined' && process.cwd && process.cwd()) || '')
90
+ .replace(/\/+$/, '')
91
+ const normPath = (p) => {
92
+ let s = (p || '').trim()
93
+ if (REPO_ROOT && s.startsWith(REPO_ROOT + '/')) s = s.slice(REPO_ROOT.length + 1)
94
+ return s.replace(/^\.\/+/, '').toLowerCase()
95
+ }
96
+ const key = (f) => `${normPath(f.file)}::${(f.title || '').toLowerCase().slice(0, 60)}`
78
97
 
79
98
  // ── Round 1: Discovery + Round 2/3: Strike ────────────────────────────────────
99
+ const dom = (a) => a.domain || a.key || 'their domain' // avoid literal "undefined" in prompts
80
100
  phase('Discovery')
81
101
  const discovery = (await parallel(roster.slice(0, 5).map((a) => () =>
82
102
  agent(
83
- `You are ${a.name} (${a.domain}). GAUNTLET discovery pass over ${scope}. Map your domain and report concrete, evidence-backed findings only — every finding needs a file:line and a quoted line or a real repro (no speculation). Rate severity honestly.`,
84
- { label: `${a.name} · discovery:${a.key}`, phase: 'Discovery', schema: FINDINGS, agentType: a.id },
103
+ `You are ${a.name} (${dom(a)}). GAUNTLET discovery pass over ${scope}. Map your domain and report concrete, evidence-backed findings only — every finding needs a file:line and a quoted line or a real repro (no speculation). Rate severity honestly.`,
104
+ { label: `${a.name} · discovery:${a.key}`, phase: 'Discovery', schema: FINDINGS, agentType: a.name },
85
105
  )
86
106
  ))).filter(Boolean)
87
107
 
88
108
  phase('Strike')
89
- const strikeRoster = roster.length > 5 ? roster.slice(5) : roster
109
+ // Specialists only (index ≥5). When the roster is ≤5 (the default/--light core-leads
110
+ // set) there are NO specialists, so strike is EMPTY — falling back to the full roster
111
+ // here re-ran the identical discovery agents with a "find what discovery missed" prompt,
112
+ // doubling cost for no new coverage (field report). parallel([]) is a harmless no-op.
113
+ const strikeRoster = roster.length > 5 ? roster.slice(5) : []
114
+ if (!strikeRoster.length) log('Strike: no specialists beyond the 5 core leads — skipping (no double-pass).')
90
115
  const strike = (await parallel(strikeRoster.map((a) => () =>
91
116
  agent(
92
- `You are ${a.name} (${a.domain}). GAUNTLET strike pass over ${scope}. Deep, adversarial domain review — find what discovery missed. Evidence-backed findings only (file:line + quoted line/repro).`,
93
- { label: `${a.name} · strike:${a.key}`, phase: 'Strike', schema: FINDINGS, agentType: a.id },
117
+ `You are ${a.name} (${dom(a)}). GAUNTLET strike pass over ${scope}. Deep, adversarial domain review — find what discovery missed. Evidence-backed findings only (file:line + quoted line/repro).`,
118
+ { label: `${a.name} · strike:${a.key}`, phase: 'Strike', schema: FINDINGS, agentType: a.name },
94
119
  )
95
120
  ))).filter(Boolean)
96
121
 
97
122
  // ── Dedupe across all domains (plain JS — no agent) ───────────────────────────
123
+ const SEV_RANK = { CRITICAL: 5, HIGH: 4, MEDIUM: 3, LOW: 2, WARN: 1 }
98
124
  const seen = new Map()
99
125
  for (const r of [...discovery, ...strike]) {
100
126
  for (const f of (r.findings || [])) {
101
127
  const k = key(f)
102
128
  if (!seen.has(k)) seen.set(k, { ...f, raisedBy: [r.agent] })
103
- else seen.get(k).raisedBy.push(r.agent)
129
+ else {
130
+ const ex = seen.get(k)
131
+ ex.raisedBy.push(r.agent)
132
+ // Keep the HIGHEST severity any agent assigned. First-write-wins silently
133
+ // discarded a later agent's escalation (e.g. one rates MEDIUM, another HIGH).
134
+ if ((SEV_RANK[f.severity] || 0) > (SEV_RANK[ex.severity] || 0)) ex.severity = f.severity
135
+ }
104
136
  }
105
137
  }
106
138
  const claims = [...seen.values()]
@@ -137,7 +169,7 @@ const ADVERSARIES = [
137
169
  const crossfireRaw = (await parallel(ADVERSARIES.map((a) => () =>
138
170
  agent(
139
171
  `You are ${a.name}, a GAUNTLET crossfire adversary over ${scope}. The domain review already ran — hunt NEW issues it cleared (bypasses, chaos/edge cases, exploit chains). Evidence-backed only (file:line + repro).`,
140
- { label: `${a.name} · crossfire:${a.key}`, phase: 'Crossfire', schema: FINDINGS, agentType: a.id },
172
+ { label: `${a.name} · crossfire:${a.key}`, phase: 'Crossfire', schema: FINDINGS, agentType: a.name },
141
173
  )
142
174
  ))).filter(Boolean)
143
175
  // New crossfire claims (not already confirmed) get the same one-pass refute.
@@ -148,10 +180,23 @@ const crossVerified = await parallel(crossNew.map((c) => () =>
148
180
  agent(
149
181
  `Adversarially verify (default-to-refuted) this crossfire claim, reproducing through the real execution path: "${c.title}" [${c.severity}] at ${c.file}. Evidence: ${c.evidence}.`,
150
182
  { label: `verify:crossfire:${(c.file || '').slice(0, 24)}`, phase: 'Crossfire', schema: VERDICT },
151
- ).then((v) => (v && v.survives ? { ...c, finalSeverity: v.finalSeverity } : null))
183
+ ).then((v) => ({ claim: c, verdict: v }))
152
184
  ))
153
- const crossfireConfirmed = crossVerified.filter(Boolean)
154
- log(`Crossfire: ${crossNew.length} new claims → ${crossfireConfirmed.length} confirmed.`)
185
+ const crossfireConfirmed = []
186
+ const crossfireRefuted = []
187
+ for (const cv of crossVerified.filter(Boolean)) {
188
+ const v = cv.verdict
189
+ // The VERDICT schema lets a verdict be survives:true AND finalSeverity:'REFUTED'.
190
+ // Such a claim used to be kept as "confirmed" yet matched NO council severity bucket
191
+ // (bySeverity only checks CRITICAL/HIGH/MEDIUM/LOW/WARN) and silently vanished — breaking
192
+ // the "never silently dropped" invariant. Confirm ONLY a real severity; log the rest.
193
+ if (v && v.survives && v.finalSeverity && v.finalSeverity !== 'REFUTED') {
194
+ crossfireConfirmed.push({ ...cv.claim, finalSeverity: v.finalSeverity })
195
+ } else {
196
+ crossfireRefuted.push({ title: cv.claim.title, finalSeverity: (v && v.finalSeverity) || 'UNVERIFIED', why: (v && v.rationale) || 'verifier returned no verdict' })
197
+ }
198
+ }
199
+ log(`Crossfire: ${crossNew.length} new claims → ${crossfireConfirmed.length} confirmed, ${crossfireRefuted.length} refuted/unverified (logged, dropped).`)
155
200
 
156
201
  // ── Round 5: Council — synthesize survivors by severity (JS; lead applies fixes) ─
157
202
  phase('Council')
@@ -165,12 +210,14 @@ const report = {
165
210
  confirmed: confirmed.length,
166
211
  refuted: refuted.length,
167
212
  crossfireConfirmed: crossfireConfirmed.length,
213
+ crossfireRefuted: crossfireRefuted.length,
168
214
  },
169
215
  critical: bySeverity('CRITICAL'),
170
216
  high: bySeverity('HIGH'),
171
217
  medium: bySeverity('MEDIUM'),
172
218
  low: [...bySeverity('LOW'), ...bySeverity('WARN')],
173
219
  refutedLog: refuted, // dropped, but never silently — logged per SUB_AGENTS.md
220
+ crossfireRefutedLog: crossfireRefuted, // ditto for crossfire claims that failed the one-pass verify
174
221
  }
175
222
  log(`Council: ${report.critical.length} Critical · ${report.high.length} High · ${report.medium.length} Medium · ${report.low.length} Low/Warn. Lead applies fixes (workflow takes no mid-run input), then re-runs to re-verify.`)
176
223
  return report
package/dist/CHANGELOG.md CHANGED
@@ -6,6 +6,79 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/), and this
6
6
 
7
7
  ---
8
8
 
9
+ ## [23.20.0] - 2026-06-23
10
+
11
+ ### Triaged 12 upstream field reports → methodology hardening, + `/seal` and `/contextmeter`
12
+
13
+ Ran `/debrief --inbox` over the 12 open field reports (#364–#378), applied every accepted fix across ~41 method docs / agents / patterns / commands (one applier per file, diff-coverage verified), implemented the two wizard-code reports, and shipped two new commands. All 12 issues closed. Build clean, full suite 1392 → 1420.
14
+
15
+ The recurring theme across the reports: **green static gates (build, unit tests, denylists, ADR claims, status codes, declared coverage) keep passing while the real runtime / scale / auth / external path is never exercised.**
16
+
17
+ ### Added
18
+
19
+ - **`/seal`** — session-closeout ritual: `/git` commit → push → `/debrief --submit` → `/vault --seal`, then always prints the next-session handoff prompt. Fail-safe pipeline (a test failure halts before push but still seals a vault recording the blocked state). Thin orchestrator — no new persona.
20
+ - **`/contextmeter`** (Ducem Barr) — context-budget meter. A status line rendering a colored context-usage meter (green → yellow → red; yellow 80%, red 92%) plus a `UserPromptSubmit` hook that injects remaining-context awareness into Claude itself once usage crosses the threshold, so it can `/vault`/`/seal` before compaction. **Default-on:** `init` auto-wires it (new `mergeStatuslineSettings`, mirrors the surfer-gate hook merge; sets `statusLine` only if absent, appends the hook idempotently). Named to avoid the native `/statusline`/`/context` collision (logged in `NATIVE_CAPABILITIES.md`). `scripts/statusline/` wired through all four distribution paths + the npm `files` allowlist (LRN-11).
21
+ - **`docs/patterns/post-deploy-probe.sh`** — deploy probe asserting response content + Content-Type, not HTTP status only (defeats the SPA catch-all false-positive). [#371]
22
+ - **`docs/patterns/exclusion-set-invariant.md`** — superset invariant across `.gitignore` / rsync / secret-scanner exclusion sets. [#377]
23
+
24
+ ### Changed — methodology hardening (#364–#378)
25
+
26
+ - **QA / Testing** [#378/#373/#365] — throughput/scale gate for per-row network stages; partial/edge-state smoke; drift-guard shared check-fn + CI-wiring proof.
27
+ - **Architecture** [#378/#373/#376] — ADR concurrency-claim verification gate; ADRs record provider-doc-verified token lifecycle; `architect` Step 4.6 extended to verify EXTERNAL platform claims vs live docs.
28
+ - **Security** [#378/#377] — PII export-format `.gitignore`; denylist = tripwire vs authoritative-boundary (prove reachability before escalating to CRITICAL).
29
+ - **DevOps** [#377/#365/#364] — production-path tracer for arming gates; live contrastive systemd/sandbox smoke; promote gate verifies deployed-commit == branch HEAD.
30
+ - **Gauntlet / Campaign** [#377/#373/#365/#371] — live runtime assertion on fix-batch acceptance; dark-flag activation gated on review (not deploy); declared-vs-implemented reconciliation lens; FR-A5 HTTP two-principal isolation + planted-uid red-check.
31
+ - **Frontend / UX** [#376/#375] — anti-generic originality gate (justify-or-reject named defaults); render-gate coverage (every changed-prop surface, both auth states).
32
+ - **Build / Release** [#376/#366/#364/#375] — "adversarially verify the as-implemented diff" step; distribution-validator rule; pre-integration web-verification; removal sweep.
33
+ - **AI safety** [#378/#364] — deny-list discipline (negation-adjacency / proper-noun allowlist / NFKC / independent eval); LLM per-token cost-constant staleness.
34
+ - **Sub-agents / Workflows** [#378/#377/#371/#366] — orchestrator owns dedup + dispatch; verify the empirical premise of a severity rating; auth-mechanism → client-gate-migration audit; mktemp scratch rule; WORKFLOWS recovery subsection.
35
+ - **Forge artist currency** [#367] — removed the retired DALL-E 3 HD provider row; gpt-image-1 throughout.
36
+ - **Wizard `update`** [#368/#369] — non-destructive CLAUDE.md merge (new `claude-md-strategy.ts`: preserve + side-file / sentinel-fence merge / skip; marker `claudeMd` field; `--help` guard; section-loss warning), replacing the destructive "preserve first 10 lines, overwrite the rest" clobber; legacy-consumer marker detection (offer to create the marker instead of erroring to `init`).
37
+
38
+ ### Fixed
39
+
40
+ - **Silver Surfer gate `/debrief` gap** [#366-F4] — `/debrief` and other fixed-roster non-review commands take a `--light` / `--solo` bypass before launching sub-agents (the hook blocks *every* non-Surfer Agent launch regardless of the gated-commands list). Documented in `CLAUDE.md` + `debrief.md` + `FIELD_MEDIC.md`, including the live-observed `bypass.sh` stale-pointer bug + re-run workaround.
41
+ - **`gauntlet.workflow.js` dedup** [#366-F6] — strip the repo-root prefix from a finding's path before keying, so the same finding raised by two agents dedupes.
42
+
43
+ Dep `^23.19.0` → `^23.20.0`.
44
+
45
+ ---
46
+
47
+ ## [23.19.0] - 2026-06-13
48
+
49
+ ### Gauntlet acceptance test → 14 fixes
50
+
51
+ The ADR-067 workflow re-platform was validated by running `gauntlet.workflow.js` live on the v23.13–v23.18 platform code (a 10-agent Surfer roster fanned to 347 agents → 99 distinct claims → 66 confirmed + 24 crossfire, **0 Critical**). The acceptance test passed *and* surfaced real bugs; the 3-lens-confirmed High/Medium findings are fixed here. (An earlier run crashed the machine ~44 min in at peak load; the relaunch completed cleanly — the workflow is resumable via `resumeFromRunId`.)
52
+
53
+ ### Fixed — Silver Surfer gate (security/correctness)
54
+
55
+ - **`_paths.sh` reap could delete the entire `sessions/` tree.** `find "$SESSIONS_DIR" -maxdepth 1 -type d -mmin +60` matches the search root itself; without `-mindepth 1` a stale `sessions/` root mtime made the sweep `rm -rf` every live session's roster + bypass flag. Added `-mindepth 1`.
56
+ - **Reap-vs-fresh-roster/bypass race (the one VERSION.md flagged for a field report).** The TTL refresh touched the roster *file* but the reaper keys on the session *directory* mtime (touching a child never bumps the parent). `check.sh` now touches `$SESSION_DIR` on every roster/bypass ALLOW, `bypass.sh` touches it on write, and the reap threshold (+120m) is held strictly above the roster TTL (3600s) — an active session is never reaped while still valid.
57
+ - **Gate silently broke on Alpine/minimal Linux:** `_paths.sh` hashed paths with `shasum` only (Perl, absent there). Added a `sha256sum` fallback via a shared `surfer_gate_repo_hash` helper.
58
+ - **`bypass.sh` run before the first hook fire was a silent no-op** (the documented orchestrator order). It now records a repo-scoped *pending* bypass that `check.sh` promotes to the session flag on the first fire.
59
+
60
+ ### Fixed — Dynamic Workflow scripts
61
+
62
+ - **Strike phase double-ran the roster** when ≤5 agents (the default/`--light` core-leads set): `strikeRoster` fell back to the full roster, re-running discovery agents under a "find what discovery missed" prompt. Now empty when there are no specialists.
63
+ - **Crossfire `survives:true` + `finalSeverity:'REFUTED'` verdicts silently vanished** — kept as "confirmed" yet matched no council severity bucket. Now excluded and logged in `crossfireRefutedLog`, honoring the never-silently-dropped invariant.
64
+ - **Dedup kept the first raiser's severity**, discarding a later agent's higher rating. Now keeps the max severity and tracks `raisedBy` (gauntlet + assemble-review). assemble-review also gains a `refutedLog`.
65
+ - **Unguarded `JSON.parse(args)`** could crash the whole run before phase 1; now falls back to defaults. Undefined-`domain` roster items no longer render literal `undefined` in prompts.
66
+
67
+ ### Fixed — distribution & CI
68
+
69
+ - **`npx voidforge-build init` now copies `.claude/workflows/`** (+ `AGENT_CLASSIFICATION.md`). gauntlet.md/assemble.md referenced workflow scripts that the CLI init path never shipped — v23.18.0 only patched the prepack/dist paths, not `project-init.ts`.
70
+ - **`npx voidforge-build update` now propagates `.claude/workflows` + `scripts/surfer-gate`** — both were absent from the updater's diff list, so existing projects never received workflow or gate fixes.
71
+ - **`publish.yml`:** `recover-partial` derives the version from `package.json`, not `github.ref_name` (a branch name on `workflow_dispatch`, which mis-targeted `npm deprecate`); the Playwright cache key keys on the committed manifests instead of the deleted-and-regenerated lockfile (was a permanent cache miss).
72
+
73
+ ### Added / Changed
74
+
75
+ - **`scripts/validate-workflows.sh`** (+ `npm run validate:workflows`, wired into `pretest`): a real syntax gate for `.workflow.js` scripts. Their top-level `await`/`return` make a bare `node --check` fail ("Illegal return statement"), so the v23.18.0 "passes node --check" claim was inaccurate; the validator reproduces the runtime's async wrapper before checking, catching syntax errors before they ship.
76
+ - **`WORKFLOWS.md`** example corrected (`agentType: a.id` → `a.name`) + two new gotchas (agentType resolves by the `name:` display field; how to validate workflow scripts). Stale pre-ADR-060 `/tmp/voidforge-*` paths fixed in the gate `README.md` and `CLAUDE.md`.
77
+
78
+ ### Validation
79
+
80
+ Gate suite **23 → 27** (added reap-root-preservation + pending-bypass cases). Full suite **1390 → 1392** (added init-copies-workflows + updater-tracks-workflows/gate regression tests). typecheck clean. Deferred as field-report candidates: concurrent same-repo pointer collision, `workflow_dispatch` branch guard. Dep `^23.18.0` → `^23.19.0` (ADR-062).
81
+
9
82
  ## [23.18.0] - 2026-06-13
10
83
 
11
84
  ### Workflow re-platform of `/gauntlet` + `/assemble` (ADR-067)
package/dist/CLAUDE.md CHANGED
@@ -33,13 +33,17 @@ ADR-051 enforces this gate at the hook level (PreToolUse). The prose below is th
33
33
 
34
34
  **Hook enforcement (ADR-051 Phase 5b — live as of v23.8.14; state relocated per ADR-060 in v23.8.18).** A `PreToolUse` hook on the **Agent and Workflow tools** (`scripts/surfer-gate/check.sh`; Workflow added per ADR-064) blocks any sub-agent or workflow launch that isn't the Silver Surfer itself, unless a roster has been recorded for this session or a bypass flag is set. State lives at `$XDG_RUNTIME_DIR/voidforge-gate/` (Linux) or `$HOME/.voidforge/gate/` (macOS fallback) — per-user, `0700`. This is the permanent enforcement mechanism. The prose above is a human-readable backup. **Workflow launches are gated identically (ADR-064):** a workflow run requires a recorded roster or a `--light`/`--solo` bypass — workflow-spawned sub-agents are invisible to the per-Agent hook, so gating the launch is what closes that bypass. Build/apply/research workflows that aren't review rosters should set a bypass.
35
35
 
36
+ **Non-review commands with a fixed roster take the bypass, NOT a Surfer muster (#366 F4).** A command like `/debrief` is NOT in the gated-commands list above — but the hook blocks *every* non-Surfer Agent launch regardless of the list, so its command-prescribed sub-agents (Ezri/O'Brien/Nog/Jake) get blocked too. The fix: any fixed-roster, non-review pipeline runs `[ -x scripts/surfer-gate/bypass.sh ] && bash scripts/surfer-gate/bypass.sh --light || true` BEFORE launching its sub-agents. Its roster is command-prescribed, not cherry-picked, so the gate's anti-cherry-pick purpose doesn't apply — the bypass is correct, not a workaround. (The gated list governs *which commands must muster the Surfer*; it does not exempt unlisted commands from the hook.)
37
+
38
+ **Known gate bug — stale session pointer (#366 F4, live-observed).** The repo's session pointer can point at a *dead* session (a prior `/clear`ed or crashed session whose dir still exists). When it does, `bypass.sh` writes the flag to that stale session's dir — the WRONG one — and the live session's launch still blocks. The first blocked `check.sh` fire repoints the pointer to the LIVE session. **Workaround:** re-run the exact same `bash scripts/surfer-gate/bypass.sh --light` line once after the first blocked fire; the second write lands in the now-correct live session dir and the launch proceeds. (Tracked for a real fix: `bypass.sh` should detect a stale pointer rather than rely on a re-run.)
39
+
36
40
  **Orchestrator contract** (you run these Bash commands at the right moments — wrap each in an existence guard so projects on older methodology versions don't error):
37
41
 
38
42
  1. After the Silver Surfer sub-agent returns its roster, and before launching any other Agent: `[ -x scripts/surfer-gate/record-roster.sh ] && bash scripts/surfer-gate/record-roster.sh || true` (optionally pass the roster JSON as the first argument for audit). The existence guard is a defensive no-op for projects that predate v23.10.0 — when the gate started shipping via the npm methodology package per #317.
39
43
  2. When the user's command includes `--light` or `--solo`, BEFORE launching the Surfer or any other agent: `[ -x scripts/surfer-gate/bypass.sh ] && bash scripts/surfer-gate/bypass.sh --light || true` (or `--solo`). **Fails closed on unknown flag values** (ADR-060 v23.8.18 hardening, SEC-003) — passing anything other than `--light` or `--solo` exits 2 with an error. No silent bypass.
40
44
  3. **Normalize roster names before dispatch** (field report #345, DEAL-001). The Silver Surfer returns agent names from a Haiku pre-scan, which can drift from the actual filenames in `.claude/agents/` (extra `silver-surfer-` prefix, a stray `.md` suffix, a hyphen/underscore mismatch). Before you launch, validate each name against `ls .claude/agents/`: if it matches a file (with or without the `.md` extension), keep it; if not, attempt exactly one correction — strip a known prefix/suffix or normalize separators — and re-check. If it still doesn't resolve, DROP that single name and proceed with the rest of the roster. Never block the whole dispatch over one unresolved name; log the dropped name so the Herald roster can be corrected upstream. The gate's job is to enforce *that* a roster ran, not to fail the run over a typo in *one* name.
41
45
 
42
- If `scripts/surfer-gate/check.sh` exists but you skip step 1, your first non-Surfer Agent call in that turn will be blocked with a clear message and your own log line in `/tmp/voidforge-session-$SESSION_ID/gate.log`. You are expected to comply with the block (launch Surfer / run record-roster), not to fight it. If the script does not exist, your project predates v23.10.0; pull the gate from `tmcleod3/voidforge:scripts/surfer-gate/` and merge `settings-snippet.json` into `.claude/settings.json`, or re-run `npx voidforge-build init` against the methodology source.
46
+ If `scripts/surfer-gate/check.sh` exists but you skip step 1, your first non-Surfer Agent call in that turn will be blocked with a clear message and your own log line in the gate's session log (`<gate-root>/sessions/$SESSION_ID/gate.log`, where `<gate-root>` is `$XDG_RUNTIME_DIR/voidforge-gate` or `$HOME/.voidforge/gate` per ADR-060 — the old `/tmp/voidforge-session-*` path was retired in v23.8.18). You are expected to comply with the block (launch Surfer / run record-roster), not to fight it. If the script does not exist, your project predates v23.10.0; pull the gate from `tmcleod3/voidforge:scripts/surfer-gate/` and merge `settings-snippet.json` into `.claude/settings.json`, or re-run `npx voidforge-build init` against the methodology source.
43
47
 
44
48
  **Why.** Seven field incidents (logged in `.claude/agents/silver-surfer-herald.md`) document the cost of skipping: the orchestrator cannot predict cross-domain relevance from the command name alone. The hook makes skipping mechanically impossible for non-bypass cases. Launch the Surfer. Every time.
45
49
 
@@ -130,6 +134,8 @@ Reference implementations in `/docs/patterns/`. Match these shapes when writing.
130
134
  - `nginx-vhost.conf` — Cloudflare-Flexible-safe vhost template: security headers, ACME http-01 passthrough, no redirect loop behind CF's flexible SSL (field report #351, #344)
131
135
  - `error-message-categorization.tsx` — Categorize errors at the UI boundary (network / auth / validation / server / unknown) before choosing copy, so users see actionable messages not raw internals (field report #351, #343)
132
136
  - `codemod-hygiene.md` — after a jscodeshift/recast codemod, strip incidental reformatting so the diff shows only the semantic change (field report #357)
137
+ - `post-deploy-probe.sh` — deploy probe that asserts response content + Content-Type, not HTTP status only, so an SPA catch-all serving index.html for every path can't false-pass into a rollback (field report #371)
138
+ - `exclusion-set-invariant.md` — superset invariant for multi-mechanism exclusion sets: one canonical secret/PII set with `.gitignore` / rsync / scanner derived from it (or a CI assertion) so the three never drift (field report #377)
133
139
 
134
140
  ## Slash Commands
135
141
 
@@ -166,6 +172,8 @@ Reference implementations in `/docs/patterns/`. Match these shapes when writing.
166
172
  | `/portfolio` | Steris's cross-project financials — aggregated spend/revenue, portfolio optimization | Full |
167
173
  | `/ai` | Seldon's AI Intelligence Audit — model selection, prompts, tool-use, orchestration, safety, evals | All |
168
174
  | `/vault` | Seldon's Time Vault — distill session intelligence into portable briefing for session handoff | All |
175
+ | `/seal` | Session closeout ritual — orchestrates `/git` commit → `/git` push → `/debrief --submit` → `/vault --seal`, then always prints the copy-paste next-session handoff prompt | All |
176
+ | `/contextmeter` | Ducem Barr's context budget meter — installs a context-usage status line (colored meter) + a `UserPromptSubmit` hook that warns Claude itself as the window fills (named to avoid the native `/statusline`/`/context` collision) | All |
169
177
 
170
178
  **Tier key:** `All` = works everywhere. `Full` = requires the wizard server (`packages/voidforge/wizard/server.ts`). Full-tier commands offer to install the wizard if not present.
171
179
 
package/dist/HOLOCRON.md CHANGED
@@ -96,7 +96,7 @@ Or manually: copy CLAUDE.md, .claude/, and docs/ from the npm package into your
96
96
 
97
97
  Every tier includes:
98
98
  - **CLAUDE.md** — Root context loaded at every session start
99
- - **30 slash commands** (28 primary + 2 permanent aliases: `/review` → `/engage`, `/security` → `/sentinel`) — `/prd`, `/blueprint`, `/build`, `/qa`, `/test`, `/sentinel`, `/ux`, `/engage`, `/deploy`, `/devops`, `/architect`, `/assess`, `/git`, `/void`, `/vault`, `/thumper`, `/assemble`, `/gauntlet`, `/campaign`, `/imagine`, `/debrief`, `/dangerroom`, `/cultivation`, `/grow`, `/current`, `/treasury`, `/portfolio`, `/ai`. Run `ls .claude/commands/*.md | wc -l` for the live file count.
99
+ - **33 slash commands** (31 primary + 2 permanent aliases: `/review` → `/engage`, `/security` → `/sentinel`) — `/prd`, `/blueprint`, `/build`, `/qa`, `/test`, `/sentinel`, `/ux`, `/engage`, `/deploy`, `/devops`, `/architect`, `/assess`, `/git`, `/void`, `/vault`, `/seal`, `/contextmeter`, `/thumper`, `/assemble`, `/gauntlet`, `/campaign`, `/imagine`, `/debrief`, `/audit-docs`, `/dangerroom`, `/cultivation`, `/grow`, `/current`, `/treasury`, `/portfolio`, `/ai`. Run `ls .claude/commands/*.md | wc -l` for the live file count.
100
100
  - **13-phase build protocol** — PRD to production with verification gates
101
101
  - **18 specialist agent protocols** — Each lead has behavioral directives and a sub-agent roster
102
102
  - **Named characters** — From Tolkien, Marvel, DC, Star Wars, Star Trek, Dune, Anime, Cosmere, and Foundation — each materialized as a subagent definition in `.claude/agents/`
@@ -620,9 +620,23 @@ Seldon distills session intelligence into a portable briefing. The Time Vault pr
620
620
 
621
621
  Flags: `--seal` (auto-confirm), `--open` (read most recent vault), `--list` (list all vaults), `--for <target>` (tailor for `campaign`, `colleague`, or `trigger`).
622
622
 
623
+ #### `/seal` — Session Closeout Ritual
624
+ **When:** You're done for the session and want to ship, report, preserve, and hand off in one move.
625
+
626
+ `/seal` is a thin conductor — it runs no new persona, it sequences three you already know: Coulson ships the release (`/git` commit + push), Bashir files the field report upstream (`/debrief --submit`), and Seldon seals the vault (`/vault --seal`). It always ends by printing the copy-paste pickup prompt that boots the next session. The order is deliberate: commit and push first so the report and vault describe the final state; debrief before vault so the vault folds in the debrief's learnings. It short-circuits safely — a failing test suite halts the pipeline before push (you don't ship a broken build or file a success report on it) but still seals a vault recording the blocked state, so the next session resumes on the right foot.
627
+
628
+ Flags: `--dry-run` (preview every stage), `--yes` (no confirmation pause), `--no-push`, `--no-submit`, `--no-debrief`, and `/git` pass-throughs (`--major`/`--minor`/`--patch`/`--no-tag`).
629
+
630
+ #### `/contextmeter` — Context Budget Meter
631
+ **When:** It's installed **by default** on `init` (warn 80% / crit 92%) — run this command only to retune thresholds, re-install on an older project, or uninstall.
632
+
633
+ Installs two small scripts: a **status line** that renders a colored context meter (`⟦████████░░⟧ 78% ctx · 44k left`, green → yellow → red as it fills) for you, and a **`UserPromptSubmit` hook** that injects "you have ~X% context left, checkpoint soon" into Claude's own context once usage crosses the threshold — so the model can `/vault` or `/seal` before compaction instead of being caught out. The meter's colors and the hook's warnings share the same thresholds (yellow/warn 80%, red/critical 92%). The status line reads Claude Code's native `context_window` field (with a transcript fallback); the hook derives usage from the transcript. Named `/contextmeter` rather than `/statusline` because the native `/statusline` and `/context` commands always shadow a same-named project command (`docs/NATIVE_CAPABILITIES.md`). Requires `jq`.
634
+
635
+ Flags: `--warn-pct N` / `--crit-pct N` (hook thresholds), `--window N` (fallback denominator), `--status`, `--uninstall`, `--dry-run`.
636
+
623
637
  ### Flag System
624
638
 
625
- VoidForge flags are standardized across all 30 commands. Same flag = same meaning everywhere.
639
+ VoidForge flags are standardized across all 33 commands. Same flag = same meaning everywhere.
626
640
 
627
641
  **Tier 1 — Universal:** `--resume` (resume from state), `--plan` (plan without executing), `--fast` (reduced review passes), `--dry-run` (preview without doing), `--status` (show state), `--blitz` (autonomous, no pauses)
628
642
 
package/dist/VERSION.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Version
2
2
 
3
- **Current:** 23.18.0
3
+ **Current:** 23.20.0
4
4
 
5
5
  ## Versioning Scheme
6
6
 
@@ -14,6 +14,8 @@ This project uses [Semantic Versioning](https://semver.org/):
14
14
 
15
15
  | Version | Date | Summary |
16
16
  |---------|------|---------|
17
+ | 23.20.0 | 2026-06-23 | **Triaged 12 upstream field reports (#364–#378) → methodology hardening, + `/seal` and `/contextmeter`.** Applied every accepted fix across ~41 method docs / agents / patterns / commands (throughput/scale gates, ADR concurrency verification, deny-list discipline, runtime-path tracers, render-gate coverage, OAuth/external-claim verification, HTTP two-principal isolation, dall-e→gpt-image-1 currency, `/debrief` gate-gap docs) + 2 new patterns (`post-deploy-probe.sh`, `exclusion-set-invariant.md`); implemented the two wizard reports — non-destructive CLAUDE.md `update` merge (#368) + legacy-marker detection (#369). **New `/seal`** — session closeout (git → debrief → vault → handoff). **New `/contextmeter`** — context-budget meter + `UserPromptSubmit` awareness hook, default-on (warn 80% / crit 92%), `scripts/statusline/` wired through all four distribution paths + npm `files`. Build clean, suite 1392→1420. Dep `^23.19.0` → `^23.20.0`. |
18
+ | 23.19.0 | 2026-06-13 | **Gauntlet acceptance test → 14 fixes (the ADR-067 re-platform, validated by running it on itself).** Ran the new `gauntlet.workflow.js` live on the v23.13–v23.18 platform code (10-agent Surfer roster → 347 agents → 99 distinct claims → 66 confirmed + 24 crossfire, 0 Critical) and fixed the 3-lens-confirmed findings. **Gate (security):** `_paths.sh` reap was missing `-mindepth 1` → could `rm -rf` the entire `sessions/` tree (every live roster/bypass); the reaper now refreshes `$SESSION_DIR` mtime on activity + threshold raised above the TTL, closing the documented reap-vs-fresh-roster/bypass race; `shasum`→`sha256sum` fallback (gate silently broke on Alpine); `bypass.sh` run before the first hook fire now records a repo-scoped *pending* bypass that `check.sh` promotes (was a silent no-op). **Workflows:** strike no longer re-runs the same ≤5-agent roster twice; crossfire `survives:true+REFUTED` verdicts no longer vanish into no bucket (logged in `crossfireRefutedLog`); dedup keeps the **highest** severity + `raisedBy` (was first-write-wins); guarded `JSON.parse(args)`; undefined-domain prompt guard. **Distribution:** `npx voidforge-build init` now copies `.claude/workflows/` + `AGENT_CLASSIFICATION.md`; `update` now propagates `.claude/workflows` + `scripts/surfer-gate` (both were stranded). **Validation:** new `scripts/validate-workflows.sh` (wraps the runtime shape, then `node --check`) wired into `pretest` — corrects the false "scripts pass `node --check`" claim and gates syntax errors from shipping. **Docs:** `WORKFLOWS.md` example `agentType: a.id`→`a.name` + new gotchas; stale `/tmp/voidforge-*` paths fixed in gate README + CLAUDE.md (ADR-060). **CI:** `recover-partial` derives the version from `package.json` not `github.ref_name` (broke on dispatch); Playwright cache key off the committed manifests not the regenerated lockfile. Gate suite 23→27, full suite 1390→1392. Deferred (field-report candidates): concurrent same-repo pointer collision, `workflow_dispatch` branch guard. Dep `^23.18.0` → `^23.19.0`. |
17
19
  | 23.18.0 | 2026-06-13 | **Workflow re-platform of `/gauntlet` + `/assemble` (ADR-067)** — the opportunity ADR-064 unblocked. New `.claude/workflows/gauntlet.workflow.js` (discovery → JS dedupe → 3-lens adversarial REFUTE → crossfire → council, schema-validated) and `assemble-review.workflow.js` (engage+sentinel over a mission diff; build/arch/devops stay prose). New **`docs/methods/WORKFLOWS.md`** authoring standard (API, the #348/#363 gotchas, 16/1000 caps, and the ADR-064 gate-launch sequence: Surfer→record-roster→Workflow). `gauntlet.md`/`assemble.md` gain workflow-execution sections; personas + fix-application + Debate Protocol stay prose. **Distribution gate (Phase 12.75):** `.claude/workflows/` is a new shared category — added to `prepack.sh` (npm) + `copy-assets.sh` (init) so the scripts ship to consumers. Both scripts `node --check`-validated (ESM async-wrapped); the live end-to-end gauntlet run is the acceptance test. Dep `^23.17.0` → `^23.18.0`. |
18
20
  | 23.17.0 | 2026-06-13 | **Effort-tiering fleet edit (ADR-054) — verified + applied.** Verified against the official Claude Code sub-agents docs that `effort` is a supported sub-agent frontmatter field (values `low`/`medium`/`high`/`xhigh`/`max`; "available levels depend on the model"). Applied across all 264 agent definitions: **20 leads (`model: inherit`) → `effort: xhigh`, 201 Sonnet specialists → `effort: medium`, 43 Haiku scouts → omitted** (Haiku doesn't support the parameter). Per-agent reasoning-spend lever, independent of model tier — the largest cost lever in the fleet (200 specialists no longer pay lead-level reasoning for read-and-report review). Frontmatter-only, idempotent insert after the `model:` line; `validate-agent-refs` + full suite (1390/1390) green; integrity preserved. Closes the M2 deferral from v23.16.0. Updated ADR-054 (status→fleet-applied), SUB_AGENTS.md, COMPATIBILITY.md. Dep `^23.16.0` → `^23.17.0`. (Aside: confirmed the v23.16.0 gate fix live — a Workflow launch was correctly BLOCKED this session until a `--light` bypass was set; noted a reap-vs-fresh-bypass timing race for a future field report.) |
19
21
  | 23.16.0 | 2026-06-13 | Platform-alignment campaign — implements the ADR set from `/architect --plan` (→ `/campaign`). **ADR-064 (gate↔Workflow) IMPLEMENTED:** the Silver Surfer `PreToolUse` hook matcher is now `Agent\|Workflow` and `check.sh` gates the **Workflow tool launch** on a recorded roster (closes the proven bypass where workflow-spawned agents skipped the gate); `test.sh` gains 3 Workflow cases (**23/23**), mirrored to the methodology package. **Behavior change:** a Workflow run now requires a recorded roster or a `--light`/`--solo` bypass — build/apply/research workflows should set a bypass. **ADR-065 (platform floor):** `docs/COMPATIBILITY.md` gains a Claude Code platform-floor + per-feature maturity table; informational `claudeCodeFloor` field; semver rule (raising the floor = breaking) + release-checklist item. **ADR-066 (native-capability tracker):** new `docs/NATIVE_CAPABILITIES.md` audits all commands vs native skills with dispositions (`/qa`,`/test` coexist+document; `/git` keep) — realizes ADR-050's deferred follow-up; release re-audit item added. Amended **ADR-051** (workflow-exemption→closure), **ADR-054** (effort tiers + Haiku 200K/no-effort). M2 effort fleet-edit deferred per ADR-054 precondition (runtime `effort:`-frontmatter honoring unverified). Dep `^23.15.0` → `^23.16.0`. |
@@ -143,6 +143,7 @@ Run sequentially — each builds on findings from parallel phase:
143
143
  - Are system prompts deduplicated across requests?
144
144
  - Is streaming used where appropriate? (Time to first token)
145
145
  - Estimated monthly cost at projected volume?
146
+ - **Are hardcoded per-token cost constants verified against CURRENT provider pricing?** Per-token LLM rates are a STALENESS LIABILITY: models get retired and repriced, so a constant that was right at build time silently rots. Whenever touching cost-tracking or cost-cap code, verify every per-token rate against the provider's live pricing — do not trust the value in the repo, the PRD, or a prior vault. A stale rate mis-records COGS and mis-sets margin guards: field report #364 found Opus hardcoded at $15/$75 per 1M tokens against an actual current $5/$25 — a 3× over-statement that inflated every recorded generation cost and set AI-cost caps *above* subscription revenue (a live margin leak). (Field report #364)
146
147
 
147
148
  **Bayta Darell (Evaluation):** Quality measurement.
148
149
  - Does an eval exist for each AI component?
@@ -258,6 +259,8 @@ If issues found, return to Phase 3. Maximum 2 iterations.
258
259
  - [ ] Human review process for edge cases
259
260
  - [ ] LIVE eval layer runs against the real model and passes before launch (sandbox layer alone cannot catch model-output-shape bugs) (field report #352, #4)
260
261
  - [ ] Model output normalized null-to-undefined before Zod `.optional()` validation (field report #352, #4)
262
+ - [ ] Safety-eval leak-detector is INDEPENDENT of the production deny-list/filter — it does not re-import the filter's own banned-terms or regex. Reusing the filter to test the filter is tautological: every term the filter misses, the eval also misses, so the eval reports PASS on a real leak. Build the leak-detector from a separate oracle (hand-curated banned-phrase set, second model / LLM-judge, or human labels). (Field report #378)
263
+ - [ ] Safety eval includes adversarial cases for all three deny-list false-fire classes: NEGATION ("no accreditation evidence" must PASS — it's the safe answer), PROPER-NOUN (a contact at "Visa" / a fund named "Trust Fund" must not flag on the substring), and HOMOGLYPH / zero-width evasion ("аccredited" with a Cyrillic 'а', or a zero-width-split token, must still be CAUGHT after NFKC normalization). (Field report #378)
261
264
 
262
265
  ### AI Gate Bootstrapping (Cold-Start Problem)
263
266
  AI-gated approval systems have a cold-start problem: no historical outcomes -> gate rejects all requests -> no operations -> no outcomes. During the first N decisions (configurable, default 20), the gate should approve at reduced size (0.5-0.7x normal) to build a track record. The gate should never reject solely because "no historical data exists." Include explicit prompt guidance: "Lack of history is not a reason to reject — approve at reduced size to build the track record." (Field report #152)
@@ -127,6 +127,18 @@ Verify no circular calls between store actions and API methods. Specifically che
127
127
 
128
128
  When a feature is added to one surface (API, dashboard, CLI, marketing site), verify all other surfaces displaying the same entities are updated. A new field added to the API response but missing from the dashboard table, or a new tier added to the pricing page but missing from the settings panel, creates an inconsistent product. After each pipeline phase that adds or modifies a feature, grep for the entity name across all surfaces: API routes, React/Vue components, CLI output formatters, marketing page copy, email templates, admin panels. (Triage fix from field report batch #149-#153.)
129
129
 
130
+ ### Render-Gate Regression Coverage (Phase 2.5 smoke + Phase 6 /ux)
131
+
132
+ A green build and a green unit suite do NOT catch render-gate regressions — a removed or renamed prop can silently kill a feature while every automated gate stays green. Example: a component still gates its render on `!token`; the `token` prop is removed (now always `null`); the headline panel becomes invisible to every signed-in user — and `build` plus 97 unit tests all pass, because the compiler and the unit suite never render the gated surface. Only a browser does. (Field report #375.)
133
+
134
+ So when a pipeline change removes or renames a **prop or a shared contract**, the Phase 2.5 smoke and the Phase 6 `/ux` browser/e2e pass must:
135
+
136
+ 1. Cover **EVERY surface that consumes the changed prop/contract — not a sampled page.** Grep for the symbol; the consuming-surface list is the screenshot list, not a subset of it.
137
+ 2. Explicitly **re-check the render *gates* that key off the changed prop** — for each gate (`!token`, `prop && <Panel/>`, `if (!x) return null`), confirm the gated surface still renders after the change.
138
+ 3. Verify each changed component in **BOTH signed-in and signed-out states.**
139
+
140
+ An e2e that exercises a *different* surface than the one that changed does not satisfy the screenshot mandate — it is a coverage gap that ships a dead feature. (Field report #375: removing a browser token prop left `TelegramConnect` gated on a now-always-`null` value; the headline connect panel went invisible to every signed-in user, passed a green build + 97 unit tests, and was caught only by the review roster because the e2e exercised the Account dialog, not the changed surface.)
141
+
130
142
  ### Phase 13.5 — Doc-Currency Refresh (pre-SEAL)
131
143
 
132
144
  After the Council signs off, but BEFORE Fury seals the run and makes the Deploy Offer, sweep the project's source-of-truth docs for drift introduced over the course of the pipeline. A full `/assemble` touches architecture, features, version, and build state — by the time the Council finishes, the docs that describe the project frequently no longer match it. This mirrors the Doc-Currency Refresh mission in `CAMPAIGN.md`: same checklist, applied once at the end of the pipeline instead of once per mission. (Field report #342 F-1: `/assemble` shipped a Council-clean build whose `CLAUDE.md` Project block and `PROJECT_VERSION` line still described the pre-build scaffold.)
@@ -258,6 +258,9 @@ Grep for deferred wiring comments: `Set after`, `Wire after`, `None #`, `TODO: w
258
258
  7. Log each batch to `/logs/phase-05-features.md`
259
259
 
260
260
  **Phase 6 — Integrations.**
261
+
262
+ **MANDATORY PRE-INTEGRATION WEB-VERIFICATION (Romanoff owns; field report #364 Finding A).** BEFORE writing any external-API integration code, web-verify the provider's live docs (WebFetch) for: the CURRENT API version, deprecation/sunset notices on the endpoints you plan to call, and the auth requirements (OAuth scopes, token type, developer-token gating). Treat the PRD/vault/plan's API version, endpoint names, and auth specifics as STALE until confirmed — they rot fast across a multi-day campaign on a fast-moving platform (Google/Meta/Stripe ad & billing APIs deprecate aggressively). The post-build live smoke test below is NOT a substitute: it verifies the call you wrote works, not that you wrote against the right (non-deprecated) API. Real case (Kongo M8): a vault plan said "v17→v21, use `uploadClickConversions`" — live docs showed v24 was current, that path was blocked for the project's token 3 days out, and the correct route was a different API with a different scope and no developer token. Building blind would have wasted the whole mission and missed a hard external deadline. Log the verified version/deprecation/auth findings to the phase log before coding.
263
+
261
264
  1. Each integration: client wrapper, env vars, test mode, error handling, retry logic
262
265
  2. For async work, follow `/docs/patterns/job-queue.ts`
263
266
  3. Kenobi reviews each integration
@@ -357,6 +360,10 @@ If this build introduces a new shared file category (e.g., `.claude/agents/`, a
357
360
  6. `void.md` — listed in user-facing sync checklist
358
361
  Missing even one path means some users silently miss the feature. This gate is mandatory after any structural addition to the methodology. (Field report #297: .claude/agents/ was added to packaging but missed in 3 of 6 delivery paths.)
359
362
 
363
+ **Four-path distribution discipline (LRN-11, field report #366 F2).** Of the paths above, FOUR are the actual file-copy routes a new category must land in or it strands some installs — verify each by name: (1) `packages/methodology/scripts/prepack.sh` (the npm methodology package), (2) `packages/voidforge/scripts/copy-assets.sh` (the CLI `dist/`), (3) `packages/voidforge/wizard/lib/project-init.ts` `copyMethodology()` (`npx voidforge-build init`), (4) `packages/voidforge/wizard/lib/updater.ts` diff `dirs` (`npx voidforge-build update`). v23.18.0 added `.claude/workflows/` to (1) and (2) only, stranding `init` and `update` — three of that release's bugs trace to the omission. Grep all four for an existing sibling category (`.claude/agents`, `scripts/thumper`) and mirror it.
364
+
365
+ **Ship-with-validator rule (field report #366 F2).** A new shipped artifact TYPE must ship, in the same release, with a CI/pretest validator for it — e.g. a new `.claude/workflows/*.js` category ships with `scripts/validate-workflows.sh` wired into `pretest`, plus a regression test that init/update actually copy it. And **a release must never claim a validation it did not run.** v23.18.0 claimed "both scripts pass `node --check`" when their top-level `return`/`await` make a bare `node --check` fail — the claim was false and ran nothing. State the exact command the validator runs; if it cannot run in CI, say so — never assert a green you didn't observe.
366
+
360
367
  **Phase 13 — Launch Checklist.**
361
368
  All flows in production. SSL. Email. Payments. Analytics. Monitoring. Backups. Security headers. Legal. Performance. Mobile. Accessibility. Tests passing. **Build-time env var verification:** For every new `NEXT_PUBLIC_*` / `VITE_*` / `REACT_APP_*` reference introduced during this build, verify the variable exists in `.env` or the deploy environment. Missing build-time vars cause features to silently disappear without errors. (Field report #104) Log final status to `/logs/phase-13-launch.md`.
362
369
 
@@ -304,6 +304,8 @@ User confirms, redirects, or overrides. On confirm → Step 4.
304
304
 
305
305
  **Post-infrastructure enforcement gate:** For infrastructure campaigns (deploy targets, CI/CD, monitoring, staging environments): after the infrastructure is provisioned, run `/architect --plan` to verify workflow enforcement gates exist — not just infrastructure existence. Infrastructure without process gates is incomplete.
306
306
 
307
+ **Dark-flag activation is gated on the comprehensive REVIEW, not the deploy (field report #373 #1).** When a mission builds a feature behind a dark flag — deployed disabled, then enabled via a flag flip (often paired with a contract/data migration) — the gate on the **flag flip** is the comprehensive adversarial review of the dark code, NOT the deploy of the dark code. Sequence the mission as **build dark → review the dark code (Gauntlet checkpoint or full `/assemble` review round) → flip the flag only after the review's Critical/High findings are closed.** Never flip-then-review: a feature activated before its review ships review-findable bugs to live users, and a subsequent Gauntlet finds them already in production. The activation review MUST exercise the **partial/edge interaction states the feature introduces** (partial confirm, reject-all, edit-then-act, account-switch mid-action) — a green happy-path smoke is necessary, not sufficient, because the new bugs live in the partial/edge paths. (Field report #373: a mission built an ADR dark, flipped the flag + ran a contract migration, and ONLY THEN ran the Gauntlet — which found 6 live blockers; a follow-on `/assemble` found 2 more, also live. 8 review-findable bugs shipped to prod purely from the dark→activate→review ordering.)
308
+
307
309
  **Silver Surfer gate fires at the REVIEW phase, not the solo build.** Within a mission, the gate (ADR-051 PreToolUse hook on the Agent tool) engages when Fury deploys the review/audit roster as sub-agents — NOT during the orchestrator's solo build of the mission's code. Solo-build-before-review is intentional, not a skipped gate: parallel agents editing the same tightly-coupled engine files (game loop, state machine, shared service) would clobber each other's edits and produce merge garbage. So the orchestrator builds the changeset solo, THEN the Surfer-gated review roster reads it. If you find yourself mid-build asking "did a gate get skipped?", the answer is no — the gate has not fired yet because the review phase has not started. (Field report #348 #3: mid-build confusion over an un-fired gate that fires correctly at the review phase.)
308
310
 
309
311
  ### Pre-Prod Verification: when there is no staging
@@ -459,6 +461,15 @@ Field report #322 (barrierwatch): `statistical-gate.ts` grew 425 → 775 LOC acr
459
461
 
460
462
  When a mission duplicates or extends an existing code path (adding a version-aware path alongside a legacy path, adding a new endpoint that mirrors an existing one), verify that security patterns (locking, rate limiting, validation, sanitization) from the original path are replicated in the new path. Grep for the original pattern and confirm it exists in the new code. (Field report #38: optimistic locking in legacy chat edit was not replicated to the version-aware path.)
461
463
 
464
+ ### FR-A5 Isolation Gate — HTTP-level two-principal test + planted-uid red-check
465
+
466
+ When a mission ships or touches user-scoped / multi-tenant data (the FR-A5 "two-user isolation" class — owner data must not leak across principals), the isolation test that gates the mission MUST drive the **real request entry point with two distinct credentials** — not a repository-layer test only. A test that calls the repository/store directly and asserts on its `WHERE user_id = ?` clause goes green even when the HTTP handler **hardcodes `uid`** — because the repo is never the thing that resolves the principal in production; the auth→uid wiring at the handler is. The definition-of-done for an FR-A5 isolation gate is two-part and both parts are mandatory:
467
+
468
+ 1. **HTTP-level two-principal assertion.** The test issues requests through the actual request entrypoint (route/handler/middleware stack the production server mounts) with **two distinct credentials** — e.g. owner-token vs. a second session-cookie/second-user-token — and asserts principal A cannot read or mutate principal B's resources (404/403, empty result, or write-rejected, per the project's IDOR contract). Driving the repo's WHERE clause is NOT sufficient; the test must cross the auth→uid resolution seam.
469
+ 2. **Planted-uid red-check (explicit).** The gate must include a planted-bug assertion: **hardcoding `uid = <owner>` in the handler MUST turn the test RED.** If pinning the handler's principal to a constant still passes every isolation test, the test is asserting at the wrong layer — it never exercised the auth→uid wiring and provides false confidence. State this red-check explicitly in the mission's acceptance criteria, and verify it by actually planting the constant once and confirming the failure.
470
+
471
+ (Field report #371 #1: an FR-A5 two-user isolation test written against repositories went green, yet hardcoding `uid = 1` in the HTTP handler passed ALL 48 tests — the gap survived until an HTTP-level two-principal test driving the real handler was added at a checkpoint. The repository-layer test never exercised the auth→uid wiring that the bug lived in. See the handler-entry two-principal variant in `docs/patterns/multi-tenant-property-test.*`.)
472
+
462
473
  ### Minimum Review Guarantee
463
474
 
464
475
  Even in `--fast` mode, each mission gets at least **1 review round** (not 3, but never 0). A single review catches ~80% of issues for 33% of the review cost. Zero reviews in blitz caused 7 Critical+High issues to accumulate undetected across 4 missions — all caught by the Victory Gauntlet but at much higher fix cost. (Field report #28)
@@ -328,6 +328,16 @@ If health check fails after deploy:
328
328
 
329
329
  (Field report #97: 3 campaigns of Dialog Travel code never reached production because no deploy step existed.)
330
330
 
331
+ ### Arming / go-no-go gates must run the REAL production launch path
332
+
333
+ A go/no-go gate — a "tracer bullet" that authorizes arming an autonomous component, or a deploy gate that authorizes cutover — is only meaningful if it crosses the **same production seam** the live system uses. A tracer that runs through a dev shortcut and reports CLEAN gives **false confidence**: it proves a path that production never takes.
334
+
335
+ The recurring failure: the gate executes via a dev/current-user code path — an `unsafe_run_as_current_user`-style flag, a `--dev`/`--local` launcher, the test harness's in-process invocation — and never crosses the privileged hand-off (the `sudo`/`setuid` drop, the systemd `User=`/`ExecStart`, the scheduler's spawn) that the production scheduler actually uses. It passes CLEAN while production is broken, because the broken seam was the one the tracer skipped.
336
+
337
+ **Rule:** the gate that authorizes arming or deploy must exercise the **real entrypoint, the real OS user, the real privilege drop, and the real process model** — the production seam, end to end — not a dev bypass. Same systemd unit (or the same scheduler/launcher), same environment construction, same user/privilege transition, same hand-off. If the production path drops privileges and re-execs under a service account, the tracer must too; a tracer that stays in the current user's shell is testing a different program.
338
+
339
+ **Checklist item (add to the deploy/arming go-no-go):** *"Tracer exercised the production seam — real entrypoint, real user, real privilege drop, real hand-off — not a dev/`--unsafe-current-user` shortcut."* If you cannot answer yes, the gate has not run; a CLEAN from a dev path is a false CLEAN. (Field report #377: an arming tracer ran via a current-user dev path, reported CLEAN, and gated the arming — the system's first real scheduled run then failed because the production privileged hand-off it had skipped was broken.)
340
+
331
341
  ## Load Testing (Pre-Launch)
332
342
 
333
343
  **When to load test:**
@@ -396,6 +406,29 @@ When staging and production coexist on the same server, enforce full isolation:
396
406
 
397
407
  Convention isn't enough — enforcement is. The pre-push hook is the single most effective protection. (Field report #241: 68-hour production outage from shared infrastructure.)
398
408
 
409
+ ### Promote gate must verify the staging server's DEPLOYED COMMIT == branch HEAD
410
+
411
+ A staging-first promote gate that checks only "staging branch is ahead of main" + "staging health endpoint returns 200" + "version was bumped" is **structurally blind to the one thing staging-first exists to guarantee**: that the code being promoted actually *ran on staging*. Branch-ahead proves the commit was *pushed*; health-200 proves *some* build is up. Neither proves the staging **server** is running the commit being promoted — a push-to-branch without a redeploy leaves the server lagging the branch, and "ahead + 200" both still pass. Promote at that point and you ship commits to prod that **never executed on staging** — the exact failure staging-first is built to prevent (and the same shape as the "deployed but never reloaded" stale-build outage elsewhere in this doc).
412
+
413
+ **The gate (two required parts):**
414
+
415
+ 1. **Expose the running build's commit on the health/status endpoint.** A health body of `{status, checks, responseMs}` with no commit/version field gives nothing machine-checkable to promote against. Add the deployed git SHA (or version) to the payload — this is the same build-fingerprint discipline as §Build Staleness Detection, applied to the promote decision:
416
+ ```json
417
+ { "status": "ok", "commit": "abc1234", "version": "v5.5.1", "checks": { ... } }
418
+ ```
419
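+ 2. **`promote.sh` compares the staging server's reported commit against the branch HEAD being promoted, and BLOCKS on mismatch.** Health-200 + branch-ahead is necessary, not sufficient — the *server* must be running the code being promoted. Compare like with like: the endpoint's `commit` and the local SHA must be abbreviated to the same length (or both be full 40-character SHAs), because `git rev-parse --short` chooses its abbreviation length per repository and a length mismatch would block a correct promote.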
420
+ ```bash
421
+ STAGING_COMMIT="$(curl -s "https://$STAGING_HOST/api/health" | jq -r '.commit')"
422
+ BRANCH_HEAD="$(git rev-parse --short "$PROMOTE_BRANCH")"
423
+ if [ "$STAGING_COMMIT" != "$BRANCH_HEAD" ]; then
424
+ echo "PROMOTE BLOCKED: staging server runs $STAGING_COMMIT but you are promoting $BRANCH_HEAD."
425
+ echo "Redeploy staging to $BRANCH_HEAD and re-run the health check before promoting."
426
+ exit 1
427
+ fi
428
+ ```
429
+
430
+ A health-only promote gate promotes stale builds. The commit-equality check is the assertion that the thing you tested is the thing you ship. (Field report #364: a session pushed two missions to the staging *branch* without redeploying the staging *server*; "branch ahead + HTTP 200" both passed while the server lagged the branch by a full version — promoting would have shipped prod commits that never ran on staging, caught only by an operator's instinct to inspect server state.)
431
+
399
432
  ### Renaming a Linked Worktree Directory Breaks Git Silently
400
433
 
401
434
  A linked git worktree (staging worktree, release worktree) keeps **two** pointer files that must agree on the directory's path. Renaming the worktree directory with a plain `mv` orphans both, and git gives you no warning (field report #343 F2):
@@ -533,6 +566,29 @@ ReadWritePaths=/var/lib/myapp /var/log/myapp
533
566
  ```
534
567
  Note: ahead-of-time-compiled binaries (Go, Rust, statically compiled C/C++) have no JIT and **can** keep `MemoryDenyWriteExecute=true` — the restriction is specific to JIT runtimes (Node/V8, the JVM, PyPy, .NET with JIT). When a unit template is shared across services, gate MDWE on the runtime, not on the unit boilerplate.
535
568
 
569
+ ### Live contrastive smoke gate (systemd / shell / sudo / sandbox wiring)
570
+
571
+ A unit file that *parses* is not a unit that *runs*. systemd sandbox flags (`ProtectHome`, `ReadWritePaths`, `MemoryDenyWriteExecute`, `User=`, `NoNewPrivileges`), shell `set -o pipefail` interactions, `sudo`/`setuid` drops, and `exec`-replaced traps all pass every code-read and unit-lint yet fail (or silently no-op) the moment the service runs under the real hardened runtime. `systemd-analyze verify <unit>` checks syntax; it does NOT prove the service does useful work inside its own sandbox.
572
+
573
+ For any systemd/shell/sudo/sandbox wiring mission, the review gate must run a **live contrastive smoke** — prove the failure mode AND the fix *at runtime*, contrasting pass-vs-fail, not merely that the unit file is defined:
574
+
575
+ 1. **Reproduce the failure live.** Run the operation under the OLD/un-fixed sandbox config and show it actually blocks — e.g. a `systemd-run` (or `systemd-run --user`) transient unit carrying the restrictive flags demonstrates the write is denied / the process SIGTRAPs / the cadence run no-ops.
576
+ 2. **Prove the fix live.** Run the SAME operation under the NEW config and show it now succeeds.
577
+ 3. **Assert the contrast.** The gate passes only when fail→old and pass→new are both demonstrated. A unit that merely *exists* with the right flags is not proof the service runs under them.
578
+
579
+ ```bash
580
+ # Contrastive smoke: does the service actually run under its real hardened runtime?
581
+ # OLD config must BLOCK; NEW config must ALLOW. Run both; assert the contrast.
582
+ systemd-run --pty --property=ProtectHome=read-only \
583
+ --property=ReadWritePaths=/var/log/myapp \
584
+ /usr/bin/env sh -c 'echo probe > /home/svc/repo/run.log' # expect: FAILS (read-only)
585
+ systemd-run --pty --property=ProtectHome=tmpfs \
586
+ --property="ReadWritePaths=/var/log/myapp /home/svc/repo" \
587
+ /usr/bin/env sh -c 'echo probe > /home/svc/repo/run.log' # expect: SUCCEEDS (path writable)
588
+ ```
589
+
590
+ This catches the class of defect that unit-green code hides: a `ProtectHome=read-only` + enumerated `ReadWritePaths` that makes a repo-root log read-only and **silently no-ops every armed cadence run**; an `OnFailure=` alert unit that `203/EXEC`-fails because its script lacks the executable bit; a secret still readable in `/proc/<pid>/environ` after an in-process `unsetenv`. None are reachable by code-reading or unit tests — only by running the service under its real runtime and watching what it does. (Field report #365: an M13 systemd sandbox passed every unit test but would have silently no-op'd every armed cadence run; reachable only by live proof, not code-reading.)
591
+
536
592
  ## Config Foot-Guns (deploy/runtime)
537
593
 
538
594
  Four recurring config traps that pass every syntax check yet break at runtime (field report #352 #5):
@@ -50,6 +50,7 @@ Transform session failures into structured, actionable field reports that improv
50
50
  4. **Categorize root causes.** Every failure is one of: methodology gap, tooling limitation, communication failure, scope issue, framework-specific bug, or external dependency.
51
51
  5. **Severity matters.** Distinguish between "this affects all users" (methodology flaw) and "this was specific to my project" (edge case).
52
52
  6. **Be actionable.** Every finding should specify: which file should change, what should be added/modified, and which agent is responsible.
53
+ 7. **Take the gate bypass before dispatching the DS9 crew (#366 F4).** Bashir's Ezri/O'Brien/Nog/Jake are a fixed, command-prescribed roster — not a cherry-picked review — so `/debrief` is NOT in the Silver Surfer gated-commands list. But the gate's `PreToolUse` hook blocks *every* non-Surfer Agent launch regardless of that list, so the crew would be blocked. BEFORE launching the first sub-agent, run `[ -x scripts/surfer-gate/bypass.sh ] && bash scripts/surfer-gate/bypass.sh --light || true`. The bypass is correct here, not a workaround: a fixed roster can't be cherry-picked, so the gate's anti-cherry-pick purpose doesn't apply. **Stale-pointer bug:** if the first sub-agent launch still blocks, the repo's session pointer was stale (pointed at a dead session) and the flag landed in the wrong session dir; re-run the same `bypass.sh --light` line once — the first blocked `check.sh` fire repoints to the live session, so the second write lands correctly. (Tracked: `bypass.sh` should detect a stale pointer instead of relying on a re-run.)
53
54
 
54
55
  ## Root Cause Categories
55
56
 
@@ -90,10 +90,9 @@ Stored at `public/images/manifest.json`:
90
90
 
91
91
  Default: OpenAI (gpt-image-1). Provider-abstracted for future extensibility.
92
92
 
93
- | Provider | Model | Per Image (HD) | Notes |
94
- |----------|-------|---------------|-------|
95
- | OpenAI | gpt-image-1 | ~$0.04 | Default. Best quality/cost ratio. |
96
- | OpenAI | DALL-E 3 HD | ~$0.08 | Higher detail, double cost. |
93
+ | Provider | Model | Per Image | Notes |
94
+ |----------|-------|-----------|-------|
95
+ | OpenAI | gpt-image-1 | ~$0.04 | Default. Best quality/cost ratio. For higher detail, raise `quality` to `high`. |
97
96
 
98
97
  ## Deliverables
99
98
 
@@ -112,6 +112,8 @@ Why default-to-refuted: across instrumented Gauntlets, **~38% of first-pass Crit
112
112
 
113
113
  > **Cross-system checkpoint is non-optional (field report #350 #4):** in a multi-mission Gauntlet, the cross-system checkpoint caught a **fix-induced Critical that a per-mission review's own fix had created** — the per-mission review verified its fix in isolation and passed it; only the whole-system pass saw the new failure mode the fix introduced. This is direct evidence that verifying a fix against the single mission that motivated it is insufficient. The Gauntlet-level refute-the-fix checkpoint stays in the protocol regardless of how green the per-mission reviews were.
114
114
 
115
+ **A fix-batch verdict must PROVE its premise — live runtime assertion for runtime-dependent fixes (field report #377 #2, #4):** Source-read + unit-test-green is NECESSARY but NOT SUFFICIENT to accept a fix whose correctness depends on **runtime state**. For any fix touching file permissions, environment variables, process lifecycle (`exec`/`trap`/signal handling), systemd sandbox (`ProtectHome`, `ReadWritePaths`, `OnFailure=`), privilege drop, or any kernel-observable state, the fix-batch gate requires a **live runtime assertion** that the fix actually takes effect at runtime — not merely that the source looks correct. The acceptance check is empirical: run the real unit/script/process and assert the post-state. Examples that "looked right" in source and passed unit tests yet failed at runtime: an `OnFailure=` alert unit that `203/EXEC`-failed because its script lacked the executable bit (assert: `systemd-run` the unit and confirm it starts); a secret still present in `/proc/<pid>/environ` after an in-process `unsetenv`/pop because the kernel exposes the exec-time env block (assert: read `/proc/<pid>/environ` of the live process); a cleanup step skipped because `exec` replaced the shell before the `trap` could fire (assert: run the script and confirm the cleanup artifact). **A verdict must prove its premise before the fix is accepted.** A severity rating or an accept/reject decision that rests on an unverified factual premise — "the secret is reachable," "the bit is set," "the trap fires" — is INADMISSIBLE until the agent ships the command that proves the premise and the command is run (e.g., prove reachability by running the env-builder before rating a secret CRITICAL). When the runtime cannot be exercised, the fix is accepted only as a code-read CONFIRM carrying the unreproduced-finding severity discount — never present a source-read as proof the runtime behavior holds. 
(Field report #377: three adversarial verification rounds each caught a fix that passed source-read + unit tests but did not take effect at runtime, plus a CRITICAL rated on a premise — secret reachability — that was never checked and proved false.)
116
+
115
117
  **Round 5 — The Council (convergence):**
116
118
  - Spock (Star Trek) — code quality after fixes
117
119
  - Ahsoka (Star Wars) — access control integrity
@@ -125,6 +127,10 @@ Troi also performs a **Marketing Copy Drift Check**: compare marketing page clai
125
127
 
126
128
  **Composition/wiring lens (Victory / multi-mission Gauntlet) (field report #358 #1):** Per-mission reviews are structurally blind to cross-mission composition — they only see one mission's changeset. A defect that is a property of the *assembled entry paths across all missions* (which code path is actually invoked at each armed/public entry point, what each entry *passes* vs. what the library *accepts*, and whether a security-critical default — `run_as`, eval tier, isolation flag — is set on the entry path or only deep in a module) is invisible to every per-mission review yet ships. Therefore the final holistic Gauntlet MUST dedicate at least one agent to a wiring/composition pass that: (1) enumerates every entry point (CLI, daemon, public route, scheduled job) that invokes the assembled system; (2) for each, traces what arguments/config it actually passes and reconciles them against what the library/eval gate accepts and what the safe default requires; (3) flags any entry path that omits a containment boundary the library threads internally but the entry never sets, or that injects a weaker gate (T1-only) than the full regression+isolation gate the system defines. This pass is non-negotiable and is not satisfied by green per-mission reviews. Field report #358: 12 passing reviews (10 per-mission + 2 every-4 checkpoints) all missed (a) every armed run executing as the privileged `run_as` user because it was set on the eval module but never on any entry path, and (b) both armed entry points injecting a T1-only eval that bypassed the T2/T3 gate — both caught only by the final Victory Gauntlet.
127
129
 
130
+ **Declared-vs-implemented reconciliation lens (Victory / multi-mission Gauntlet) — MANDATORY (field report #365 #4):** Phantom coverage — a flow/entry-point/capability **declared** in one mission, **implemented** (or not) in a second, and **counted as covered** by a measurement tool in a third — is structurally invisible to every per-mission review because the declaration, the implementation, and the measurement each live in a different mission's changeset. The final holistic Gauntlet MUST dedicate at least one agent to a reconciliation pass that, for the whole assembled system: (1) **enumerates everything DECLARED** — every flow registered in a registry/manifest/config, every route declared, every capability advertised in a coverage/health/status surface; (2) **enumerates everything actually IMPLEMENTED AND WIRED** — for each declared item, confirm a real handler/runner exists AND is reachable from a live entry point (not a stub, not an unimported function, not a `pass`); (3) **reconciles the two counts and flags every gap** — any declared item with no live implementation is phantom coverage; any coverage/health number that counts declared-but-unimplemented items is a false-confidence metric and is itself a finding. The reconciliation is a count, not a vibe: "registry declares N flows; runner implements M; coverage tool reports K covered — reconcile N vs M vs K and name every item in the difference." This pass is non-negotiable for any multi-mission build and is not satisfied by green per-mission reviews. (Field report #365: a flow registry (mission A) declared 14 "scripted" flows; the runner (mission B) implemented only one; the coverage tool (mission C) counted all 14 as covered — the gap was invisible to every per-mission review and caught only by the whole-system Victory Gauntlet.)
131
+
132
+ **Dark-flag activation is gated on REVIEW, not deploy (field report #373 #1):** For a feature built behind a dark flag (deployed disabled, then enabled via a flag flip + any paired contract/data migration), the comprehensive adversarial review of the dark code is the gate on the **flag flip** — NOT on the deploy of the dark code. Reviewing AFTER activation ships review-findable bugs to live users. The required ordering is **build dark → Gauntlet the dark code → flip the flag only once the review's Critical/High are closed** — never flip-then-review. The Gauntlet (or `/assemble` for a follow-on increment) that clears a dark feature for activation MUST exercise the **partial/edge interaction states the feature introduces** (partial confirm, reject-all, edit-then-act, account-switch mid-action), not only the end-to-end happy path — a green happy-path smoke is necessary, not sufficient, because the new bugs live in the partial/edge interactions. (Field report #373: an ADR was built dark, the flag flipped + a contract migration applied, and ONLY THEN the 17-agent Gauntlet ran — it found 6 confirmed blockers, including a premature batch-completion that stranded un-confirmed siblings, already LIVE; a follow-on `/assemble` found 2 more HIGH, also already live. The dark→activate→review ordering shipped 8 review-findable bugs to prod.)
133
+
128
134
  **Conditional verdict — ship-vs-enable separation (field report #358 #4):** When the Council's verdict is conditional — "safe to ship in state X but not state Y" (most commonly: safe to ship the feature GATED OFF, but NOT safe to arm/enable it) — the Council MUST NOT sign off on a bare "ship." It requires, before sign-off: (1) an **ADR that explicitly separates the two states** — what is true in the shipped-but-gated state, what must additionally hold before the enabled/armed state is safe (the open P0/P1 prerequisites), and which Gauntlet findings gate the transition; and (2) a **prerequisites runbook** enumerating the concrete, verifiable steps to move from shipped to enabled (containment boundary set on every entry path, full eval gate wired, credentials provisioned, etc.). Without this artifact, "shipped" silently reads as "fully enabled" to the next operator and a latent privileged-execution or gate-bypass gap goes live. The shipped state is only signed off once the ADR + runbook exist; the enabled state is signed off only once the runbook's prerequisites are independently verified. (Union Station's campaign wrote ADR-222 to capture exactly this separation.)
129
135
 
130
136
  **Pattern auth completeness check (Kenobi, during Rounds 2-3):** When a pattern file defines an authentication flow, verify the auth checks perform actual value verification (compare against expected, call verify functions) — not just presence checks (`!!header`, `Boolean()`). Flag `!!` or truthiness checks on auth-related headers as suspicious. (Field report #109: daemon socket auth used `!!vaultHeader` which passed for any non-empty string.)
@@ -39,6 +39,8 @@ For each of the 9 universes, evaluate which agents have relevant expertise for *
39
39
  3. Does this agent bring a unique perspective no other included agent covers? → Include
40
40
  4. Would this agent's findings be a subset of another agent's? → Exclude (dedup)
41
41
 
42
+ **The orchestrator owns this dedup and the dispatch decision.** Whether the roll comes from the Muster evaluation above or from the Silver Surfer's pre-scan, the candidate list is *advice* — the orchestrator collapses same-domain agents auditing the same artifact into one agent per distinct lens before launching, and decides which survive. A roster that returns ~5 data agents and ~6 security agents re-reading one artifact is bloat, not coverage; it wastes tokens on launch and again on re-deduping near-identical findings. The Herald advises; it never commands "launch all / do not analyze yourself." See SUB_AGENTS.md "The Orchestrator Owns Roster Dedup + Dispatch" for the full rule. (Field report #378 RC-3.)
43
+
42
44
  **Universe leads (always evaluated, included if relevant):**
43
45
 
44
46
  | Universe | Lead | Domain | Include When |