wogiflow 2.26.2 → 2.29.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/.claude/commands/wogi-bug.md +30 -0
  2. package/.claude/commands/wogi-debug-hypothesis.md +33 -0
  3. package/.claude/commands/wogi-morning.md +1 -2
  4. package/.claude/commands/wogi-review.md +31 -2
  5. package/.claude/commands/wogi-start.md +32 -0
  6. package/.claude/commands/wogi-statusline-setup.md +12 -0
  7. package/.claude/commands/wogi-story.md +3 -2
  8. package/.claude/docs/claude-code-compatibility.md +40 -0
  9. package/.claude/docs/phases/01-explore.md +2 -1
  10. package/.claude/docs/phases/03-implement.md +4 -0
  11. package/.claude/docs/phases/04-verify.md +45 -0
  12. package/.claude/rules/README.md +36 -0
  13. package/.claude/rules/_internal/worker-tool-first-turn.md +82 -0
  14. package/.claude/rules/alternative-execpolicy-toml-command-policy.md +11 -0
  15. package/.claude/rules/alternative-hand-edit-ready-json-to-register-orpha.md +11 -0
  16. package/.claude/rules/alternative-permission-ruleset-per-phase.md +11 -0
  17. package/.claude/rules/alternative-short-name.md +12 -0
  18. package/.claude/rules/alternative-wogi-flow-as-mcp-client-oauth-manager.md +11 -0
  19. package/.claude/rules/architecture/hook-three-layer.md +68 -0
  20. package/.claude/rules/dual-repo-architecture-2026-02-28.md +18 -0
  21. package/.claude/rules/github-release-workflow-2026-01-30.md +16 -0
  22. package/.claude/settings.json +1 -1
  23. package/.workflow/agents/logic-adversary.md +2 -1
  24. package/.workflow/agents/personas/README.md +48 -0
  25. package/.workflow/agents/personas/platform-rigor.md +38 -0
  26. package/.workflow/agents/personas/scale-skeptic.md +28 -0
  27. package/.workflow/agents/personas/security-hawk.md +34 -0
  28. package/.workflow/agents/personas/simplicity-champion.md +37 -0
  29. package/.workflow/agents/personas/user-advocate.md +36 -0
  30. package/.workflow/bridges/base-bridge.js +46 -23
  31. package/.workflow/templates/claude-md.hbs +44 -122
  32. package/.workflow/templates/partials/feature-dossiers.hbs +33 -0
  33. package/.workflow/templates/partials/intent-grounded-reasoning.hbs +2 -12
  34. package/.workflow/templates/partials/methodology-rules.hbs +85 -79
  35. package/.workflow/templates/tier3-dom-field-inventory.md +102 -0
  36. package/lib/fuzzy-patch.js +251 -0
  37. package/lib/installer.js +8 -0
  38. package/lib/memory-proposal-store.js +458 -0
  39. package/lib/mode-schema.js +255 -0
  40. package/lib/skill-proposal-store.js +432 -0
  41. package/lib/skill-registry.js +1 -1
  42. package/lib/wogi-claude +149 -9
  43. package/lib/wogi-claude-expect.exp +113 -76
  44. package/lib/workspace-channel-server.js +19 -0
  45. package/lib/workspace-contracts.js +1 -1
  46. package/lib/workspace-dispatch-tracking.js +144 -0
  47. package/lib/workspace-gates.js +1 -1
  48. package/lib/workspace-ipc-sqlite.js +550 -0
  49. package/lib/workspace-messages.js +92 -0
  50. package/lib/workspace-routing.js +1 -1
  51. package/lib/workspace-task-injector.js +223 -0
  52. package/lib/workspace.js +23 -0
  53. package/lib/worktree-review.js +315 -0
  54. package/package.json +2 -2
  55. package/scripts/base-workflow-step.js +1 -1
  56. package/scripts/flow +28 -4
  57. package/scripts/flow-ac-scope-preservation.js +238 -0
  58. package/scripts/flow-auto-review-worker.js +75 -0
  59. package/scripts/flow-auto-review.js +102 -0
  60. package/scripts/flow-autonomous-detector.js +118 -0
  61. package/scripts/flow-autonomous-mode.js +153 -0
  62. package/scripts/flow-best-of-n.js +1 -1
  63. package/scripts/flow-bulk-loop.js +1 -1
  64. package/scripts/flow-checkpoint.js +2 -6
  65. package/scripts/flow-community-sync.js +1 -1
  66. package/scripts/flow-completion-summary.js +176 -0
  67. package/scripts/flow-completion-truth-gate.js +343 -4
  68. package/scripts/flow-config-defaults.js +52 -5
  69. package/scripts/flow-config-loader.js +3 -2
  70. package/scripts/flow-context-compact/expander.js +1 -1
  71. package/scripts/flow-context-compact/section-extractor.js +2 -2
  72. package/scripts/flow-context-gatherer.js +1 -1
  73. package/scripts/flow-context-generator.js +1 -1
  74. package/scripts/flow-context-scoring.js +1 -1
  75. package/scripts/flow-correct.js +1 -1
  76. package/scripts/flow-correction-detector.js +3 -2
  77. package/scripts/flow-decision-authority.js +66 -15
  78. package/scripts/flow-done.js +33 -1
  79. package/scripts/flow-epic-cascade.js +171 -0
  80. package/scripts/flow-epics.js +2 -7
  81. package/scripts/flow-eval-judge.js +1 -1
  82. package/scripts/flow-eval.js +1 -1
  83. package/scripts/flow-export-scanner.js +2 -6
  84. package/scripts/flow-failure-learning.js +1 -1
  85. package/scripts/flow-feature-dossier.js +787 -0
  86. package/scripts/flow-figma-extract.js +2 -2
  87. package/scripts/flow-figma-generate.js +1 -1
  88. package/scripts/flow-gate-confidence.js +1 -1
  89. package/scripts/flow-health.js +52 -1
  90. package/scripts/flow-hooks.js +1 -1
  91. package/scripts/flow-id.js +19 -3
  92. package/scripts/flow-instruction-richness.js +1 -1
  93. package/scripts/flow-knowledge-router.js +1 -1
  94. package/scripts/flow-knowledge-sync.js +1 -1
  95. package/scripts/flow-logic-adversary.js +76 -1
  96. package/scripts/flow-logic-rules.js +380 -0
  97. package/scripts/flow-long-input.js +5 -5
  98. package/scripts/flow-memory-sync.js +1 -1
  99. package/scripts/flow-memory.js +78 -7
  100. package/scripts/flow-migrate.js +1 -1
  101. package/scripts/flow-model-caller.js +1 -1
  102. package/scripts/flow-models.js +2 -2
  103. package/scripts/flow-morning.js +0 -17
  104. package/scripts/flow-multi-approach.js +1 -1
  105. package/scripts/flow-orchestrate-context.js +4 -4
  106. package/scripts/flow-orchestrate-templates.js +1 -1
  107. package/scripts/flow-orchestrate.js +8 -8
  108. package/scripts/flow-peer-review.js +1 -1
  109. package/scripts/flow-phase.js +9 -0
  110. package/scripts/flow-proactive-compact.js +1 -1
  111. package/scripts/flow-prompt-composer.js +3 -2
  112. package/scripts/flow-prompt-template.js +3 -2
  113. package/scripts/flow-providers.js +1 -1
  114. package/scripts/flow-question-queue.js +255 -0
  115. package/scripts/flow-repo-map.js +312 -0
  116. package/scripts/flow-review-passes/index.js +1 -1
  117. package/scripts/flow-review-passes/integration.js +1 -1
  118. package/scripts/flow-review-passes/structure.js +1 -1
  119. package/scripts/flow-revision-tracker.js +1 -1
  120. package/scripts/flow-section-resolver.js +1 -1
  121. package/scripts/flow-session-end.js +74 -5
  122. package/scripts/flow-session-state.js +103 -1
  123. package/scripts/flow-setup-hooks.js +1 -1
  124. package/scripts/flow-skeptical-evaluator.js +274 -0
  125. package/scripts/flow-skill-generator.js +3 -3
  126. package/scripts/flow-skill-learn.js +3 -6
  127. package/scripts/flow-skill-manage.js +248 -0
  128. package/scripts/flow-spec-verifier.js +1 -1
  129. package/scripts/flow-standards-checker.js +75 -0
  130. package/scripts/flow-standards-gate.js +1 -1
  131. package/scripts/flow-statusline-setup.js +8 -2
  132. package/scripts/flow-step-changelog.js +2 -2
  133. package/scripts/flow-step-coverage.js +1 -1
  134. package/scripts/flow-step-knowledge.js +1 -1
  135. package/scripts/flow-step-regression.js +1 -1
  136. package/scripts/flow-step-simplifier.js +1 -1
  137. package/scripts/flow-task-analyzer.js +1 -1
  138. package/scripts/flow-task-classifier.js +1 -1
  139. package/scripts/flow-task-enforcer.js +1 -1
  140. package/scripts/flow-template-extractor.js +1 -1
  141. package/scripts/flow-trap-zone.js +1 -1
  142. package/scripts/flow-utils.js +4 -0
  143. package/scripts/flow-worker-mcp-strip.js +122 -0
  144. package/scripts/flow-worker-question-classifier.js +51 -5
  145. package/scripts/flow-workspace-migrate-ipc.js +216 -0
  146. package/scripts/flow-workspace-summary.js +256 -0
  147. package/scripts/hooks/adapters/base-adapter.js +2 -2
  148. package/scripts/hooks/core/feature-dossier-gate.js +194 -0
  149. package/scripts/hooks/core/observation-capture.js +24 -0
  150. package/scripts/hooks/core/overdue-dispatches.js +20 -1
  151. package/scripts/hooks/core/phase-gate.js +15 -1
  152. package/scripts/hooks/core/phase-transition-auto-review.js +61 -0
  153. package/scripts/hooks/core/post-compact.js +5 -2
  154. package/scripts/hooks/core/pre-tool-orchestrator.js +21 -0
  155. package/scripts/hooks/core/routing-gate.js +58 -0
  156. package/scripts/hooks/core/session-context.js +108 -0
  157. package/scripts/hooks/core/session-end-memory-proposals.js +65 -0
  158. package/scripts/hooks/core/session-end-skill-proposals.js +58 -0
  159. package/scripts/hooks/core/session-end.js +25 -0
  160. package/scripts/hooks/core/setup-handler.js +1 -1
  161. package/scripts/hooks/core/task-boundary-reset.js +110 -4
  162. package/scripts/hooks/core/worker-boundary-gate.js +71 -0
  163. package/scripts/hooks/core/worker-tool-first-gate.js +275 -0
  164. package/scripts/hooks/entry/claude-code/post-tool-use.js +2 -2
  165. package/scripts/hooks/entry/claude-code/pre-tool-use.js +7 -2
  166. package/scripts/hooks/entry/claude-code/session-start.js +74 -30
  167. package/scripts/hooks/entry/claude-code/stop.js +47 -1
  168. package/scripts/hooks/entry/claude-code/user-prompt-submit.js +17 -0
  169. package/.workflow/templates/partials/user-commands.hbs +0 -20
@@ -131,6 +131,36 @@ Return:
131
131
 
132
132
  **If investigation fails to identify a clear root cause**, consider escalating to `/wogi-debug-hypothesis` which spawns parallel agents to investigate competing theories about the root cause.
133
133
 
134
+ #### Evidence Inventory (A3 — wf-f64f58b0)
135
+
136
+ Before moving to Phase 2.5, you MUST have consulted at least **5 of 7** source categories across the agent runs (or the sequential-fallback). A root cause grounded in one grep is a guess, not a diagnosis. Breadth of evidence is the strongest predictor of correct root-cause identification.
137
+
138
+ **The seven source categories** (each counts once, regardless of repeat reads):
139
+
140
+ 1. **Code read** — actual file content examined at the relevant lines
141
+ 2. **Grep / search** — pattern search across the codebase
142
+ 3. **Git log / blame** — history of the suspect file or symbol
143
+ 4. **Test run or test file read** — what the tests exercise vs. what they miss
144
+ 5. **Log / telemetry / gate-telemetry** — runtime evidence (`/wogi-gate-stats`, browser console, server log)
145
+ 6. **Type / interface definition** — the contract-level view from a types or schema file
146
+ 7. **User-confirmed assumption or prior correction** — from `feedback-patterns.md` or user dialogue
147
+
148
+ **Emit this block in the final bug report (under "Evidence inventory"):**
149
+
150
+ ```
151
+ Sources consulted: N / 7
152
+ [✓] Code read: <file:line>
153
+ [✓] Grep: <pattern> → <n hits>
154
+ [✓] Git log: <finding>
155
+ [✓] Test: <test or finding>
156
+ [✓] <fifth source>
157
+ [ ] <skipped categories with one-line reason>
158
+ ```
159
+
160
+ **Downgrade clause**: if N < 5, the final diagnosis/root-cause language is downgraded — say **"likely cause"** instead of "root cause", and **"probable fix"** instead of "fix". (A4 formalizes this as the 95/85/75 confidence-tier rubric — this clause is the placeholder until A4 lands.)
161
+
162
+ **Skippable only when**: the bug is a typo, single-line obvious fix, or user explicitly said "skip deep investigation". Note the reason in the inventory block.
163
+
134
164
  ### Phase 2.5: Hypothesis Verification Gate (MANDATORY)
135
165
  <!-- PIN: hypothesis-verification-gate -->
136
166
 
@@ -55,6 +55,39 @@ If any assumption is **CONTRADICTED** (user refers to a component that doesn't e
55
55
 
56
56
  This catches the "investigate imaginary code" failure mode before wasting a parallel agent run.
57
57
 
58
+ ### Step 0.5: Reflect on 5-7 Independent Sources (A3 — wf-f64f58b0)
59
+
60
+ Before moving to consolidation (Step 4), you MUST have consulted at least **5 of 7** source categories. A hypothesis grounded in 1-2 code reads is a hypothesis, not a diagnosis. Breadth of evidence is the single strongest predictor of correct root-cause identification across the research corpus that informed WogiFlow v2.27.
61
+
62
+ **The seven source categories** (a source only counts once, even if you read it repeatedly):
63
+
64
+ 1. **Code read** — actual file content examined at the relevant lines
65
+ 2. **Grep / search** — pattern search across the codebase for call sites, imports, or string literals referencing the suspect area
66
+ 3. **Git log / blame** — history of the suspect file or symbol (who changed what, and when)
67
+ 4. **Test run or test file read** — what the tests exercise vs. what they miss
68
+ 5. **Log / telemetry / gate-telemetry** — runtime evidence (browser console, server log, `/wogi-gate-stats`)
69
+ 6. **Type / interface definition** — the contract-level view from a types or schema file
70
+ 7. **User-confirmed assumption or prior correction** — from Step 1 (Tier 2) or `feedback-patterns.md`
71
+
72
+ **Required output before Step 2**:
73
+
74
+ ```
75
+ ━━━ EVIDENCE INVENTORY ━━━
76
+ Consulted (5+ required):
77
+ [✓] Code read: <file:line>
78
+ [✓] Grep: <pattern> → <n hits in m files>
79
+ [✓] Git log: <file or symbol> → <key finding>
80
+ [✓] Test: <testfile or run result>
81
+ [✓] <fifth source>
82
+ [ ] <skipped categories with one-line reason>
83
+ Sources consulted: N / 7
84
+ ━━━━━━━━━━━━━━━━━━━━━━━━━
85
+ ```
86
+
87
+ **Downgrade clause**: if N < 5, the final diagnosis language is downgraded — say **"likely cause"** instead of "root cause", and **"probable fix"** instead of "fix". (A4 formalizes this as the 95/85/75 tier rubric — this clause is the placeholder until A4 lands.)
88
+
89
+ **Skippable only when**: the bug is a typo, single-line obvious fix, or the user explicitly said "skip deep investigation". Note the reason in the inventory block.
90
+
58
91
  ### Step 1: Assumption Surfacing (Tier 2 — MANDATORY unless `--no-assumptions`)
59
92
 
60
93
  Before generating ANY hypothesis, identify the domain-model assumptions your theories will depend on. Present them in a fenced block and **WAIT** for user confirmation.
@@ -153,7 +153,6 @@ fs.writeFileSync('.workflow/state/session-state.json', JSON.stringify(sessionSta
153
153
  Configuration in `.workflow/config.json`:
154
154
  ```json
155
155
  "morningBriefing": {
156
- "enabled": true,
157
156
  "showLastSession": true,
158
157
  "showChanges": true,
159
158
  "showRecommendedTasks": 3,
@@ -166,7 +165,7 @@ Configuration in `.workflow/config.json`:
166
165
  }
167
166
  ```
168
167
 
169
- Set `enabled: false` to disable this command.
168
+ `/wogi-morning` is user-invoked — there is no "enabled" toggle. Don't run the command if you don't want a briefing.
170
169
 
171
170
  ## Stale Skills Section (Implementation Details)
172
171
 
@@ -711,6 +711,29 @@ SEVERITY IS CAPPED BY TIER:
711
711
  - Tier 1: severity capped at MEDIUM (unless grep returned >=5 instances → HIGH allowed)
712
712
  - Tier 2+: severity stands as you assign it
713
713
 
714
+ IMPORTANT: Confidence Tier (95 / 85 / 75) — see `.workflow/rubrics/confidence-tiers.md`
715
+
716
+ Every finding MUST also carry:
717
+
718
+ confidencePct: exactly one of 95, 85, or 75 (no other values)
719
+ 95 = HIGH — tier ≥3, OR tier 2 with 2+ observations,
720
+ OR tier 1 with ≥10 hits across ≥3 files
721
+ 85 = MEDIUM — tier 2 (single obs), OR tier 1 with 5-9 hits,
722
+ OR tier 1 with ≥3 hits across ≥2 files
723
+ 75 = LOW — tier 0, OR tier 1 with 1-4 isolated hits, OR missing evidenceNote
724
+ → MUST set flagUnverified=true; severity capped at LOW
725
+
726
+ flagUnverified: boolean (auto-true for confidencePct=75)
727
+
728
+ confidenceNote: one-line justification — required ONLY if you override the
729
+ default mapping (e.g. tier 1 with 6 hits but flagged 75 because 5 are in
730
+ the same block of code). Otherwise omit.
731
+
732
+ LANGUAGE by confidence tier:
733
+ 95 → assertive ("is", "breaks", "requires")
734
+ 85 → hedged ("likely", "appears to", "in most paths")
735
+ 75 → speculative ("might", "could", "possibly") + propose a verification action
736
+
714
737
  Also respect the FRAMING ARTIFACT from Phase 0 — only report findings within
715
738
  `scopeIn`. Findings outside `scopeOut` will be moved to an appendix by the
716
739
  orchestrator.
@@ -718,7 +741,9 @@ orchestrator.
718
741
 
719
742
  **Why evidence tiers matter**: During this project's own self-review (session logs), a `code-reviewer` agent reported an F1 finding as "Critical — broken require path" without citing evidence. Manual verification via `require.resolve()` showed the path was correct — the agent's path math was flawed. With tier enforcement, F1 would have been Tier 0 (no grep, no execution), capped at LOW, and flagged UNVERIFIED — alerting the reader to verify before acting.
720
743
 
721
- **Config toggles**: `review.evidenceTiers.enabled` (default true), `review.evidenceTiers.capByTier` (default true — enforce severity caps).
744
+ **Why confidence tiers matter**: Evidence tier (0–4) is mechanical ("what produced the evidence"); confidence tier (95/85/75) is judgment ("how likely is this real and actionable"). Two findings at the same evidence tier can warrant different confidence — e.g. a grep hit of 1 match vs. 14 matches across 9 files. The 95/85/75 bucket drives language, severity cap, and whether the finding is flagged UNVERIFIED. The full reconciliation table is in `.workflow/rubrics/confidence-tiers.md`.
745
+
746
+ **Config toggles**: `review.evidenceTiers.enabled` (default true), `review.evidenceTiers.capByTier` (default true — enforce severity caps), `review.confidenceTiers.enabled` (default true — require 95/85/75 on every finding).
722
747
 
723
748
  **2.4. Launch ALL agents in parallel** (single message with N Task tool calls, subagent_type=Explore)
724
749
 
@@ -742,7 +767,11 @@ orchestrator.
742
767
  "issue": "Description of the issue",
743
768
  "recommendation": "How to fix it",
744
769
  "autoFixable": false,
745
- "agent": "code-logic|security|architecture|performance|project-rules-[slug]"
770
+ "agent": "code-logic|security|architecture|performance|project-rules-[slug]",
771
+ "evidenceTier": 3,
772
+ "evidenceNote": "grep 'foo' returned 7 matches in src/api/",
773
+ "confidencePct": 95,
774
+ "flagUnverified": false
746
775
  }
747
776
  ],
748
777
  "triaged": false
@@ -29,6 +29,36 @@ If `false` OR the input is natural language → continue with the full prompt be
29
29
 
30
30
  ### Pre-Routing Checks (Automatic)
31
31
 
32
+ **Epic Decompose-and-Run Cascade** (Story E / wf-e28b6cd8): When `/wogi-start
33
+ <epicId>` finishes decomposing an epic into child stories, immediately call
34
+ `node scripts/flow-epic-cascade.js resolve <epicId>`. The result indicates one
35
+ of three actions:
36
+ - `invoke-skill` → call `Skill(skill="wogi-start", args=<taskId>)` in the SAME
37
+ turn (Option A, zero-latency, used in interactive mode by default).
38
+ - `restart-with-marker` → marker has been written; end the turn and rely on the
39
+ task-boundary-reset SIGTERM cascade (Option B, fresh context per story, used
40
+ when autonomous mode is active by default).
41
+ - `abort` → emit a warning with the `reason` (no-children / target-missing); do
42
+ not cascade. The user retains control.
43
+
44
+ Strategy is configurable via `autonomousMode.cascadeStrategy` (`"auto"` |
45
+ `"direct"` | `"restart"`, default `"auto"`). Cascade only fires for epics — non-
46
+ epic `/wogi-start` invocations are unaffected.
47
+
48
+ **Autonomous Walk-Away Mode Trigger**: Before any other classification, run
49
+ `node scripts/flow-autonomous-mode.js activate "<user-message>"`. If the message
50
+ matches a trigger phrase (`go until you finish`, `autonomous mode`, `run this autonomously`,
51
+ `don't bother me, just do it`, `walk-away mode`, `go ahead until done`, etc.), the
52
+ helper writes `autonomousMode` to `session-state.json` (atomic) and returns the
53
+ activation record. The flag survives task-boundary SIGTERM restarts via SessionStart
54
+ re-hydration. While active, decision-routing in `flow-decision-authority.js` returns
55
+ `queue-for-review` for productBehavior/ux questions instead of `owner-decides` —
56
+ the AI MUST NOT ask the user; questions go to `flow-question-queue` and surface in
57
+ the end-of-run summary. To exit: user says `stop`/`pause`, the ready queue drains,
58
+ or the staleness threshold trips. Run `flow-autonomous-mode.js finalize <reason>`
59
+ to render the completion summary and clear the flag. Detection fails closed — if
60
+ the classifier errors, mode stays interactive (the safer default).
61
+
32
62
  **Long Input Detection**: If `config.longInputGate.enabled` and EITHER:
33
63
  - Prompt > `lineThreshold` (40) lines, OR
34
64
  - Prompt contains 5+ discrete items (numbered lists, bullet points, semicolon-separated requests)
@@ -193,6 +223,8 @@ After task level classification (L0-L3), set the reasoning effort level to optim
193
223
 
194
224
  This is advisory. Claude Code's effort levels: `low` / `medium` / `high` are universal. Claude Code 2.1.111+ added `xhigh` (between high and max) and `max` as Opus 4.7-only levels — other models fall back to `high`. Use `/effort` interactively (slider as of 2.1.111) to switch mid-session. The AI should adjust reasoning depth during implementation phases accordingly.
195
225
 
226
+ **Note on Claude Code 2.1.117 default change**: Claude Code 2.1.117 raised the default effort for Pro/Max subscribers on Opus 4.6 and Opus 4.7 from `medium` to `high`. WogiFlow's advisory mapping above is **task-level-scoped** (L2 recommends `medium` because 1–5 file changes don't need deep reasoning), not a global session default. It intentionally differs from Claude Code's new default — L2 work runs faster at `medium` regardless of Pro/Max. If you're on Pro/Max with Opus 4.6/4.7 and want the CC default for everything, use `/effort high` at session start and ignore the L2 row.
227
+
196
228
  ### Task Checkpoints (when `config.proactiveCompaction.enabled`)
197
229
 
198
230
  At each phase boundary: save checkpoint to `.workflow/state/task-checkpoint.json` (task ID, phase, completed scenarios, changed files, verification results). If context >= `triggerThreshold` (75%), run `/wogi-pre-compact` before proceeding.
@@ -17,6 +17,7 @@ This command helps you configure Claude Code's status line (shown at the bottom
17
17
 
18
18
  - Claude Code v1.0.52+ (January 2026 or later) — `context_window.used_percentage` field
19
19
  - Claude Code v2.1.97+ (optional) — `refreshInterval` setting and `workspace.git_worktree` variable
20
+ - Claude Code v2.1.119+ (optional) — `effort.level` and `thinking.enabled` fields
20
21
 
21
22
  ## Setup Instructions
22
23
 
@@ -67,6 +68,8 @@ Claude Code 2.1.97+. Omit the field or set it to 0 to disable auto-refresh.
67
68
  | `{{task.title}}` | Current task title |
68
69
  | `{{skill}}` | Currently active skill |
69
70
  | `{{workspace.git_worktree}}` | Truthy when cwd is inside a linked git worktree (2.1.97+) |
71
+ | `{{effort.level}}` | Active reasoning effort: `low` / `medium` / `high` / `xhigh` / `max` (2.1.119+) |
72
+ | `{{thinking.enabled}}` | Truthy when extended thinking is on (2.1.119+) |
70
73
  | `{{worktree.name}}` | Worktree name (if running in --worktree session) |
71
74
  | `{{worktree.branch}}` | Worktree branch name |
72
75
  | `{{worktree.path}}` | Worktree directory path |
@@ -101,6 +104,15 @@ branch label.
101
104
  "format": "{{#if worktree}}[WT:{{worktree.branch}}] {{/if}}{{#if task}}[{{task.id}}] {{/if}}{{model}} | Ctx: {{context_window.used_percentage}}%"
102
105
  ```
103
106
 
107
+ **With Effort + Thinking** (Claude Code 2.1.119+):
108
+ ```json
109
+ "format": "{{#if task}}[{{task.id}}] {{/if}}{{model}} | Ctx: {{context_window.used_percentage}}%{{#if effort.level}} | {{effort.level}}{{/if}}{{#if thinking.enabled}} | 🧠{{/if}}"
110
+ ```
111
+
112
+ The `{{#if effort.level}}` and `{{#if thinking.enabled}}` guards keep surrounding
113
+ punctuation from collapsing on older Claude Code versions that don't emit those
114
+ fields — Handlebars renders missing paths as empty strings without throwing.
115
+
104
116
  ## WogiFlow Integration
105
117
 
106
118
  To have WogiFlow automatically update the status line with current task info, we need to:
@@ -16,9 +16,9 @@ For multi-item inputs, the command output MUST include: **"All {N} items capture
16
16
 
17
17
  This rule applies equally to deep-decomposition mode and flat stories.
18
18
 
19
- ## Specification-Quality Gates (wf-63c0f4cc)
19
+ ## Specification-Quality Gates (wf-63c0f4cc + wf-fe8ef64d)
20
20
 
21
- Five P0 gates run automatically at creation time (all fail-open):
21
+ Six P0 gates run automatically at creation time (all fail-open):
22
22
 
23
23
  | Gate | Fires When | Effect |
24
24
  |------|-----------|--------|
@@ -27,6 +27,7 @@ Five P0 gates run automatically at creation time (all fail-open):
27
27
  | 3. Consumer Impact | input contains refactor/rename/migrate/etc. | greps consumers, flags phased migration at ≥5 breaking |
28
28
  | 4. Scope-Confidence | input mentions "new X" / "existing Y" / "the Z service" | audits assumptions → "Pending Clarifications" block |
29
29
  | 5. Intent Bootstrap | IGR artifacts missing + not already scheduled | schedules background bootstrap via session-state.json |
30
+ | 6. AC Scope-Preservation | any task with ≥1 criterion | snapshots originals to `.workflow/state/ac-snapshots/<taskId>.json`; `/wogi-done` re-verifies at close time and surfaces PRESERVED / MODIFIED / DROPPED / ADDED checklist. Blocks completion on silent drops or 2-into-1 collapses. See `scripts/flow-ac-scope-preservation.js`. |
30
31
 
31
32
  Gates enforce **specification quality at creation time**; runtime-quality gates (wiring, typecheck, tests) remain `/wogi-start`'s job.
32
33
 
@@ -75,6 +75,8 @@ flow parallel check # See available parallel tasks
75
75
  | 2.9.0+ | 2.1.90+ | --resume deferred-tool cache fix, MCP schema perf, PostToolUse format-on-save fix, PreToolUse exit-code-2 fix, .husky protected |
76
76
  | 2.9.2+ | 2.1.97+ | Stop/SubagentStop long-session fix, subagent worktree cwd leak fix, refreshInterval status line, workspace.git_worktree, MCP HTTP/SSE leak fix, 429 backoff, compaction transcript dedup |
77
77
  | 2.18.0+ | 2.1.108+ | ENABLE_PROMPT_CACHING_1H guidance, /recap awareness, /doctor MCP duplicate-scope mirror in `/wogi-health` |
78
+ | 2.27.0+ | 2.1.116+ | Sandbox dangerous-path safety on auto-allow, agent frontmatter hooks for `--agent`, `/resume` large-session speedup, MCP stdio concurrent startup |
79
+ | 2.27.0+ | 2.1.117+ | Native bfs/ugrep via Bash (hook audit documented), Opus 4.7 /context fix (estimator already percentage-based), Pro/Max effort default shift (advisory delta documented), agent frontmatter `mcpServers` for `--agent`, subagent model-mismatch malware-warning fix, managed-settings plugin marketplace enforcement |
78
80
 
79
81
  ### Environment Variables (2.1.19+)
80
82
 
@@ -439,6 +441,44 @@ await cancelTask('wf-123', 'superseded', false);
439
441
 
440
442
  - **Reliability fixes (all automatic after upgrade)**: Terminal display tearing in iTerm2+tmux, `@`-file suggestions re-scanning entire project in non-git directories, LSP diagnostics from before an edit appearing after it, tab-completing `/resume` behavior, `/context` grid rendering, `/clear` dropping session name, spurious decompression/network/transient errors in the TUI. Reverted v2.1.110 cap on non-streaming fallback retries (now uncapped again). Fixed Bedrock/Vertex/Foundry 429 retries pointing users at the wrong status page, bare URLs unclickable when wrapped in tool output, feedback surveys appearing back-to-back. WogiFlow sessions benefit from all of these automatically.
441
443
 
444
+ ### Features in 2.1.116+
445
+
446
+ - **Agent frontmatter hooks fire for main-thread `--agent` sessions (BUG FIX)**: Previously, hooks declared in an agent frontmatter only fired when the agent ran as a sub-agent. When the same agent was invoked on the main thread via `--agent`, its hooks silently did not fire. **Impact on WogiFlow**: WogiFlow ships several agents under `.workflow/agents/` (logic-adversary, architect, etc.). Users running them via `--agent` previously lost any per-agent hook behavior. Automatic improvement after upgrade — no WogiFlow code change.
447
+
448
+ - **`/resume` large-session speedup (up to 67% on 40MB+ sessions)**: `/resume` now loads significantly faster on large sessions and handles many dead-fork entries efficiently. **Impact on WogiFlow**: WogiFlow's post-compact state recovery and task-checkpoint resume benefit directly — long bulk sessions (`/wogi-bulk`, `/wogi-bulk-loop`) that accumulate large transcripts restart faster. No code change needed.
449
+
450
+ - **MCP stdio concurrent startup**: Multiple stdio MCP servers now start concurrently instead of serially, and `resources/templates/list` is deferred to first `@`-mention. **Impact on WogiFlow**: Users with several MCP servers (figma, atlassian, gmail) see faster session startup. Orthogonal to WogiFlow. No action.
451
+
452
+ - **Sandbox auto-allow honors dangerous-path safety check (SECURITY)**: Sandbox auto-allow rules no longer bypass the dangerous-path safety check when a command targets `/`, `$HOME`, or other critical system directories (e.g. `rm -rf /`, `rmdir $HOME`). **Impact on WogiFlow**: Reinforces `.claude/rules/security/security-patterns.md §6` — destructive commands should be scoped to safe variants, not blanket-wildcarded. WogiFlow's installer-generated permission rules already scope `git reset`, `git restore`, `git clean` away from blanket wildcards (v2.19.0+). No code change needed; this is defense-in-depth for users who hand-edited their settings.
453
+
454
+ - **Bash tool `gh` rate-limit hint**: Bash tool surfaces a hint when `gh` commands hit GitHub's API rate limit, so agents can back off instead of retrying. **Impact on WogiFlow**: Affects `/wogi-finalize`, `/wogi-review` when they shell out to `gh`. No code change needed — WogiFlow's commands already terminate on CLI errors instead of retry-looping.
455
+
456
+ - **Other UX fixes**: Thinking spinner shows inline progress, `/config` search matches values, Bash security no longer bypasses dangerous-path check for wildcard-allowed rm/rmdir, `/resume` reports load errors on large files instead of silently showing empty, `/doctor` can open during a response. All automatic. See `.claude/rules/security/security-patterns.md §6` for the permission-rule pattern the sandbox fix reinforces.
457
+
458
+ ### Features in 2.1.117+
459
+
460
+ - **Native macOS/Linux builds replace Glob and Grep tools with embedded bfs/ugrep via Bash**: On native builds (not npm, not Windows), Claude Code now executes `bfs`/`ugrep` through the Bash tool instead of the separate Glob/Grep tools. **Impact on WogiFlow**:
461
+ - **Hooks that match on `tool === 'Glob'|'Grep'`**: audited 2026-04-22. All matches are in **allow-list** contexts (`phase-gate.js`, `routing-gate.js`, `manager-boundary-gate.js`, `pre-tool-orchestrator.js` classify Glob/Grep as read-only; `observation-capture.js` logs them). On native builds, search operations arrive as `Bash` — Bash is NOT in those read-only allow-lists, so search calls get the stricter Bash treatment. No bypass, no gate weakening.
462
+ - **Evidence tracking** (`research-evidence-gate.js`) and **scope enforcement** (`scope-gate.js`) do NOT match on Glob/Grep — unaffected.
463
+ - **Observation gap (minor)**: `observation-capture.js` no longer logs search patterns from native-build users. Cosmetic — does not affect enforcement.
464
+ - **WogiFlow scripts do NOT invoke `bfs`/`ugrep` directly** — those binaries are embedded in Claude Code's native build only, not on users' PATH. Invoking them would break npm/Windows users. Existing Node `fs` / `ripgrep` / `git grep` tooling is the cross-platform path. **No code change needed.**
465
+
466
+ - **Opus 4.7 `/context` fix — was computing against 200K instead of native 1M**: Claude Code 2.1.117 fixed inflated `/context` percentages and premature auto-compaction on Opus 4.7 sessions. **Impact on WogiFlow**: `scripts/flow-context-estimator.js` operates entirely in percentages provided by Claude Code — no hardcoded 200K/1M assumption. Audited 2026-04-22. When Claude Code reports the correct percentage post-upgrade, the estimator consumes it correctly. **No code change needed.** The only env-var-gated branch is `CLAUDE_CODE_DISABLE_1M_CONTEXT`, which correctly tightens thresholds when the user opts out of extended context.
467
+
468
+ - **Default effort `high` on Opus 4.6/4.7 for Pro/Max subscribers (was `medium`)**: Claude Code raised the session default. **Impact on WogiFlow**: WogiFlow's effort-level advisory table in `.claude/commands/wogi-start.md` is **task-level-scoped** (L2 → `medium` because 1–5 file changes don't need deep reasoning), not a global default. It intentionally differs from Claude Code's new default — documented inline in the table. Users on Pro/Max wanting the CC default everywhere can use `/effort high` at session start.
469
+
470
+ - **Agent frontmatter `mcpServers` loaded for main-thread `--agent` sessions**: Complements the 2.1.116 hook fix — MCP servers declared in an agent's frontmatter are now loaded when the agent is invoked via `--agent`. **Impact on WogiFlow**: WogiFlow's `.workflow/agents/` personas that depend on MCP tooling now work correctly when invoked on the main thread. No code change needed.
471
+
472
+ - **Subagent model-mismatch malware-warning fix**: Previously, subagents running on a different model than the main agent incorrectly flagged file reads with a malware warning. **Impact on WogiFlow (HIGH)**: IGR architect + adversary passes routinely run on a DIFFERENT model from the main agent (config: `intentGroundedReasoning.adversaryModel`, `researchReasoningGate.tier3.adversaryModel`). Pre-fix, these sub-agents could see spurious "malware" warnings on normal file reads during critique. Post-fix, IGR sub-agent reads are clean. Automatic improvement. No code change needed.
473
+
474
+ - **Managed-settings `blockedMarketplaces`/`strictKnownMarketplaces` enforcement on plugin install/update**: Enterprise admins can now block plugin marketplaces, enforced at install, update, refresh, and autoupdate time. **Impact on WogiFlow**: Relevant to `/wogi-register` and plugin-registry routing (`plugin-registry.json`). Users in managed environments may now be blocked from installing plugins WogiFlow's registry references. `/wogi-register` should surface the underlying Claude Code error verbatim (it already does — no code change). Future enhancement: detect a blocked-marketplace error and suggest the admin-controlled alternative.
475
+
476
+ - **Pro/Max Opus 4.7 `/context` fix — expanded details**: Fixed alongside the percentage fix: Opus 4.7 sessions no longer auto-compact early. This pairs with WogiFlow's smart-compaction thresholds, which were recalibrated to 0.80 safe / 0.92 emergency under Claude Code 2.1.75+ token accuracy (see `scripts/flow-context-estimator.js:65`).
477
+
478
+ - **Other notable fixes**: Plain-CLI OAuth sessions now refresh tokens reactively on 401 (no more mid-session "Please run /login"); WebFetch no longer hangs on very large HTML (truncates before conversion); `/login` works when launched with `CLAUDE_CODE_OAUTH_TOKEN` env var and token expires; Windows caches `where.exe` lookups per process; Bedrock `application-inference-profile` + Opus 4.7 with thinking disabled no longer returns 400; MCP `elicitation/create` no longer auto-cancels in print/SDK mode when the server connects mid-turn. All automatic after upgrade.
479
+
480
+ - **Experimental flag**: `CLAUDE_CODE_FORK_SUBAGENT=1` enables forked subagents on external builds. Not currently consumed by WogiFlow. Tracked as a future enhancement for faster IGR sub-agent spawning.
481
+
442
482
  ### Simple Mode Naming Distinction
443
483
 
444
484
  Claude Code's `CLAUDE_CODE_SIMPLE` environment variable (which enables a simplified tool set) is **unrelated** to WogiFlow's `loops.simpleMode` (a lightweight task completion loop using string detection). They are separate features that happen to share the word "simple":
@@ -7,7 +7,8 @@ Instructions for the explore phase of task execution. Loaded on-demand when phas
7
7
  1. Read `ready.json`, move task to inProgress
8
8
  2. Load task context from `.workflow/changes/*/wf-XXXXXXXX.md`
9
9
  3. Check `app-map.md`, `function-map.md`, `api-map.md`, `decisions.md`
10
- 4. Auto-invoke matched skills based on task context
10
+ 4. **Generate repo map** (wf-f3707d2f / C1) — auto-generated per task and refreshed per turn: `node scripts/flow-repo-map.js generate --task=<taskId>`. The map surfaces TOUCHED + ADJACENT + SHAPE sections within a bounded token budget (default 16KB ≈ 4K tokens). Config: `repoMap.enabled` (default true), `repoMap.budgetBytes`. Skip if output is empty (no touched files yet).
11
+ 5. Auto-invoke matched skills based on task context
11
12
 
12
13
  ## Step 1.15: Intent Framing Pass (when `config.intentGroundedReasoning.enabled`)
13
14
 
@@ -2,6 +2,10 @@
2
2
 
3
3
  Instructions for the implementation phase. Loaded on-demand when phase transitions to `coding`.
4
4
 
5
+ ## Step 2.9: Refresh Repo Map (wf-f3707d2f / C1)
6
+
7
+ At the start of each coding turn, regenerate the task-aware repo map so TOUCHED / ADJACENT reflect the latest changes: `node scripts/flow-repo-map.js generate --task=<taskId>`. The map stays within `repoMap.budgetBytes` (default 16KB) and surfaces what you just modified + what imports it. Skip if `config.repoMap.enabled === false` or the output is empty.
8
+
5
9
  ## Step 3: Execute Each Scenario (Loop)
6
10
 
7
11
  **When TDD is NOT active**, use this normal flow. For each acceptance criterion:
@@ -233,6 +233,51 @@ For EACH acceptance criterion in the spec:
233
233
 
234
234
  **Minimum: Tier 2 for display criteria, Tier 3 for behavioral criteria.**
235
235
 
236
+ #### Skeptical Evaluator (B5 — wf-15175dbc)
237
+
238
+ **When to use**: every L1+ task in the validating phase. It forces three enumeration passes before "done" is allowed: UI fields, API parameters, state keys.
239
+
240
+ Run:
241
+ ```js
242
+ const { buildSkepticalPrompt, parseSkepticalOutput } = require('scripts/flow-skeptical-evaluator');
243
+ const built = buildSkepticalPrompt({ specMarkdown, diffText, changedFiles, commitMessage, taskId });
244
+ // spawn Agent with built.systemPrompt + built.userPrompt
245
+ const result = parseSkepticalOutput(agentResponse, { taskId });
246
+ if (!result.ok) { /* surface blockers + unverifiedClaims to user */ }
247
+ ```
248
+
249
+ The evaluator's built-in pre-checks (BEL grep + spec-bundle grep) are surfaced in the user prompt so the sub-agent is grounded in mechanical data, not vibes. Every finding the evaluator produces must carry `evidenceTier` (0–4) + `confidencePct` (95/85/75) per `.workflow/rubrics/confidence-tiers.md`. Confidence-75 findings auto-flag as `UNVERIFIED`.
250
+
251
+ Config: `intentGroundedReasoning.skepticalEvaluator.enabled` (default true).
252
+
253
+ #### Spec-String Bundle Grep (B4 — wf-07046456)
254
+
255
+ **When to use**: every L1+ task at Tier-3 verification. Extracts the "string bundle" from the spec (backtick IDs, quoted strings, file paths, constants, route paths) and greps each against the diff + changed files + built bundle.
256
+
257
+ Run: `const { extractSpecStrings, verifySpecBundleCoverage, formatSpecBundleResult } = require('scripts/flow-completion-truth-gate');`
258
+
259
+ Per-category coverage thresholds (defaults):
260
+ - File paths: 100% (every file the spec names must appear in the diff)
261
+ - Route paths: 100%
262
+ - Constants: 80%
263
+ - Backtick IDs: 80%
264
+ - Quoted strings: 70% (allows for paraphrasing of error messages that were prototypes)
265
+
266
+ Report the diff: any category below threshold surfaces the missing strings. The user either adds the missing implementation or updates the spec.
267
+
268
+ #### DOM Field Inventory Snapshot (B3 — wf-f9431ef6)
269
+
270
+ **When to use**: Any task that modifies a form, filter, wizard step, settings panel, or other UI surface containing `<input>`, `<select>`, `<textarea>`, or custom input components.
271
+
272
+ Follow the protocol in `.workflow/templates/tier3-dom-field-inventory.md`:
273
+
274
+ 1. **Before**: snapshot the field inventory (name, label, type, default, required, validation, visibility) → `.workflow/verifications/<taskId>/dom-inventory-before.md`
275
+ 2. **After**: re-snapshot with the new code → `.workflow/verifications/<taskId>/dom-inventory-after.md`
276
+ 3. **Diff**: classify each field as preserved / modified / vanished / added → `.workflow/verifications/<taskId>/dom-diff.md`
277
+ 4. **Reconcile** against the task spec — any vanished/modified/added field that isn't named in an AC must be surfaced to the user before proceeding.
278
+
279
+ This catches "silent field vanishing" bugs (see `feedback-patterns.md`) that lint + typecheck + smoke-test all miss because the missing field has no consumer in the critical path.
280
+
236
281
  #### Verification Method Selection
237
282
 
238
283
  Run: `node node_modules/wogiflow/scripts/flow-runtime-verification.js method`
@@ -0,0 +1,36 @@
1
+ # Auto-Generated Rules
2
+
3
+ This directory is auto-generated from `.workflow/state/decisions.md`.
4
+
5
+ **DO NOT EDIT THESE FILES DIRECTLY.**
6
+
7
+ Edit `decisions.md` instead, then run:
8
+ ```bash
9
+ node scripts/flow-rules-sync.js
10
+ ```
11
+
12
+ Alternatively, the rules auto-sync whenever decisions.md is updated.
13
+
14
+ ## How It Works
15
+
16
+ - Each section in decisions.md becomes a separate rule file
17
+ - Rules are path-scoped based on section keywords (e.g., "component" rules only load for component files)
18
+ - Claude Code automatically loads these rules for context-aware guidance
19
+
20
+ ## Rule Types
21
+
22
+ Rules have `alwaysApply` frontmatter that determines loading behavior:
23
+
24
+ - **`alwaysApply: true`** - Always loaded (rules whose title contains any of: general, always, project, naming, coding, standard, convention, must, never, critical, security)
25
+ - **`alwaysApply: false`** - Agent-requested: Claude decides whether to load based on description relevance to current task
26
+
27
+ This saves tokens by not loading React rules when working on backend code, etc.
28
+
29
+ ## Files
30
+
31
+ - dual-repo-architecture-2026-02-28.md
32
+ - github-release-workflow-2026-01-30.md
33
+ - alternative-short-name.md
34
+ - alternative-hand-edit-ready-json-to-register-orpha.md
35
+
36
+ Last synced: 2026-04-24T06:42:44.238Z
@@ -0,0 +1,82 @@
1
+ ---
2
+ alwaysApply: false
3
+ description: "Worker tool-first-turn contract — workspace worker mode only (WOGI_WORKSPACE_ROOT set, WOGI_REPO_NAME !== 'manager')"
4
+ globs: "scripts/hooks/core/worker-tool-first-gate.js,.workflow/templates/worker-rules.md"
5
+ ---
6
+
7
+ # Worker Tool-First Turn Contract
8
+
9
+ **Applies to**: workspace worker mode (`WOGI_WORKSPACE_ROOT` set + `WOGI_REPO_NAME !== 'manager'`).
10
+
11
+ **Rule**: Every worker turn that follows a `UserPromptSubmit` (channel dispatch from the manager) MUST contain at least one tool call. In **strict mode** (default), the first assistant content block MUST be a tool call, not text.
12
+
13
+ ## Three violations enforced as one rule
14
+
15
+ The table below lists three gates under the unified rule name `worker-tool-first-turn`: the Stop hook detects two distinct violations (G1 and G4), and G6 is the named contract that their block messages reference:
16
+
17
+ | Gate | Violation | Detection |
18
+ |------|-----------|-----------|
19
+ | **G1** | `silent-halt` | Zero `tool_use` blocks across the entire turn (pure-text response). |
20
+ | **G4** | `text-before-tool-call` | First assistant content block is `text`, not `tool_use`. Strict mode only. |
21
+ | **G6** | documented contract | The named rule referenced in block messages so the worker sees one coherent contract, not three independent gates. |
22
+
23
+ ## Why the contract exists
24
+
25
+ Workers communicate with the manager via tool calls:
26
+ - Channel dispatches (`curl` POST to the manager port)
27
+ - File edits (`Edit`, `Write`)
28
+ - Test runs (`Bash`)
29
+ - Structured `## Results` payloads posted back to the manager channel
30
+
31
+ A pure-text response from a worker is **invisible to the user** — the user only sees the manager terminal. Worker text disappears into the transcript with no downstream consumer. It also disqualifies the worker from the three-state end-of-turn contract (`ACTION` | `ESCALATION` | `IDLE`) documented in CLAUDE.md under "Workspace Autonomous-Mode Action-After-Completion Contract".
32
+
33
+ ## Allowed turn shapes (pass)
34
+
35
+ - Pure action: `tool_use` → `tool_use` → end
36
+ - Action with narration after: `tool_use` → `text` → `tool_use` → end
37
+ - Escalation: `tool_use` (channel dispatch of `## QUESTION:`) → end
38
+ - Reply: `tool_use` (channel dispatch of `## Results:`) → end
39
+ - Idle (pre-user-message): zero assistant blocks — not gated since no dispatch happened
40
+
41
+ ## Blocked turn shapes (fail)
42
+
43
+ - **G1**: `text` → end (no tool_use anywhere in turn)
44
+ - **G4**: `text` → `tool_use` → end (strict mode: first block must be tool_use)
45
+
46
+ ## Configuration
47
+
48
+ `.workflow/config.json → workspace.toolFirstTurnGate`:
49
+
50
+ ```json
51
+ {
52
+ "enabled": true,
53
+ "strict": true
54
+ }
55
+ ```
56
+
57
+ - `enabled: false` — disables the gate entirely (silent-halt + text-first both allowed).
58
+ - `strict: false` — G1 still enforced (zero-tool-call blocked), G4 relaxed (text-first allowed as long as a tool_use eventually fires).
59
+
60
+ Default is `enabled: true, strict: true` — maximum worker discipline.
61
+
62
+ ## Fail-open behavior
63
+
64
+ The gate fails open on:
65
+ - Missing transcript path
66
+ - Unreadable / malformed transcript file
67
+ - Config read errors
68
+ - Any unexpected exception
69
+
70
+ Rationale: a silent-halt false-negative is recoverable (the worker will be retried on the next manager cycle via the dispatch-tracking overdue check). A false-positive block on every turn would make the worker unusable — unrecoverable without a code deploy.
71
+
72
+ ## Related gates (same epic, same integration point)
73
+
74
+ - **Existing — "Gap B" (v2.20.0)**: blocks end-of-turn when queued dispatches exist but no task is in-progress. Complements this gate — Gap B runs at the queue boundary; tool-first runs at every turn.
75
+ - **Existing — AI Worker Question Classifier (v2.21.0)**: Haiku classifier detects when a worker ends with a user-facing question. Complements this gate — question classifier is semantic; tool-first is structural.
76
+ - **Existing — Worker Boundary Gate**: blocks `AskUserQuestion` in worker mode. Complements this gate — boundary gate blocks a specific tool; tool-first requires any tool.
77
+
78
+ ## Enforcement
79
+
80
+ - Core logic: `scripts/hooks/core/worker-tool-first-gate.js`
81
+ - Wired into: `scripts/hooks/entry/claude-code/stop.js` (after Gap B, before AI question classifier)
82
+ - Template updated: `.workflow/templates/worker-rules.md` carries the contract verbatim so workers see it in every system prompt.
@@ -0,0 +1,11 @@
1
+ ---
2
+ alwaysApply: false
3
+ description: "Alternative: execpolicy-style TOML Bash allow/deny policy layer - Rejected: 2026-04-24"
4
+ ---
5
+
6
+ # Alternative: execpolicy-style TOML Bash allow/deny policy layer
7
+
8
+ **Rejected**: 2026-04-24
9
+ **Reason**: The spec framed WogiFlow as owner of Bash allow/deny wildcards. It isn't — `.claude/settings.local.json → permissions.{allow,deny,ask}` are enforced by Claude Code *before* any WogiFlow hook runs, and Claude Code already supports all three modes natively (see `.claude/rules/security/security-patterns.md` §6). WogiFlow's PreToolUse Bash gates (`git-safety-gate`, `deploy-gate`, `strike-gate`, `scope-mutation-gate`, `commit-log-gate`) are content-aware — not wildcard lists — so there is nothing for a TOML allowlist to "replace." Implementing the spec as written would duplicate Claude Code's native permission system with a strictly worse version (no UI integration, no session-scope memory, no settings-source hierarchy).
10
+ **Chose instead**: Lean on Claude Code's native `permissions.{allow,deny,ask}` for allow/deny semantics. The only genuinely-additive piece of the D2 spec — **per-phase command overlay** (AC3) — should be re-specced as a small feature that reads phase-scoped overrides from `.workflow/config.json` and augments the existing `pre-tool-orchestrator.js`, if the need resurfaces from a real incident. Don't build it speculatively.
11
+ **Source**: wf-ac2a8074 scope-confidence audit (user chose option A: drop the story entirely)
@@ -0,0 +1,11 @@
1
+ ---
2
+ alwaysApply: false
3
+ description: "Alternative: Hand-edit ready.json to register orphaned specs - Rejected: 2026-04-15"
4
+ ---
5
+
6
+ # Alternative: Hand-edit ready.json to register orphaned specs
7
+
8
+ **Rejected**: 2026-04-15
9
+ **Reason**: CLAUDE.md memory-hierarchy rule forbids hand-editing `.workflow/state/` files to create tasks. Doing so bypasses routing telemetry and breaks the bypass-counter signal that surfaces actual workflow gaps.
10
+ **Chose instead**: One-off script using `flow-utils` `getReadyData` / `saveReadyData` API. The script is self-documenting (kept in `.workflow/scratch/` for the auto-cleanup pass) and uses the same write path the runtime uses.
11
+ **Source**: wf-a3cc5f2a session, state-sync between progress.md and ready.json after epic-episodic-memory wave.
@@ -0,0 +1,11 @@
1
+ ---
2
+ alwaysApply: false
3
+ description: "Alternative: Permission-ruleset-per-phase via WogiFlow hooks - Rejected: 2026-04-24"
4
+ ---
5
+
6
+ # Alternative: Permission-ruleset-per-phase via WogiFlow hooks
7
+
8
+ **Rejected**: 2026-04-24
9
+ **Reason**: Same root cause as wf-ac2a8074 (D2). `.claude/settings.json → permissions.{allow,deny,ask}` is Claude Code's permission system, enforced by Claude Code itself before any WogiFlow hook fires. Making it phase-aware would require Claude Code to be phase-aware — out of WogiFlow's scope. WogiFlow's PreToolUse gates are content-aware (`git-safety-gate`, `scope-mutation-gate`, etc.), not wildcard permission lists, so there's nothing to "phase-scope" at this layer either.
10
+ **Chose instead**: If real per-phase restrictions surface as a need from incident data, extend `pre-tool-orchestrator.js` with phase-scoped content-aware checks (e.g., "block `rm -rf` during validating phase"). Build that narrow thing from a real incident, not speculative per-phase rulesets.
11
+ **Source**: wf-c6c75841 scope-confidence batch audit (user chose "drop H2" as part of the 1+2+3 combination)
@@ -0,0 +1,12 @@
1
+ ---
2
+ alwaysApply: false
3
+ description: "Alternative: <short name> - Rejected: <YYYY-MM-DD>"
4
+ ---
5
+
6
+ # Alternative: <short name>
7
+
8
+ **Rejected**: <YYYY-MM-DD>
9
+ **Reason**: <why we said no — be specific>
10
+ **Chose instead**: <what we did instead, and where it lives>
11
+ **Source**: <task ID, audit, or session that produced this decision>
12
+ -->
@@ -0,0 +1,11 @@
1
+ ---
2
+ alwaysApply: false
3
+ description: "Alternative: WogiFlow-as-MCP-client OAuth manager - Rejected: 2026-04-24"
4
+ ---
5
+
6
+ # Alternative: WogiFlow-as-MCP-client OAuth manager (Cline McpHub pattern)
7
+
8
+ **Rejected**: 2026-04-24
9
+ **Reason**: WogiFlow is a workflow layer that runs *inside* a Claude Code session — it is not an MCP client. No `@modelcontextprotocol/sdk` dependency, no transport/stdio/server-connect code; existing `scripts/flow-mcp-*` scripts only *discover* capabilities Claude Code already exposes (`ToolSearch`, `ListMcpResourcesTool`). Cline's `McpHub` works because Cline owns the MCP client lifecycle. In our stack, Claude Code owns it — by the time a WogiFlow SessionStart hook fires, Claude Code has already loaded (or declined to load) its MCP servers, so "reconnect at session start" has no connection object to attach tokens to.
10
+ **Chose instead**: Rely on Claude Code 2.1+ native MCP OAuth. If a future WogiFlow-as-standalone-agent runtime hosts its own MCP clients, revisit then with a fresh spec grounded in that runtime's lifecycle.
11
+ **Source**: wf-8e97ac77 scope-confidence audit (user chose option C: drop the story entirely)