@windyroad/itil 0.47.12 → 0.47.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/bin/wr-itil-check-outbound-responses-staleness +51 -0
- package/bin/wr-itil-enumerate-postrelease-kv-candidates +51 -0
- package/lib/check-outbound-responses-staleness.sh +93 -0
- package/lib/enumerate-postrelease-kv-candidates.sh +106 -0
- package/package.json +1 -1
- package/scripts/run-check-outbound-responses-staleness.sh +21 -0
- package/scripts/run-enumerate-postrelease-kv-candidates.sh +29 -0
- package/skills/check-upstream-responses/SKILL.md +5 -2
- package/skills/manage-problem/SKILL.md +5 -5
- package/skills/review-problems/SKILL.md +28 -4
- package/skills/review-problems/test/jtbd-301-verdict-shape-contract.bats +225 -0
- package/skills/transition-problem/SKILL.md +1 -1
- package/skills/work-problems/SKILL.md +121 -20
- package/skills/work-problems/test/work-problems-step-0d-outbound-responses-staleness-behavioural.bats +174 -0
- package/skills/work-problems/test/work-problems-step-5-is-error-transient-halt.bats +278 -0
- package/skills/work-problems/test/work-problems-step-5-prompt-body-re-grounding.bats +128 -0
- package/skills/work-problems/test/work-problems-step-6-5-postrelease-kv-callback.bats +209 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
# Contract assertions for /wr-itil:review-problems Step 4.5d + Step 4.5e
|
|
3
|
+
# verdict-shape ack-comment templates (P229 / JTBD-301).
|
|
4
|
+
#
|
|
5
|
+
# Structural assertions — Permitted Exception to the source-grep ban
|
|
6
|
+
# per ADR-005 / P011 / ADR-037 / ADR-052 § Surface 2. SKILL.md prose
|
|
7
|
+
# governs LLM-driven runtime behaviour; behavioural-replay testing
|
|
8
|
+
# requires a synthetic agent harness (P012 master ticket; P176 follow-up
|
|
9
|
+
# for the SKILL.md surface; P324 review-problems agent-prose harness gap).
|
|
10
|
+
# Until that harness lands, contract bats assert the load-bearing
|
|
11
|
+
# template elements are present so future edits don't silently strip
|
|
12
|
+
# the JTBD-301 verdict vocabulary and re-introduce framework-vocab leakage.
|
|
13
|
+
#
|
|
14
|
+
# @problem P229
|
|
15
|
+
# @problem P012 (master harness ticket — justification for structural exception)
|
|
16
|
+
# @problem P176 (SKILL.md surface follow-up)
|
|
17
|
+
# @problem P324 (review-problems agent-prose harness gap)
|
|
18
|
+
# @jtbd JTBD-301 (verdict-shape acknowledgement contract — non-negotiable)
|
|
19
|
+
# @adr ADR-024 (report-upstream contract — symmetry mirror)
|
|
20
|
+
# @adr ADR-036 (downstream-scaffold contract — adopter inheritance)
|
|
21
|
+
# @adr ADR-052 (behavioural-tests default + Permitted Exception)
|
|
22
|
+
# @adr ADR-062 (inbound-discovery + assessment pipeline)
|
|
23
|
+
|
|
24
|
+
setup() {
|
|
25
|
+
SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
|
|
26
|
+
SKILL_FILE="${SKILL_DIR}/SKILL.md"
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
30
|
+
# Verdict-shape contract subsection exists at the head of Step 4.5e
|
|
31
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
@test "4.5e-comment-shape subsection exists (briefs JTBD-301 contract upstream of branch templates)" {
|
|
34
|
+
run grep -nE '^#### 4\.5e-comment-shape' "$SKILL_FILE"
|
|
35
|
+
[ "$status" -eq 0 ]
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
@test "verdict-shape subsection names all four JTBD-301 verdict words verbatim" {
|
|
39
|
+
# JTBD-301 Desired Outcome row 6 names exactly four verdicts.
|
|
40
|
+
# The subsection MUST name all four so a reader sees the vocabulary
|
|
41
|
+
# before reading the per-branch templates that implement it.
|
|
42
|
+
run grep -nE 'fix released' "$SKILL_FILE"
|
|
43
|
+
[ "$status" -eq 0 ]
|
|
44
|
+
run grep -nE 'won.t-fix|won.t fix' "$SKILL_FILE"
|
|
45
|
+
[ "$status" -eq 0 ]
|
|
46
|
+
run grep -nE 'duplicate' "$SKILL_FILE"
|
|
47
|
+
[ "$status" -eq 0 ]
|
|
48
|
+
run grep -nE 'parked' "$SKILL_FILE"
|
|
49
|
+
[ "$status" -eq 0 ]
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
@test "verdict-shape subsection explicitly forbids framework-vocab leakage in ack-comment bodies" {
|
|
53
|
+
# Load-bearing rule: maintainer-internal jargon (Step IDs, branch
|
|
54
|
+
# names, classification tokens) MUST NOT appear in reporter-facing
|
|
55
|
+
# comment bodies. Audit-log at 4.5f keeps the tokens.
|
|
56
|
+
run grep -inE 'framework.vocab|maintainer.internal|reporter.facing' "$SKILL_FILE"
|
|
57
|
+
[ "$status" -eq 0 ]
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
@test "verdict-shape subsection cites the report-upstream symmetry (ADR-024 / ADR-036)" {
|
|
61
|
+
# JTBD-301 line 23 + ADR-024 / ADR-036 establish the inbound/outbound
|
|
62
|
+
# symmetry: outbound `/wr-itil:report-upstream` posts structured
|
|
63
|
+
# human-language; inbound ack mirrors that shape.
|
|
64
|
+
run grep -nE 'symmetry|mirror' "$SKILL_FILE"
|
|
65
|
+
[ "$status" -eq 0 ]
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
@test "verdict-shape subsection documents the C2 gate-substitution caveat (architect condition C2)" {
|
|
69
|
+
# The external-comms gate (ADR-028) fires on the SUBSTITUTED body,
|
|
70
|
+
# not the template — template authors must ensure no maintainer
|
|
71
|
+
# jargon leaks via P<NNN> title substitution or <reason> expansion.
|
|
72
|
+
run grep -inE 'substituted body|gate.fires.on.the.*substituted|template.author' "$SKILL_FILE"
|
|
73
|
+
[ "$status" -eq 0 ]
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
77
|
+
# Step 4.5d — matched-local-ticket cross-reference uses verdict-shape
|
|
78
|
+
# (duplicate verdict)
|
|
79
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
@test "Step 4.5d cross-reference comment uses 'duplicate' verdict language" {
|
|
82
|
+
# Replaces the bureaucratic "Tracked locally as docs/problems/..." boilerplate
|
|
83
|
+
# with verdict-shape "we're tracking this as a duplicate of P<NNN>".
|
|
84
|
+
run grep -inE 'duplicate of P.NNN.|tracking.*duplicate' "$SKILL_FILE"
|
|
85
|
+
[ "$status" -eq 0 ]
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
@test "Step 4.5d cross-reference comment template is documented inline (not just referenced)" {
|
|
89
|
+
# SKILL prose must carry the actual template body so a single-pass
|
|
90
|
+
# reader sees the shape, not just a cross-reference to JTBD-301.
|
|
91
|
+
run grep -nE '4\.5d.*[Cc]omment template|matched-local-ticket.*template' "$SKILL_FILE"
|
|
92
|
+
[ "$status" -eq 0 ]
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
96
|
+
# Step 4.5e Step 4 — above-threshold-pushback verdict template (won't-fix)
|
|
97
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
@test "Step 4.5e Step 4 pushback template uses 'we don't plan to fix this' verdict language" {
|
|
100
|
+
run grep -inE "we don.t plan to fix|don.t plan to fix this" "$SKILL_FILE"
|
|
101
|
+
[ "$status" -eq 0 ]
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
105
|
+
# Step 4.5e Step 5 — clear-malicious verdict template (policy-violation close)
|
|
106
|
+
# Architect condition C4: name this as fifth implicit verdict, not conflated
|
|
107
|
+
# with won't-fix
|
|
108
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
@test "Step 4.5e Step 5 clear-malicious template uses 'we're closing this report' verdict language" {
|
|
111
|
+
run grep -inE "we.re closing this|closing this report" "$SKILL_FILE"
|
|
112
|
+
[ "$status" -eq 0 ]
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
@test "verdict-shape subsection names clear-malicious as fifth implicit verdict (architect C4)" {
|
|
116
|
+
# The four documented JTBD-301 verdicts are fix-released / parked /
|
|
117
|
+
# duplicate / won't-fix. clear-malicious is a stronger close
|
|
118
|
+
# (policy-violation) — name it precisely in the subsection prose
|
|
119
|
+
# rather than conflating with won't-fix.
|
|
120
|
+
run grep -inE 'policy.violation close|fifth.*verdict|implicit.*verdict' "$SKILL_FILE"
|
|
121
|
+
[ "$status" -eq 0 ]
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
@test "clear-malicious classification gloss is plain-language (JTBD non-blocking advisory)" {
|
|
125
|
+
# JTBD advisory: <classification> in the clear-malicious template
|
|
126
|
+
# MUST be a plain-language gloss, NOT the raw wr-risk-scorer verdict
|
|
127
|
+
# token. SKILL prose must specify this so the reporter sees
|
|
128
|
+
# human language, not "out-of-scope-for-documented-personas".
|
|
129
|
+
run grep -inE 'plain.language gloss|plain-language.*classification' "$SKILL_FILE"
|
|
130
|
+
[ "$status" -eq 0 ]
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
134
|
+
# Step 4.5e Step 6 — safe-and-valid verdict template (accepted-into-backlog)
|
|
135
|
+
# Architect condition C3: name as "accepted into backlog", not fix-released
|
|
136
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
137
|
+
|
|
138
|
+
@test "Step 4.5e Step 6 safe-and-valid template uses 'we're tracking this as a real bug' verdict language" {
|
|
139
|
+
run grep -inE "tracking this as a real|tracking.*real bug" "$SKILL_FILE"
|
|
140
|
+
[ "$status" -eq 0 ]
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
@test "Step 4.5e Step 6 template does NOT include framework vocab 'safe-and-valid branch' in comment body" {
|
|
144
|
+
# The 31-comment leak: "classified via /wr-itil:review-problems Step
|
|
145
|
+
# 4.5e safe-and-valid branch with safe-low-fix-risk" appeared in
|
|
146
|
+
# comment bodies. The new template prose must show the reporter-facing
|
|
147
|
+
# body and not include the framework-vocab phrasing inside a comment-body block.
|
|
148
|
+
# Note: the steps section header itself names "Safe-and-valid branch" —
|
|
149
|
+
# that's the maintainer prose and fine. This test checks the COMMENT-BODY
|
|
150
|
+
# template (which lives in a fenced code block under Step 6) does not include
|
|
151
|
+
# the leak phrase "safe-low-fix-risk".
|
|
152
|
+
# (No bare 'safe-low-fix-risk' grep here: `run` never fails the test and its status would be overwritten by the proxy assertion below.)
|
|
153
|
+
# Token may appear in maintainer prose / classifier docs — but MUST NOT
|
|
154
|
+
# appear inside a fenced comment-body template block. We assert the token
|
|
155
|
+
# appears AT MOST in step-3 dual-axis-risk-classifier prose and the audit-log
|
|
156
|
+
# surface; not in a quoted comment template body. We use a structural proxy:
|
|
157
|
+
# the new 4.5e-comment-shape subsection MUST explicitly call out that the
|
|
158
|
+
# token belongs in maintainer-side audit-log only, not in the user-facing comment.
|
|
159
|
+
run grep -inE "safe-low-fix-risk.*audit.log only|safe.low.fix.risk.*maintainer.side" "$SKILL_FILE"
|
|
160
|
+
[ "$status" -eq 0 ]
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
@test "verdict-shape subsection names Step 6 verdict as 'accepted into backlog' (architect C3)" {
|
|
164
|
+
# JTBD-301 'fix released' is the post-release verdict — the Step 6
|
|
165
|
+
# ack fires at accept-into-backlog time. Name the verdict precisely
|
|
166
|
+
# in the subsection prose.
|
|
167
|
+
run grep -inE 'accepted into backlog|accept.into.backlog' "$SKILL_FILE"
|
|
168
|
+
[ "$status" -eq 0 ]
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
@test "Step 4.5e Step 6 template references release-notes / status surface for future updates" {
|
|
172
|
+
# JTBD-301 desired outcome: reporter knows where to watch for updates.
|
|
173
|
+
# The template must point to a stable surface (release notes / status
|
|
174
|
+
# page / linked issue) so the reporter has actionable expectation.
|
|
175
|
+
run grep -inE 'release notes|release-notes|watch this issue' "$SKILL_FILE"
|
|
176
|
+
[ "$status" -eq 0 ]
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
180
|
+
# Architect condition A1 — gate-denial sub-branches preserved across all four
|
|
181
|
+
# verdict-shape templates (no silent-skip regression)
|
|
182
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
@test "All four ack-comment branches preserve gate-denial sub-branches (architect A1)" {
|
|
185
|
+
# Step 4 (pushback), Step 5 (clear-malicious), Step 6 (safe-and-valid)
|
|
186
|
+
# already have gate-denial sub-branches in the current SKILL. The
|
|
187
|
+
# P229 fix preserves them; this assertion catches a regression where
|
|
188
|
+
# a template rewrite accidentally drops the sub-branch.
|
|
189
|
+
run grep -nE 'gate-denied-pushback' "$SKILL_FILE"
|
|
190
|
+
[ "$status" -eq 0 ]
|
|
191
|
+
run grep -nE 'gate-denied-clear-malicious' "$SKILL_FILE"
|
|
192
|
+
[ "$status" -eq 0 ]
|
|
193
|
+
run grep -nE 'gate-denied-safe-and-valid' "$SKILL_FILE"
|
|
194
|
+
[ "$status" -eq 0 ]
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
198
|
+
# Architect condition A2 — audit-log preserves classification tokens verbatim
|
|
199
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
@test "Audit-log surface at 4.5f preserves classification tokens (architect A2)" {
|
|
202
|
+
# The user-side template change strips framework vocab from comment
|
|
203
|
+
# bodies; the audit-log surface (4.5f) MUST continue receiving the
|
|
204
|
+
# raw classification tokens for replay determinism per ADR-062.
|
|
205
|
+
run grep -nE '4\.5f.*[Aa]udit-log' "$SKILL_FILE"
|
|
206
|
+
[ "$status" -eq 0 ]
|
|
207
|
+
run grep -nE 'safe-and-valid-local-ticket-created|above-threshold-pushback|clear-malicious-closed' "$SKILL_FILE"
|
|
208
|
+
[ "$status" -eq 0 ]
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
212
|
+
# Traceability — SKILL.md cites the originating ticket (P229) and JTBD-301 by ID
|
|
213
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
214
|
+
|
|
215
|
+
@test "P229 root-cause ticket cross-referenced from verdict-shape subsection" {
|
|
216
|
+
# Audit-trail grounding per ADR-026: the SKILL prose change cites
|
|
217
|
+
# the originating ticket so the rationale stays discoverable.
|
|
218
|
+
run grep -nE 'P229' "$SKILL_FILE"
|
|
219
|
+
[ "$status" -eq 0 ]
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
@test "verdict-shape subsection cites JTBD-301 by ID" {
|
|
223
|
+
run grep -nE 'JTBD-301' "$SKILL_FILE"
|
|
224
|
+
[ "$status" -eq 0 ]
|
|
225
|
+
}
|
|
@@ -16,7 +16,7 @@ The deprecated `/wr-itil:manage-problem <NNN> known-error` subcommand route rema
|
|
|
16
16
|
|
|
17
17
|
- `<NNN>` — the ticket ID (data parameter, e.g. `042`). Required.
|
|
18
18
|
- `<status>` — the destination status. One of:
|
|
19
|
-
- `known-error` — Open → Known Error (root cause
|
|
19
|
+
- `known-error` — Open → Known Error (root cause identified AND workaround documented; fix not yet proposed — per ADR-022 corrected semantics, fix proposal produces the RFC per ADR-072).
|
|
20
20
|
- `verifying` — Known Error → Verification Pending (fix released, awaiting user verification per ADR-022).
|
|
21
21
|
- `close` — Verification Pending → Closed (user has confirmed the fix works in production).
|
|
22
22
|
|
|
@@ -34,9 +34,15 @@ The user reviews the pending note on their next interactive session and runs `/w
|
|
|
34
34
|
|
|
35
35
|
### Step 0: Preflight (per ADR-019)
|
|
36
36
|
|
|
37
|
-
Before opening the work loop,
|
|
37
|
+
Before opening the work loop, **get the repo into a clean state** so the orchestrator does not iterate against a stale backlog, silently strand prior-session in-flight work, or proceed past an ambiguously-dirty tree (P040, P109, P293). ADR-019 names three branches under the umbrella goal:
|
|
38
38
|
|
|
39
|
-
**
|
|
39
|
+
- **Branch 1 — Pull**: origin moved; trivial fast-forward divergence. Action: `git pull --ff-only` non-interactively (the existing fetch/divergence path below).
|
|
40
|
+
- **Branch 2 — Commit**: pre-existing uncommitted work that belongs in a commit (prior AFK iter hit quota / cancel / crash mid-ticket). Auto-commit when **both** discriminator conditions hold: (a) provenance is unambiguous (attributable to the prior iter's own in-flight flow) AND (b) risk is within appetite per ADR-018. **Deferred — current implementation routes Branch 2 → Branch 3**: the auto-commit mechanism + gate-composition wiring + bats are not yet shipped. Pre-existing uncommitted source edits demote to Branch 3 (halt-with-report) until the follow-up lands.
|
|
41
|
+
- **Branch 3 — AskUserQuestion / AFK-halt**: genuinely messy tree (ambiguous uncommitted state, non-fast-forward divergence, partial-prior-session work whose provenance is unclear). Interactive: `AskUserQuestion` per ADR-013 Rule 1 (four-option report: Resume / Discard / Leave-and-lower-priority / Halt). AFK: halt with structured Prior-Session State report — a **deliberate carve-out from the 2026-06-06 ADR-013 Rule 6 queue-and-continue default** (ambiguous session-continuity state requires user input; non-interactive recovery would mask the bug this preflight is meant to surface).
|
|
42
|
+
|
|
43
|
+
The Branch 1 fetch/divergence table below is the live implementation of Branch 1. The session-continuity detection pass after it is **Branch 3's detection mechanism** — it enumerates the signals that populate the Prior-Session State report when Branch 3 fires.
|
|
44
|
+
|
|
45
|
+
**Branch 1 mechanism:**
|
|
40
46
|
|
|
41
47
|
1. Run `git fetch origin`.
|
|
42
48
|
2. Compare local `HEAD` with `origin/<base>` (default `main`; otherwise the branch the user is on).
|
|
@@ -46,7 +52,7 @@ Before opening the work loop, reconcile local state with origin so the orchestra
|
|
|
46
52
|
|---|---|
|
|
47
53
|
| HEAD at or ahead of origin/<base> | Proceed to Step 1 |
|
|
48
54
|
| origin/<base> ahead, local has no unpushed commits (pure fast-forward) | Run `git pull --ff-only` non-interactively. Log the count of pulled commits in the AFK iteration log. Proceed to Step 1. |
|
|
49
|
-
| origin/<base> ahead, local has unpushed commits (non-fast-forward) | STOP the loop. Report the divergence with `git log --oneline HEAD..origin/<base>` and `git log --oneline origin/<base>..HEAD`. Do NOT attempt to rebase or merge non-interactively — that is a judgment call the persona forbids in AFK mode. |
|
|
55
|
+
| origin/<base> ahead, local has unpushed commits (non-fast-forward) | STOP the loop (Branch 3 routing — non-fast-forward divergence is a "genuinely messy" signal). Report the divergence with `git log --oneline HEAD..origin/<base>` and `git log --oneline origin/<base>..HEAD`. Do NOT attempt to rebase or merge non-interactively — that is a judgment call the persona forbids in AFK mode. |
|
|
50
56
|
|
|
51
57
|
**Network failure**: if `git fetch origin` returns a network error, stop and report. Default behaviour is fail-closed — the user can retry when network is restored.
|
|
52
58
|
|
|
@@ -54,9 +60,9 @@ Before opening the work loop, reconcile local state with origin so the orchestra
|
|
|
54
60
|
|
|
55
61
|
**Cross-cutting**: this rule applies to every AFK orchestrator skill. The next-ID collision guard (ADR-019 confirmation criterion 2) belongs in the ticket-creator skills (`manage-problem` and `wr-architect:create-adr`), not here — see the related problem ticket for that work.
|
|
56
62
|
|
|
57
|
-
####
|
|
63
|
+
#### Branch 3 detection mechanism — session-continuity signal enumeration (per P109)
|
|
58
64
|
|
|
59
|
-
After the fetch/divergence check, Step 0 MUST run
|
|
65
|
+
After the Branch 1 fetch/divergence check, Step 0 MUST run the session-continuity detection pass that populates Branch 3's signal set (and, when the Branch 2 follow-up lands, feeds the Branch 2 / Branch 3 discriminator). The Branch 1 check handles "did origin move under us"; this pass handles the distinct failure mode "did the prior session leave partial work that changes what iter 1 should do". A prior AFK subprocess can exit mid-ticket (quota 429, user-cancel, subprocess crash) and leave observable state in the working tree that the orchestrator must classify before opening the work loop.
|
|
60
66
|
|
|
61
67
|
**Signals to enumerate** (each maps to one `git status --porcelain` / filesystem / `git worktree` probe):
|
|
62
68
|
|
|
@@ -236,7 +242,50 @@ The annotation pre-empts the "surprise heavy iter" perception JTBD-006 expects a
|
|
|
236
242
|
|
|
237
243
|
<!-- @jtbd JTBD-006 (Progress the Backlog While I'm Away — AFK orchestrator pre-flights review-problems so iters dispatch against fresh WSJF rankings) -->
|
|
238
244
|
|
|
239
|
-
After Step 0c completes (whether dispatched or silent-passed), proceed to Step 1.
|
|
245
|
+
After Step 0c completes (whether dispatched or silent-passed), proceed to Step 0d.
|
|
246
|
+
|
|
247
|
+
### Step 0d: Outbound upstream-responses pre-flight (per JTBD-006 AFK driver + JTBD-004 cross-repo coordination)
|
|
248
|
+
|
|
249
|
+
After Step 0c's deferred-placeholder pre-flight and before Step 1's backlog scan, check whether the outbound-responses cache is fresh. P249 Phase 1 shipped `/wr-itil:check-upstream-responses` as a manual skill (the outbound symmetric counterpart to Step 0b's inbound pipeline); P220 names the cadence gap: without an auto-fire trigger, upstream responses to issues we filed via `/wr-itil:report-upstream` go unread until the maintainer remembers to invoke the skill. This step closes that gap with the same pre-flight shape Step 0b uses for the inbound axis.
|
|
250
|
+
|
|
251
|
+
**Mechanism:**
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
preflight_reason="$(wr-itil-check-outbound-responses-staleness "$PWD")"
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
`wr-itil-check-outbound-responses-staleness` is the ADR-049 + ADR-080 `$PATH` shim (adopter-safe — resolves `lib/check-outbound-responses-staleness.sh` relative to the script, NOT cwd; P317/RFC-009) that internalises `should_promote_outbound_responses_preflight "$PWD"` and echoes the result. NEVER `source packages/...` repo-relative from a SKILL — those paths only resolve in the source monorepo, not adopter installs.
|
|
258
|
+
|
|
259
|
+
The helper returns one of five outcomes (contract documented at `packages/itil/lib/check-outbound-responses-staleness.sh` + asserted by `packages/itil/skills/work-problems/test/work-problems-step-0d-outbound-responses-staleness-behavioural.bats`):
|
|
260
|
+
|
|
261
|
+
| `preflight_reason` | Action |
|
|
262
|
+
|-----------------------------------|--------------------------------------------------------------------------------------------------------|
|
|
263
|
+
| `no-back-link-tickets` | Silent-pass. No local tickets carry a `## Reported Upstream` section; nothing to poll. Downstream-adopter non-obligation analogue to Step 0b's `no-channels-config`. Proceed to Step 1. |
|
|
264
|
+
| `first-run-cache-absent` | Dispatch `/wr-itil:check-upstream-responses` as a pre-flight iter via the standard `claude -p` subprocess wrapper (same shape as Step 0b / Step 0c / Step 5). |
|
|
265
|
+
| `first-run-last-checked-null` | Same as `first-run-cache-absent` — cache schema present but never populated. |
|
|
266
|
+
| `ttl-expiry age=<N>s ttl=<M>s` | Dispatch `/wr-itil:check-upstream-responses` as a pre-flight iter. Cache stale; the skill polls each back-linked upstream URL, diffs against the cache, and emits STATE / NEW / LABEL / NONE / FAIL per back-link ticket. |
|
|
267
|
+
| `fresh-within-ttl` | Silent-pass per ADR-013 Rule 5 + P132 mechanical-stage carve-out. Proceed to Step 1. |
|
|
268
|
+
|
|
269
|
+
**Pre-flight dispatch shape**: when promoted, dispatch a single `claude -p --permission-mode bypassPermissions --output-format json` subprocess that invokes `/wr-itil:check-upstream-responses` (per P084 + ADR-032 subprocess isolation). Reuse the Step 5 subprocess wrapper verbatim — same flag set, same idle-timeout SIGTERM poll loop. The subprocess runs the full check-upstream-responses Step 1 + Step 2 + Step 3 pipeline; the cache file `docs/problems/.outbound-responses-cache.json` + audit-log `docs/audits/outbound-responses-log.md` are refreshed in its own commit per ADR-014 (check-upstream-responses' SKILL.md Step 3 commit grain). After the subprocess completes, the orchestrator proceeds to Step 1.
|
|
270
|
+
|
|
271
|
+
**Iter-summary annotation**:
|
|
272
|
+
|
|
273
|
+
- No back-link tickets: `Step 0d skipped — no tickets carry ## Reported Upstream (downstream-adopter non-obligation)`.
|
|
274
|
+
- Cache fresh: `Step 0d skipped — outbound-responses cache fresh within TTL`.
|
|
275
|
+
- Pre-flight ran: `Step 0d pre-flighted /wr-itil:check-upstream-responses — reason=<preflight_reason>, <N> back-link tickets polled, <M> STATE/NEW deltas surfaced`.
|
|
276
|
+
|
|
277
|
+
The annotation pre-empts the "surprise heavy iter" perception JTBD-006 expects auditability for — a maintainer running multiple short AFK loops within a 24h window will hit `fresh-within-ttl` on subsequent invocations and see the cache-fresh annotation, confirming the system's silent-pass discipline rather than wondering whether the check ran at all.
|
|
278
|
+
|
|
279
|
+
**AFK authorisation per ADR-013 Rule 6**: check-upstream-responses is itself AFK-safe by construction — read-only externally (`gh issue view` only; no `gh issue comment` / `gh issue create`), so it does NOT trip ADR-028's external-comms gate; zero `AskUserQuestion` calls (flag-based knobs per CLAUDE.md P085); partial-failure exit code 2 distinguishes "some upstream URLs unreachable" from "everything broke" so AFK orchestrators can branch correctly. No new user-attention surface is introduced at the Step 0d promotion point.
|
|
280
|
+
|
|
281
|
+
**Compose-with**: ADR-013 Rule 5/6 (silent-pass + AFK fail-safe), ADR-044 category 4 (silent-framework — the trigger is policy + observable evidence), ADR-014 (check-upstream-responses' commit grain holds — the pre-flight subprocess emits its own commit), ADR-024 (back-link `## Reported Upstream` section is the source-of-truth scanned by the helper and read by the dispatched skill), ADR-049 / ADR-080 (PATH shim grammar + highest-version-wins wrapper), ADR-062 § Step 0b (precedent staleness-pre-flight shape — Step 0d is the outbound symmetric counterpart), P084 + P077 (subprocess isolation reuse — same `claude -p` wrapper as Step 5), P132 (mechanical-stage carve-out — no `AskUserQuestion` at the promotion point), P170 / RFC-002 (dual-tolerant glob — the helper handles both layouts), P317 / RFC-009 (adopter-safe PATH shim), P249 Phase 1 (the manual skill this step wires into a cadence).
|
|
282
|
+
|
|
283
|
+
**Staleness contract drift**: the staleness comparison MUST stay symmetric with the check-upstream-responses SKILL's Confirmation surface (TTL semantics + outcome shape). Drift here re-opens the outbound-responses staleness contract — any change to TTL semantics MUST update this Step 0d, the lib helper, AND the check-upstream-responses SKILL.md Confirmation section in the same commit. <!-- OUTBOUND-RESPONSES-STALENESS-CONTRACT-SOURCE: packages/itil/skills/check-upstream-responses/SKILL.md ## Confirmation -->
|
|
284
|
+
|
|
285
|
+
<!-- @jtbd JTBD-006 (Progress the Backlog While I'm Away — AFK orchestrator pre-flights check-upstream-responses so outbound STATE/NEW deltas surface without manual polling) -->
|
|
286
|
+
<!-- @jtbd JTBD-004 (Connect Agents Across Repos to Collaborate — closes the outbound symmetric feedback loop) -->
|
|
287
|
+
|
|
288
|
+
After Step 0d completes (whether dispatched or silent-passed), proceed to Step 1.
|
|
240
289
|
|
|
241
290
|
### Step 1: Scan the backlog
|
|
242
291
|
|
|
@@ -509,10 +558,22 @@ rm -f "$ITER_JSON"
|
|
|
509
558
|
|
|
510
559
|
**Iteration prompt body (self-contained — the subprocess has no prior conversation context):**
|
|
511
560
|
|
|
561
|
+
**Re-ground per iter (P211 — orchestrator-side construction invariant)**: each iter's prompt body MUST be re-grounded per iter against the CURRENT ticket's identity (ID + title) only. The orchestrator does NOT inline the target ticket's `## Fix Strategy` section verbatim into the dispatch prompt — the subprocess reads Fix Strategy from disk via `/wr-itil:manage-problem` inside its own context, where the design rationale travels with the ticket file and stays anchored to the correct ticket. Across iterations, no prior-iter content leaks into iter N's prompt body — specifically, prior ticket ID, prior Fix Strategy text, prior outcome reason, prior commit SHA, prior retro findings, and prior outstanding-question entries MUST NOT carry across the iter boundary into the new prompt. The construction is template-driven and reset per iter; no global accumulator carries from iter to iter. The "self-contained" opener above is a subprocess-side property (the subprocess has no prior conversation context); the re-grounding invariant is the symmetric orchestrator-side property (the orchestrator main turn does not carry prior-iter prompt content into the next iter's dispatch construction). P211 was reported inbound by downstream consumer bbstats (as their P194): without this invariant, an iter inherits a stale design-rationale frame and may land fixes anchored on the wrong ticket's intent, degrading the JTBD-006 audit trail. **`@jtbd JTBD-006`** (load-bearing).
|
|
562
|
+
|
|
512
563
|
1. **Context**: this is one iteration of the AFK work-problems loop. The user is AFK. The orchestrator selected `P<NNN> (<title>)` as the highest-WSJF actionable ticket.
|
|
513
564
|
2. **Task**: apply the `/wr-itil:manage-problem` workflow for `work highest WSJF problem that can be progressed non-interactively as the user is AFK`. Follow manage-problem SKILL.md verbatim, including architect / jtbd / style-guide / voice-tone gate reviews and the commit gate (manage-problem Step 11). Because this subprocess has the Agent tool in its own surface, the normal review-via-subagent paths work — no inline-verdict fallback needed.
|
|
514
565
|
3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills mid-iter (AFK carve-out — ADR-032), **EXCEPT for retro-surfaced observations of recurring class-of-behaviour** — those route to `/wr-itil:capture-problem` per the **P342 mechanical-stage carve-out** (see retro-on-exit constraint #4 below; same trust-boundary as `/wr-retrospective:run-retro` Step 4a verification close-on-evidence — P342). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. **This includes the manage-problem substance-confirm-before-build guard (ADR-074 (Confirm a decision's substance before building dependent work)):** when the propose-fix step detects that the fix builds on a born-`proposed` decision whose substance is unconfirmed (via `wr-architect-is-decision-unconfirmed`), the iter does NOT implement on it and does NOT ask mid-loop — it queues a `category: "direction"` entry naming the unconfirmed ADR + its Decision Outcome for loop-end confirmation, and routes the ticket to `action: skipped`, `skip_reason_category: user-answerable`. Building on the unconfirmed substance instead (or guessing the choice) is the P315 failure this guard exists to prevent. The queued substance-confirm is a legitimate cat-1 direction ask — it is NOT counted as lazy in the Step 2d Ask Hygiene Pass (ADR-074 lazy-count exclusion). Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). 
Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary. **Treat the user as transient** (P130): even when observably present at orchestrator dispatch time, the user may answer one question and disappear for hours; presence is not a reliable signal and is not the goal. The iter's job is to progress the ticket and accumulate questions for batched surfacing — not to ask "is it OK to proceed?" at a mechanical-stage boundary. **Do NOT poll `bats` output with a bats-console-summary regex against TAP-format output** (P146 — bash until-loop-deadlock antipattern). The bats-console-summary line `<N> tests, <M> failures` is emitted ONLY by bats's *default* (non-TAP) formatter; `bats --tap` does not emit a console summary, so a polling loop of shape `until [ -f $OUT ] && grep -qE '^[0-9]+ tests?,' $OUT; do sleep 5; done` spins forever after bats completes (silent deadlock — no error, no exit; recovery requires manual SIGTERM with metadata loss per the P146/P147 stuck-before-emit subclass). When you need to wait on a backgrounded bats run, prefer `wait $bg_pid` (Unix idiom — completion signaled by process exit, no regex required) or, for the Bash tool, `run_in_background=true` + `BashOutput` polling on the tool's exit-state field rather than regex-poll on stdout. If you genuinely must regex-poll TAP output, anchor on the TAP plan line `^[0-9]+\.\.[0-9]+` (e.g. `1..1455`) — TAP's plan line is emitted on completion and is format-stable across bats versions; the bats-console-summary line is not. The console-summary vs TAP-format divergence is the load-bearing detail: `bats` and `bats --tap` produce structurally different stdout, and the antipattern assumes the former when iter dispatch typically uses the latter. **Do NOT poll subprocess completion with `pgrep -f '<pattern>'` inside an `until` / `while` loop** (P232 — self-referential pgrep deadlock; sibling variant of P146). 
`pgrep -f` matches against the FULL command line of every running process, so the polling loop's own `zsh -c` argument (which contains the literal `pgrep -f '<pattern>'` text) matches itself; with multiple concurrent polling loops, each loop matches the others and spins forever. Worked example of the antipattern: `until ! pgrep -f 'bats --recursive' > /dev/null 2>&1; do sleep 5; done` — the 2026-05-16 P232 deadlock witness; 4 concurrent polling loops each matched the others' command lines while no actual bats process ran; 45 min wall-clock + $20-30 wasted before manual SIGTERM. The same self-reference shape applies to `while pgrep -f ...; do sleep; done` and to `until ! pkill -0 -f '<pattern>'` / `while pkill -0 -f '<pattern>'` (signal-0 polling). The structural fix is the same as P146: prefer `wait $bg_pid` (Unix idiom — shell-native completion signal, no regex / no pgrep) or Bash-tool `run_in_background=true` + `BashOutput` polling (harness-tracked completion state). The hook `packages/itil/hooks/itil-bash-polling-antipattern-detect.sh` denies these shapes at PreToolUse:Bash, but the prompt rule belongs here too — structural enforcement + prompt discipline together close the class. **If the fix changes shippable code or package behaviour** (any path under `packages/<plugin>/{src,bin,hooks,skills,scripts,lib,agents}` excluding test paths — `test/`, `hooks/test/`, `scripts/test/` — and excluding `README.md` + `docs/*.md`), **the iter MUST author a `.changeset/*.md` entry in the same single ADR-014-grain commit as the fix** (the changeset names the bumping plugin via the YAML frontmatter `"@windyroad/<plugin>": <patch|minor|major>` per the changesets-action contract). **Doc-only changes** (under `docs/`, `*.md`) **and test-only changes** (under any `test/` path) **that ship no behaviour MAY omit the changeset**. 
The orchestrator's Step 6.5 release-cadence drain runs `release:watch` only when `.changeset/` is non-empty after push — without an iter-authored changeset, code-shape fixes accumulate without ever shipping to npm (violating JTBD-006's audit-trail expectation + JTBD-007's "Keep Plugins Current" closure dependency). Hook `packages/itil/hooks/itil-changeset-discipline.sh` (P141) provides hook-level enforcement at `git commit` time as defence-in-depth — but plugin hook execution depends on the marketplace cache carrying the current hook version, so the prompt-time constraint here MUST land independently (composes-with the hook; does NOT rely on the hook being installed). Inbound-reported from downstream consumer bbstats as their P195 — see [Related](#related) for `**Origin**: inbound-reported (bbstats#195)` per ADR-076. **`@jtbd JTBD-006`** (load-bearing) **`@jtbd JTBD-007`** (closure-dependent).
|
|
515
|
-
4. **Retro-on-exit (P086) + retro-surfaced observation classification (P342)**: before emitting `ITERATION_SUMMARY`, invoke `/wr-retrospective:run-retro`. Retro runs INSIDE this subprocess so its Step 2b pipeline-instability scan has access to the iteration's rich tool-call history (hook misbehaviour, repeat-workaround patterns, subagent-delegation friction, release-path instability).
|
|
566
|
+
4. **Retro-on-exit (P086) + retro-surfaced observation classification (P342) + iter-owned BRIEFING commit (P212)**: before emitting `ITERATION_SUMMARY`, invoke `/wr-retrospective:run-retro`. Retro runs INSIDE this subprocess so its Step 2b pipeline-instability scan has access to the iteration's rich tool-call history (hook misbehaviour, repeat-workaround patterns, subagent-delegation friction, release-path instability). Tickets that retro creates ride a separate path: retro delegates their creation to `/wr-itil:manage-problem`, which IS ADR-014 in-scope and self-commits each ticket per its own Step 11. Those commits land independently, and the orchestrator picks them up on the next Step 1 scan.
|
|
567
|
+
|
|
568
|
+
**BRIEFING.md commit responsibility — iter owns, run-retro does not (P212).** run-retro is explicitly out-of-scope for self-commit per ADR-014's Scope section (which lists `packages/retrospective/skills/run-retro/SKILL.md` under "Out of scope for now"). Retro therefore EDITS but DOES NOT COMMIT `docs/BRIEFING.md` / `docs/briefing/*.md`. The iter subprocess (NOT run-retro, NOT the orchestrator main turn) owns the BRIEFING commit. After retro completes, run `git status --porcelain docs/BRIEFING.md docs/briefing/`. If non-empty, the iter:
|
|
569
|
+
|
|
570
|
+
1. Stages the dirty BRIEFING paths (`git add docs/BRIEFING.md docs/briefing/`).
|
|
571
|
+
2. Delegates to `wr-risk-scorer:pipeline` per ADR-014's `work → score → commit` ordering. The BRIEFING refresh is mechanical chore-class (derived retro output, no source-of-truth change) — within-appetite by construction, same risk shape as the `chore(problems): reconcile README ...` and `chore(problems): check upstream responses` precedents in ADR-014's commit-message convention table.
|
|
572
|
+
3. Commits as `chore(briefing): refresh from iter retro (P<NNN>)` where `P<NNN>` is the ticket the iter was working.
|
|
573
|
+
|
|
574
|
+
Pre-P212, the orchestrator's Step 6.75 absorbed this as `dirty-for-a-known-reason` and added the commit at orchestrator-main-turn cost, invoking `wr-risk-scorer:pipeline` twice per iter (once for the ticket commit, once for the orchestrator-side hand-off). Shifting the commit into the iter subprocess preserves the audit trail (the same `chore(briefing)` commit lands), eliminates the orchestrator-main-turn hand-off, and moves the second scoring call from expensive main-turn context to cheaper iter-subprocess context. Step 6.75's table is amended below to classify dirty BRIEFING-at-iter-exit as a bug class rather than an expected hand-off.
|
|
575
|
+
|
|
576
|
+
Proceed to `ITERATION_SUMMARY` emission regardless of retro findings — retro is non-blocking at the iter-subprocess layer (do not block on retro): if retro fails or surfaces findings, the iteration still returns a summary so the AFK loop does not silently halt on a flaky retro run. The iter MUST verify `git status` is clean (no remaining BRIEFING dirty state) before emitting `ITERATION_SUMMARY`. (Session-level retro at the orchestrator-main-turn layer per Step 2.4 gate (b) IS load-bearing — distinct surface; see Step 2.4 prose for the orchestrator-layer halt semantics.)
|
|
516
577
|
|
|
517
578
|
**P342 classification taxonomy — retro-surfaced observations.** When the iter-retro's Step 4b Stage 1 surfaces a ticketable observation, the routing depends on classification:
|
|
518
579
|
|
|
@@ -614,17 +675,25 @@ Do NOT extract `session_id`, `model`, `stop_reason`, `permission_denials`, `uuid
|
|
|
614
675
|
|
|
615
676
|
Aggregation rule: sum `.total_cost_usd` into the session total and trust it; sum `.usage.*` into the session totals for cache-reuse ratio reasoning but label them best-effort in the Session Cost table. This asymmetry is correct-by-CLI-contract (cost is a session cumulative; usage is a per-response envelope); the orchestrator documents the asymmetry so adopters do not silently under-count tokens. First observed AFK-iter-7 iter 5 (2026-04-21): 1071s wall-clock / 60+ tool-use subprocess returned `duration_ms: 8546, num_turns: 1, usage.* ≈ 137K tokens, total_cost_usd: 6.08` — cost cumulative and correct, tokens reflecting only the final ack turn.
|
|
616
677
|
|
|
617
|
-
**Exit-code semantics.** `claude -p` exits non-zero when the subprocess fails hard — subprocess crash, auth failure, unresolvable permission denial, API/quota exhaustion.
|
|
678
|
+
**Exit-code semantics — ordered check (P214 amendment to the P261 carve-out).** `claude -p` exits non-zero when the subprocess fails hard — subprocess crash, auth failure, unresolvable permission denial, API/quota exhaustion. Orthogonally, the `--output-format json` envelope carries an `is_error` field that fires `true` on transient API failures (529 Overloaded / 429 rate-limit / 401 auth-expired) where the subprocess exits 0 with `total_cost_usd: 0` — the API call never landed; no work was done; no `ITERATION_SUMMARY` was emitted. Before P214, the prose presented the exit-code rule first and the `is_error` carve-out as "orthogonal", which let an implementer silently route exit 0 + `is_error: true` to the `ITERATION_SUMMARY` parse path and miscount the failure as success. The orchestrator MUST instead read both fields in this explicit order, BEFORE parsing `.result`:
|
|
618
679
|
|
|
619
|
-
|
|
620
|
-
|
|
680
|
+
1. **Read the exit code.** Non-zero → halt the loop; report the exit code, stderr, and any partial `.result` in the final summary. Do NOT spawn the next iteration. The user returns to a stopped loop with a clear failure reason (e.g. "quota exhausted — resume when quota resets"). Exit-code check fires FIRST in the ordered sequence — non-zero exit takes precedence over the `is_error` branch below.
|
|
681
|
+
2. **Parse `is_error` from the JSON stdout BEFORE attempting to parse `ITERATION_SUMMARY`.** When `is_error: true`, route to the SALVAGE-vs-HALT decision contract below (the existing P261 carve-out, extended by P214 with the transient-API-error HALT advisory). The check MUST happen before the Exit-0 → `ITERATION_SUMMARY` parse path — the load-bearing P214 invariant is that `is_error: true` never silently falls through to the parse path.
|
|
682
|
+
3. **Exit 0 AND `is_error: false`** → parse `ITERATION_SUMMARY` from `.result` field; proceed to Step 6.
|
|
621
683
|
|
|
622
|
-
**`is_error: true`
|
|
684
|
+
**`is_error: true` class taxonomy (P261 SALVAGE branch + P214 HALT branch).** Two sub-classes of `is_error: true` route differently inside the ordered check above. Deterministic SALVAGE-vs-HALT decision contract:
|
|
623
685
|
|
|
624
|
-
- **IF** `is_error: true` AND staged files exist in the working tree (`git diff --cached --name-only` non-empty) AND any iter-authored bats fixtures pass → the orchestrator MAY apply the documented **4-step salvage path**: (1) run the iter's bats as a structural sanity check; (2) inspect the changeset + diffs for quality; (3) commit the staged work from the orchestrator main turn with explicit iter-attribution in the message (e.g. "iter hit API stream timeout before commit — committed staged work from orchestrator main turn"); (4) **the commit gate fires fresh** on the salvage commit, so architect / JTBD / risk-scorer validate the work cleanly on the orchestrator's own SESSION_ID (never reusing the dead subprocess's gate markers, per ADR-009 line 89). The salvage commit IS the iteration's one commit per ADR-014 (amend-folding is inapplicable — no iter commit exists to amend).
|
|
625
|
-
- **
|
|
686
|
+
- **SALVAGE branch (P261 — stream-timeout class).** **IF** `is_error: true` AND staged files exist in the working tree (`git diff --cached --name-only` non-empty) AND any iter-authored bats fixtures pass → the orchestrator MAY apply the documented **4-step salvage path**: (1) run the iter's bats as a structural sanity check; (2) inspect the changeset + diffs for quality; (3) commit the staged work from the orchestrator main turn with explicit iter-attribution in the message (e.g. "iter hit API stream timeout before commit — committed staged work from orchestrator main turn"); (4) **the commit gate fires fresh** on the salvage commit, so architect / JTBD / risk-scorer validate the work cleanly on the orchestrator's own SESSION_ID (never reusing the dead subprocess's gate markers, per ADR-009 line 89). The salvage commit IS the iteration's one commit per ADR-014 (amend-folding is inapplicable — no iter commit exists to amend). Production shape: `API Error: Stream idle timeout - partial response received` in `.result` after staging coherent work but before `git commit` — staged files survive; JSON metadata preserved (unlike the P147 stuck-before-emit class).
|
|
687
|
+
- **HALT branch (P214 — transient-API-error class).** **ELSE IF** `is_error: true` AND nothing staged (`git diff --cached --name-only` empty) → halt the loop with a class-appropriate advisory line in the final summary. The transient-API-error class fires when the API call never landed; `total_cost_usd: 0`; no work was done. Map `.result` substrings to the advisory:
|
|
688
|
+
- `529` / `Overloaded` → `"API overloaded; retry when service recovers"`
|
|
689
|
+
- `429` / `rate limit` → `"API rate-limited; retry when limit window resets"`
|
|
690
|
+
- `401` / `Authentication` / `auth expired` → `"API auth expired; refresh credentials before resuming"`
|
|
691
|
+
- any other `is_error: true` shape → `"transient API error; inspect .result and resume manually"`
|
|
626
692
|
|
|
627
|
-
|
|
693
|
+
Do NOT spawn the next iteration; the loop has no recoverable state to advance from. Retry policy for the transient classes (e.g. exponential backoff on 529 Overloaded, max-N attempts) is deferred to a Phase 2 amendment per P214's Investigation Tasks — Phase 1 is HALT-with-advisory only.
|
|
694
|
+
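- **ELSE** (`is_error: true` with staged files present, but the staged work is incoherent or the iter-authored bats fail) → halt the loop as the SALVAGE branch's fall-through: do NOT commit the staged work and do NOT spawn the next iteration.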
|
|
695
|
+
|
|
696
|
+
The decision is deterministic and non-interactive — no `AskUserQuestion` (Rule 6, mirroring the P121 SIGTERM precedent at line 154 of ADR-032). **Distinct classes** within the `is_error: true` taxonomy: P261 SALVAGE (stream-timeout — staged work survives) vs P214 HALT (transient API error — nothing staged). **Distinct from** sibling subprocess-failure classes: P121 (SIGTERM idle-timeout — `is_error: false` clean exit-flush; subprocess HAD committed before going idle), P147 (SIGTERM stuck-before-emit — exit 143 + 0-byte JSON, metadata lost), and P146 (bash-polling antipattern — the deadlock mechanism behind P147). Here the iter exits on its own with `is_error: true`; no SIGTERM involved; metadata survives in the JSON envelope. Full contract: ADR-032 § "is_error:true stream-timeout salvage (P261 amendment)" + § P214 transient-API-error HALT extension. Behavioural fixtures: `test/work-problems-step-5-stream-timeout-salvage.bats` (SALVAGE branch — P261), `test/work-problems-step-5-is-error-transient-halt.bats` (HALT branch — P214).
|
|
628
697
|
|
|
629
698
|
**Quota as the natural stop.** The AFK loop runs until quota is exhausted or a stop-condition from Step 2 fires. There is no per-iteration dollar cap; running iterations until quota is actually exhausted maximises backlog progress per quota cycle. Quota-exhaust on a `claude -p` invocation surfaces as a non-zero exit and the orchestrator halts cleanly per the rule above.
|
|
630
699
|
|
|
@@ -742,9 +811,38 @@ After the iteration's commit lands but before starting the next iteration, check
|
|
|
742
811
|
1. Run `npm run push:watch` (push + wait for CI to pass).
|
|
743
812
|
2. If `.changeset/` is non-empty after push, run `npm run release:watch` (merge the release PR + wait for npm publish).
|
|
744
813
|
3. Resume the loop only after the release lands on npm.
|
|
745
|
-
4. **Post-release
|
|
814
|
+
4. **Post-release K→V auto-transition (P228)**: if step 2 actually ran AND succeeded (a release shipped to npm), fire the K→V auto-transition callback for `.known-error.md` tickets whose Release-vehicle citation matches a just-shipped changeset. See the **Post-release K→V auto-transition** subsection below for the full contract.
|
|
815
|
+
5. **Post-release cache refresh (P233)**: if step 2 actually ran AND succeeded (a release shipped to npm), chain `/install-updates` to refresh the plugin cache before the next iter dispatches. Skipped when step 2 was a no-op (empty `.changeset/` after push; no new plugin version exists). See the **Post-release cache refresh** subsection below for the full contract.
|
|
816
|
+
|
|
817
|
+
**Post-release K→V auto-transition (P228) — fires only after within-appetite Drain action step 2 (release:watch) succeeded:**
|
|
818
|
+
|
|
819
|
+
ADR-022 prescribes that Known Error tickets transition to Verification Pending on release, but until P228 there was no auto-fire surface to back-fill the transition once a fix ships. Iter subprocesses MUST NOT release (the orchestrator owns Step 6.5 per the iter dispatch constraints), so a fix that lands in iter N stays in `.known-error.md` until the orchestrator drains release in Step 6.5 — and prior to this callback, the K→V transition was silently deferred to "the next session" citing a misapplied P143 amendment. The 2026-06-08 P220 empirical witness — `## Fix Released` populated with no K→V transition — confirmed the gap.
|
|
820
|
+
|
|
821
|
+
**Mechanism:**
|
|
822
|
+
|
|
823
|
+
1. Invoke `wr-itil-enumerate-postrelease-kv-candidates` (ADR-049 PATH shim resolving to `packages/itil/scripts/run-enumerate-postrelease-kv-candidates.sh` / `packages/itil/lib/enumerate-postrelease-kv-candidates.sh`). The helper walks `docs/problems/known-error/*.md`, invokes `wr-itil-derive-release-vehicle <NNN>` per ticket, and emits one `KV_CANDIDATE: P<NNN> | <changeset>` line per ticket whose changeset has been shipped (derive exit 0). Tickets with no `**Release vehicle**: .changeset/<name>.md` reference (derive exit 2 — legacy pre-P330) and tickets whose changeset is still in the working tree (derive exit 3 — unreleased) are skipped silently. Final line: `KV_CANDIDATES_SUMMARY: total=<N>`.
|
|
824
|
+
2. Parse `KV_CANDIDATE:` lines from stdout.
|
|
825
|
+
3. For each candidate `P<NNN>`, dispatch `/wr-itil:transition-problem <NNN> verifying` via the Skill tool. The dispatched transition-problem skill is the authoritative executor for K→V per ADR-010 amended "Split-skill execution ownership" (P093) — orchestrator dispatch is the documented forwarder pattern, NOT a round-trip. The dispatched skill rides its OWN ADR-014 commit through architect / JTBD / risk-scorer gates per its existing Step 8 contract (rename + Status edit + `## Fix Released` write + README refresh + commit). The orchestrator does NOT re-implement the transition mechanics; it dispatches and reads the outcome.
|
|
826
|
+
4. After all candidates have been dispatched, emit one transition outcome line per ticket to the iter summary in the form `K→V: P<NNN> | commit=<sha> | release=<vehicle>` (read from the dispatched transition-problem's `RELEASE_VEHICLE` block or Report-the-outcome stdout per Step 9 of transition-problem).
|
|
827
|
+
5. Push the resulting K→V commits via `git push` (the release itself has already shipped — these are post-release audit-trail commits and do NOT require a second release:watch round-trip).
|
|
828
|
+
|
|
829
|
+
**Conditional on actual release**: only fires when `release:watch` actually published (step 2 of the Drain action above ran AND returned success). Skipped when `push:watch` ran alone (empty `.changeset/`; no new plugin version). Without this guard, the enumerator would scan `docs/problems/known-error/` on every iter with no shipped changeset to match — wasted reads.
|
|
830
|
+
|
|
831
|
+
**Non-blocking on individual transition failure**: if a dispatched `/wr-itil:transition-problem` fails (pre-flight reject, gate rejection, P057 staging trap, derive helper transient error), the orchestrator logs the failure for that ticket and continues to the next candidate. A single transition failure MUST NOT halt the loop or block siblings in the same cohort. Persistent failures across multiple iters surface as accumulated `outstanding_questions` entries per the standard Step 2.5b discipline.
|
|
832
|
+
|
|
833
|
+
**Policy authorisation (ADR-013 Rule 5)**: rides the same Rule 5 silent-proceed that already covers `push:watch` / `release:watch` / `/install-updates` in the drain — the K→V auto-transition is mechanically downstream of release and shares its authorisation. The derive-helper-citation match against the just-shipped changeset is deterministic (filename equality), not a judgment call — squarely in the safe-default tier per JTBD-006 "Decisions that would normally require my input are resolved using safe defaults".
|
|
834
|
+
|
|
835
|
+
**Mid-loop ask discipline (P130) preserved**: the dispatched transition-problem skill is wired to skip `AskUserQuestion` when invoked under AFK orchestrator context per its own ADR-013 Rule 6 fail-safe (transition-problem SKILL.md Step 8 risk-above-appetite branch). The orchestrator MUST NOT introduce any `AskUserQuestion` call at the callback site — the per-candidate routing is framework-resolved per ADR-044, and the callback fires in a mechanical-stage transition between drain step 2 and step 5 (cache refresh).
|
|
836
|
+
|
|
837
|
+
**V→C remains the maintainer's surface (persona constraint per JTBD-006)**: this callback fires ONLY for K→V (`known-error → verifying` — "fix released, awaiting verification"). It explicitly does NOT auto-fire V→C — the maintainer's judgment-reserved "fix actually works" closure remains untouched and continues to require their return per the existing transition-problem Step 4 `Verification Pending → Closed` precondition ("the user has explicitly confirmed the fix works in production").
|
|
838
|
+
|
|
839
|
+
**Composition with the Above-appetite branch (below)**: the K→V callback is anchored to the within-appetite Drain action step 4 — it does NOT fire after the above-appetite Rule 5 halt (no release shipped → nothing to match) and it does NOT fire mid-loop in the above-appetite auto-apply loop. When the auto-apply loop converges and re-enters the within-appetite Drain action, the K→V callback fires there per step 4.
|
|
840
|
+
|
|
841
|
+
**Composition with Cohort-graduation pre-check (P246)**: the cohort-graduation pre-check (step 2a above) fires BEFORE the Drain action; its `git mv` operations from `docs/changesets-holding/` to `.changeset/` happen BEFORE release:watch and ship as part of the same release. The K→V callback fires AFTER release:watch and consumes the just-shipped changeset set — so graduated cohorts that ship in the same release are correctly matched by the enumerator (the deleted-from-tree changeset has the graduated basename; the K-ticket's `**Release vehicle**: .changeset/<basename>.md` reference matches).
|
|
842
|
+
|
|
843
|
+
Per ADR-022 (Verifying lifecycle) + ADR-018 (release-cadence host) + ADR-010 amended P093 (transition-problem authoritative executor) + ADR-014 (per-transition commit grain) + ADR-013 Rule 5 (policy-authorised silent-proceed) + ADR-044 (framework-resolution boundary) + P228 (this ticket) + P233 (sibling callback) + P267 (derive-release-vehicle composed helper) + P330 (Release vehicle seed reference — input signal).
|
|
746
844
|
|
|
747
|
-
**Post-release cache refresh (P233) — fires only after within-appetite Drain action step
|
|
845
|
+
**Post-release cache refresh (P233) — within-appetite Drain action step 5 (above); fires only after step 2 (release:watch) succeeded:**
|
|
748
846
|
|
|
749
847
|
After a successful release-cadence drain has shipped a new plugin version to npm, the orchestrator chains `/install-updates` to refresh the plugin cache before the next iter dispatches. Empirical evidence in `docs/briefing/afk-subprocess.md` ("Just-shipped gate-class hooks DON'T protect the immediate-next iter" entry) confirms iter subprocesses re-resolve plugin cache on spawn — so a just-shipped gate-class hook is inactive in the next iter unless the cache is refreshed first. The orchestrator IS the "restart" boundary for the next iter subprocess (each subprocess is a fresh `claude -p` per ADR-032 + `afk-subprocess-mechanics.md`); the cache refresh between release:watch and next-iter dispatch is the load-bearing step.
|
|
750
848
|
|
|
@@ -753,7 +851,7 @@ After a successful release-cadence drain has shipped a new plugin version to npm
|
|
|
753
851
|
- **Policy authorisation (ADR-013 Rule 5)**: rides the same Rule 5 silent-proceed that already covers `push:watch` / `release:watch` in the drain — the post-release cache refresh is mechanically downstream of release and shares its authorisation. Composes with P106's claude-plugin-install no-op-when-already-installed factor (the chained `/install-updates` handles the uninstall+install dance per P106).
|
|
754
852
|
- **Mid-loop ask discipline (P130) preserved**: if `/install-updates` Step 5b/5c consent gate fires (cache miss / scope delta / `INSTALL_UPDATES_RECONFIRM=1`), the orchestrator main turn treats this AS the **Non-interactive fallback** documented in `scripts/repo-local-skills/install-updates/SKILL.md` "Non-interactive fallback" subsection — log the dry-run output, do not interrupt the loop. The orchestrator's `.claude/.install-updates-consent` is normally present (install-updates Step 5a cache hit) so the gate fires silently. **ADR-044 framework-resolution boundary** authorises this AskUserQuestion-available-but-forbidden routing: invocation between iters is a mechanical-stage transition the framework has resolved; surfacing it to the user would dilute the Step 2.5b accumulated-question discipline.
|
|
755
853
|
|
|
756
|
-
**Composition with the Above-appetite branch (below)**: the cache refresh is anchored to the within-appetite Drain action step
|
|
854
|
+
**Composition with the Above-appetite branch (below)**: the cache refresh is anchored to the within-appetite Drain action step 5 — it does NOT fire after the above-appetite Rule 5 halt (no release shipped → nothing to refresh) and it does NOT fire mid-loop in the above-appetite auto-apply loop. When the auto-apply loop converges and re-enters the within-appetite Drain action, the cache refresh fires there per step 5. The chain's site is the Drain action only.
|
|
757
855
|
|
|
758
856
|
**Failure handling (P140)**: When `push:watch` or `release:watch` reports a CI failure or publish failure, the orchestrator follows a diagnose-then-classify routing — fix-and-continue for the documented mechanically-fixable allow-list, halt for everything else. The previous uniform halt rule converted mechanically-fixable failures (1-line stale-grep-string updates, transient flakes) into ~45min queue stalls, regressing JTBD-006 "Progress the Backlog While I'm Away" without any governance benefit.
|
|
759
857
|
|
|
@@ -838,7 +936,7 @@ Before spawning the next iteration's subagent, verify the working tree state aga
|
|
|
838
936
|
|---|---|---|
|
|
839
937
|
| Clean (empty output) | The subagent committed successfully (the default happy path) | Proceed to Step 7 |
|
|
840
938
|
| Dirty for a known reason | A deliberate hand-off to the next iteration (e.g. the subagent chose to skip the commit and report "uncommitted state" because risk was above appetite — per the Non-Interactive Decision Making table above). Reason MUST be stated in the iteration report. | Include the dirty state in the next iteration's subagent context and proceed to Step 7 |
|
|
841
|
-
| Dirty for an unknown reason | Neither of the above — the subagent reported success but the tree is not clean, or the tree is dirty without a documented reason in the iteration report | **Halt the loop.** Report the `git status --porcelain` output, the last subagent's reported outcome, and the divergence. Do NOT spawn the next iteration. |
|
|
939
|
+
| Dirty for an unknown reason | Neither of the above — the subagent reported success but the tree is not clean, or the tree is dirty without a documented reason in the iteration report. **P212 case (no longer a hand-off)**: dirty `docs/BRIEFING.md` / `docs/briefing/*.md` at iter exit is a bug class — Step 5 retro-on-exit clause #4 now requires the iter to commit retro's BRIEFING edits as `chore(briefing): refresh from iter retro (P<NNN>)` before emitting `ITERATION_SUMMARY`. A dirty BRIEFING-at-iter-exit means the iter's retro-on-exit clause did not run to completion (retro hook failure, scoring failure, commit-gate rejection) and the orchestrator must NOT silently absorb it via a main-turn hand-off commit. | **Halt the loop.** Report the `git status --porcelain` output, the last subagent's reported outcome, and the divergence. Do NOT spawn the next iteration. |
|
|
842
940
|
|
|
843
941
|
**Rationale**: the orchestrator previously treated the subagent's reported outcome as truth. Any lie, partial write, or silent failure in the subagent propagated into the summary. The `git status --porcelain` check is the cheapest possible independent verification — policy-authorised, no network, no judgement required — and it catches exactly the class of failure the subagent cannot self-report.
|
|
844
942
|
|
|
@@ -881,11 +979,13 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
|
|
|
881
979
|
| Cohort-graduation pre-check fires before Drain action (within-appetite branch, `docs/changesets-holding/` non-empty) — evaluator returns `status=resolved` | Route to Rule 4 evidence-floor judgement (LLM-owned per ADR-061 Rule 4 + ADR-044 framework-resolution boundary). Evaluator's `status=resolved` is necessary-but-not-sufficient (P308 — evaluator script disclaims Rule 4 at lines 19-22). Interactive: per-held-entry `AskUserQuestion` with inline evidence summary (P350 brief-before-ID) + 3 options (Graduate / Defer / Reject). AFK: queue per-held-entry `outstanding_question` to `.afk-run-state/outstanding-questions.jsonl` (P352 / ADR-013 Rule 6 queue-and-continue universal default) — do NOT graduate, continue Drain for any pre-existing `.changeset/` entries. On Graduate verdict: `git mv docs/changesets-holding/<basename> .changeset/<basename>`, append README "Recently reinstated" entry citing the user's Rule 4 verdict, amend the iter's main commit per ADR-042 Rule 3. For class=3b cohorts, all cohort members graduate atomically on any-member Graduate verdict (Rule 3b cohort propagation); any Defer/Reject keeps entire cohort held. Per ADR-061 Rule 4 + Rule 5 + Rule 6 + Rule 7 + ADR-013 Rule 6 + P246 + P308 + P350 + P352 (Step 6.5 Cohort-graduation pre-check; step 2a Rule 4 evidence-floor judgement). Graduation criterion is evidence-of-working-as-desired (Rule 4 evidence floor), not elapsed wall-clock time — user direction 2026-05-17: "Dogfooding makes sense, but it shouldn't be time based, it should be until we are happy that it's working as desired." |
|
|
882
980
|
| Cohort-graduation pre-check — evaluator returns `status=vp-blocked` | Skip. Per ADR-061 Rule 2 Verification Pending carve-out (symmetric to ADR-042 Rule 2b). Do NOT graduate; held entry stays. `.verifying.md` → `.closed.md` transition auto-clears the carve-out at a later pass. Per ADR-061 Rule 2 + P246. |
|
|
883
981
|
| Cohort-graduation pre-check — evaluator returns `status=halt-no-resolution` | Halt at the framework-prescribed "Step 6.5 cohort-graduation halt-no-resolution" halt point. Per ADR-061 Rule 1a terminal: ambiguous join is a user-decision surface, not an agent-decision surface. Halt-with-batched-questions per the Step 2.5b cross-reference. Per ADR-061 Rule 1a + P246. |
|
|
982
|
+
| Post-release K→V auto-transition between iters (P228) | After a successful within-appetite Drain action shipped a release to npm, invoke `wr-itil-enumerate-postrelease-kv-candidates` to enumerate `.known-error.md` tickets whose `**Release vehicle**: .changeset/<name>.md` citation matches a just-shipped (deleted-from-tree) changeset, and dispatch `/wr-itil:transition-problem <NNN> verifying` per emitted `KV_CANDIDATE` line. Conditional on actual release (skipped when `push:watch` ran alone with no changeset); non-blocking on individual transition failure (logs per-ticket, continues to next candidate; persistent failures route to Step 2.5b accumulated questions). V→C remains a maintainer-only surface — this callback fires K→V only. Per ADR-022 + ADR-018 + ADR-010 amended P093 + ADR-014 + ADR-013 Rule 5 + ADR-044 + P228 + P233 + P267 + P330 (Step 6.5 Post-release K→V auto-transition subsection). |
|
|
884
983
|
| Post-release plugin cache refresh between iters (P233) | After a successful within-appetite Drain action shipped a release to npm, chain `/install-updates` to refresh the plugin cache before the next iter dispatches. Conditional on actual release (skipped when `push:watch` ran alone with no changeset); non-blocking on `/install-updates` failure (degrades to cache-stays-stale, equivalent to pre-amendment behaviour). Mid-loop ask discipline preserved by treating any `/install-updates` AskUserQuestion surface AS the Non-interactive fallback dry-run path. Per ADR-013 Rule 5 + ADR-044 + P130 + P106 + P233 (Step 6.5 Post-release cache refresh subsection). |
|
|
885
984
|
| CI failure during Step 6.5 drain (within-appetite branch) | Diagnose via `gh run view --log-failed`, classify against the closed fixable-in-iter allow-list (P081-class stale-grep-string, hook stub mismatch, test ID drift, environmental flake), fix-and-continue for fixable classes (each retry rides its own ADR-014 commit gate), 3-retry cap per iteration, halt for unrecoverable classes. Ambiguous classification defaults to halt. ADR-013 Rule 5 policy-authorised. Per ADR-026 grounding + ADR-044 framework-resolution boundary + P140 (Step 6.5 Failure handling). |
|
|
886
985
|
| Pipeline risk above appetite (push or release >= 5/25) | Auto-apply scorer remediations incrementally (ADR-042 Rule 2). The agent reads suggestions and decides what to do. Re-score after each apply; drain when within appetite. **Never release above appetite** (ADR-042 Rule 1) — no AskUserQuestion shortcut. Halt the loop with `outcome: halted-above-appetite` if the loop exhausts without convergence (ADR-042 Rule 5). Verification Pending commits excluded from auto-revert (Rule 2b). Per ADR-042 (Step 6.5 Above-appetite branch). |
|
|
887
|
-
| Origin diverged before start | Pull `--ff-only` if trivial; stop with
|
|
888
|
-
|
|
|
986
|
+
| Origin diverged before start (Branch 1) | Pull `--ff-only` if trivial; route to Branch 3 (stop with `git log HEAD..origin/<base>` and reverse report) if non-fast-forward — per ADR-019 (Step 0 Branch 1 / Branch 3). |
|
|
987
|
+
| Pre-existing uncommitted work attributable to prior iter's in-flight flow (Branch 2 — DEFERRED) | Per ADR-019 Branch 2 (currently routes → Branch 3 until follow-up lands the auto-commit mechanism + JTBD-001 gate composition + bats). Auto-commit criteria when shipped: (a) provenance unambiguous AND (b) risk within appetite per ADR-018. Commit subject convention: `chore(preflight): recover prior-session in-flight work — <ticket-ref>` (JTBD-006 audit trail). |
|
|
988
|
+
| Prior-session partial work detected at start (Branch 3 detection — session-continuity dirty: untracked `docs/decisions/*.proposed.md` / `docs/problems/*.md`, `.afk-run-state/iter-*.json` with `is_error: true` or `api_error_status >= 400`, stale `.claude/worktrees/*`, uncommitted SKILL.md/source/ADR edits) | Halt the loop with a structured Prior-Session State report in the AFK summary — deliberate carve-out from the 2026-06-06 Rule 6 queue-and-continue default (ambiguous state would mask the bug this preflight surfaces). Do NOT attempt non-interactive resume. Interactive invocations prompt via `AskUserQuestion` with 4 options (resume / discard / leave-and-lower-priority / halt). Per P109 + ADR-013 Rule 6 + ADR-019 (Step 0 Branch 3 detection mechanism). |
|
|
889
989
|
| Fix verification needed | Skip problem, add to "needs verification" list |
|
|
890
990
|
| Stop-condition #2 with user-answerable skip-reasons | Default: call AskUserQuestion (batched, ≤4 per call, sequential when >4) — the orchestrator's main turn is interactive by construction per ADR-032 subprocess-boundary; user is presumed at the keyboard. Fallback: emit Outstanding Design Questions table when AskUserQuestion is unavailable (Rule 6 fail-safe). Per ADR-013 Rule 1 + P122 (Step 2.5). |
|
|
891
991
|
| Pre-`ALL_DONE` gate sequence at any loop end (every stop-condition + every halt-path that emits a final summary + quota-exhaustion natural end) | Run Step 2.4 sequence UNCONDITIONALLY before `ALL_DONE` emit: gate (a) outstanding-questions surface via Step 2.5b; gate (b) session-level retro via `/wr-retrospective:run-retro`; gate (c) emit `ALL_DONE` only after (a) AND (b) complete. Hard-fail mode: if either gate cannot complete cleanly, halt with a directive instead of emitting `ALL_DONE` — recovery is the user satisfying the gate and re-invoking the skill. Per ADR-044 framework-resolution boundary + ADR-013 + ADR-014 (retro commits its own work) + P086 (extends iter-level retro to orchestrator-level) + P341 (Step 2.4). |
|
|
@@ -1023,6 +1123,7 @@ When every skipped ticket is in the `upstream-blocked` category (stop-condition
|
|
|
1023
1123
|
- **ADR-022** (`docs/decisions/022-problem-verification-pending.proposed.md`) — iteration outcomes map into the return-summary's `outcome` field (`verifying` for a released fix, `known-error` for a root-cause-confirmed ticket awaiting release, etc.).
|
|
1024
1124
|
- **ADR-032** (`docs/decisions/032-governance-skill-invocation-patterns.proposed.md`) — pattern taxonomy parent; Step 5 implements the AFK iteration-isolation wrapper — subprocess-boundary variant per the P084 amendment (2026-04-21), refining the P077 Agent-tool amendment. The P077 amendment remains in the ADR as the historical Agent-tool variant; the subprocess variant is the lead for new adopters.
|
|
1025
1125
|
- **ADR-037** (`docs/decisions/037-skill-testing-strategy.proposed.md`) — doc-lint bats contract-assertion pattern used by `test/work-problems-step-5-delegation.bats`.
|
|
1126
|
+
- **P211** (`docs/problems/known-error/211-work-problems-orchestrator-carries-prior-ticket-fix-strategy-text-into-iter-dispatch-without-re-grounding.md`) — driver for Step 5 iteration-prompt-body's "Re-ground per iter" orchestrator-side construction invariant. The bug shape (reported as inbound from downstream consumer bbstats as their P194): the orchestrator builds each iter's dispatch prompt by reading the target ticket's `## Fix Strategy` section and citing it verbatim into the subprocess prompt; across iterations, prior-ticket Fix Strategy text leaks into subsequent dispatches without re-grounding in the new ticket's design intent, and iters land fixes anchored on the wrong design rationale. Fix: SKILL.md Step 5's "Iteration prompt body" section now carries an explicit re-grounding paragraph (immediately after the "self-contained" opener) that (a) names the per-iter re-ground invariant against current-ticket-ID + title only, (b) forbids inlining `## Fix Strategy` verbatim into the dispatch prompt (the subprocess reads it from disk via `/wr-itil:manage-problem`), (c) names the cross-iter leakage class (prior ticket ID, prior Fix Strategy text, prior outcome reason, prior commit SHA, prior retro findings, prior outstanding-questions), (d) names the construction shape (template-driven, reset per iter, no global accumulator). Behavioural second-source: `test/work-problems-step-5-prompt-body-re-grounding.bats` (structural-permitted per ADR-052 Surface 2; tdd-review comment in fixture cites P012 as harness-gap). Composes with P084 (subprocess-boundary isolation — re-grounding is the symmetric orchestrator-side property of the subprocess's "no prior conversation context"), ADR-032 (AFK iteration-isolation wrapper — re-grounding clarifies the wrapper's isolation intent on the orchestrator side), JTBD-006 (load-bearing — audit trail degrades if iters work the wrong ticket's design rationale).
|
|
1026
1127
|
- **P206** (`docs/problems/known-error/206-work-problems-iter-workers-dont-add-changesets-fix-commits-accumulate-without-release.md`) — driver for Step 5 iter-prompt-body's explicit "if the fix changes shippable code, author a `.changeset/*.md` in the same commit" constraint (composes defence-in-depth with hook P141's `git commit`-time enforcement). Inbound-reported by downstream consumer **bbstats** as their P195 (`**Origin**: inbound-reported (bbstats#195)` per ADR-076 sort tier). Behavioural second-source: `test/work-problems-step-5-iter-changeset-required.bats` (structural-permitted per ADR-052; tdd-review comment in fixture).
|
|
1027
1128
|
- **P141** (`docs/problems/verifying/141-iter-prompt-time-reminder-misses-40-percent-of-publishable-iters-hook-level-enforcement.md`) — sibling hook (`packages/itil/hooks/itil-changeset-discipline.sh`) that enforces the changeset-discipline rule at `git commit` time. The Step 5 iter-prompt-body constraint composes-with this hook; the prompt-time rule is load-bearing because plugin-hook execution depends on the marketplace cache carrying the current hook version (a fresh-cache adopter without P141 still gets the constraint via the prompt).
|
|
1028
1129
|
- **JTBD-001**, **JTBD-006**, **JTBD-007**, **JTBD-101**, **JTBD-201** — personas whose reliability expectations the iteration-isolation wrapper restores. JTBD-006 (Progress the Backlog While I'm Away) + JTBD-007 (Keep Plugins Current Across Projects) are the load-bearing pair for the P206 changeset-discipline constraint — JTBD-006 requires the audit trail to stay accurate at release boundary; JTBD-007's closure depends on fixes actually shipping to npm.
|