@windyroad/itil 0.47.12 → 0.47.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Step 0d behavioural fixture per P220 + ADR-062 § JTBD-006 driver:
4
+ # work-problems pre-flights /wr-itil:check-upstream-responses when the
5
+ # outbound-responses cache is stale or missing AND there exist local
6
+ # tickets carrying `## Reported Upstream` back-link sections. The
7
+ # staleness decision lives in
8
+ # `packages/itil/lib/check-outbound-responses-staleness.sh::should_promote_outbound_responses_preflight`
9
+ # so the SKILL.md Step 0d prose is a thin source-and-call wrapper
10
+ # around a behaviorally-testable shell function (P081 / user feedback:
11
+ # prefer behavioural over structural-grep tests).
12
+ #
13
+ # Cases covered (symmetric to Step 0b cases plus the back-link discovery
14
+ # axis that replaces channels-config):
15
+ # 1. No tickets with `## Reported Upstream` section → "no-back-link-tickets"
16
+ # (downstream-adopter non-obligation; analogue to no-channels-config).
17
+ # 2. Back-link ticket present, cache absent → "first-run-cache-absent".
18
+ # 3. Back-link ticket present, cache present, last_checked null → "first-run-last-checked-null".
19
+ # 4. Back-link ticket present, cache fresh within TTL → "fresh-within-ttl".
20
+ # 5. Back-link ticket present, cache older than TTL → "ttl-expiry" (with age + ttl in the reason).
21
+ # 6. Custom ttl_seconds in cache honored (not hardcoded default).
22
+ # 7. Missing ttl_seconds field defaults to 86400 (24h symmetric with inbound).
23
+
24
+ setup() {
25
+ REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../../.." && pwd)"
26
+ HELPER="$REPO_ROOT/packages/itil/lib/check-outbound-responses-staleness.sh"
27
+
28
+ FIXTURE="$(mktemp -d)"
29
+ mkdir -p "$FIXTURE/docs/problems"
30
+ }
31
+
32
+ teardown() {
33
+ [ -z "${FIXTURE:-}" ] || rm -rf -- "$FIXTURE" # no-op when setup aborted before mktemp assigned FIXTURE
34
+ }
35
+
36
+ # Helper: write a back-link ticket fixture (a problem file containing a `## Reported Upstream` section) to the path given as $1; tests call it with paths under docs/problems/.
37
+ _write_backlink_ticket() {
38
+ local ticket_path="$1" # $1: file to create or overwrite; its parent directory must already exist
39
+ cat > "$ticket_path" <<'EOF'
40
+ # Problem 999: example back-link fixture
41
+
42
+ **Status**: Open
43
+
44
+ ## Description
45
+
46
+ Fixture for Step 0d behavioural test.
47
+
48
+ ## Reported Upstream
49
+
50
+ - **Repo**: example/upstream
51
+ - **URL**: https://github.com/example/upstream/issues/999
52
+ - **Filed**: 2026-06-08
53
+ EOF
54
+ }
55
+
56
+ @test "helper exists at the contracted path" {
57
+ [ -f "$HELPER" ]
58
+ }
59
+
60
+ @test "case 1: no back-link tickets → no-back-link-tickets" {
61
+ # shellcheck disable=SC1090
62
+ source "$HELPER"
63
+ run should_promote_outbound_responses_preflight "$FIXTURE"
64
+ [ "$status" -eq 0 ]
65
+ [ "$output" = "no-back-link-tickets" ]
66
+ }
67
+
68
+ @test "case 1b: tickets without ## Reported Upstream section → no-back-link-tickets" {
69
+ cat > "$FIXTURE/docs/problems/100-no-back-link.open.md" <<'EOF'
70
+ # Problem 100: no upstream link
71
+
72
+ ## Description
73
+
74
+ Local-only ticket.
75
+ EOF
76
+ # shellcheck disable=SC1090
77
+ source "$HELPER"
78
+ run should_promote_outbound_responses_preflight "$FIXTURE"
79
+ [ "$status" -eq 0 ]
80
+ [ "$output" = "no-back-link-tickets" ]
81
+ }
82
+
83
+ @test "case 2: back-link ticket present, cache absent → first-run-cache-absent" {
84
+ _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
85
+ # shellcheck disable=SC1090
86
+ source "$HELPER"
87
+ run should_promote_outbound_responses_preflight "$FIXTURE"
88
+ [ "$status" -eq 0 ]
89
+ [ "$output" = "first-run-cache-absent" ]
90
+ }
91
+
92
+ @test "case 2b: back-link in per-state subdir layout (RFC-002) is discovered" {
93
+ mkdir -p "$FIXTURE/docs/problems/known-error"
94
+ _write_backlink_ticket "$FIXTURE/docs/problems/known-error/220-cadence-gap.md"
95
+ # shellcheck disable=SC1090
96
+ source "$HELPER"
97
+ run should_promote_outbound_responses_preflight "$FIXTURE"
98
+ [ "$status" -eq 0 ]
99
+ [ "$output" = "first-run-cache-absent" ]
100
+ }
101
+
102
+ @test "case 3: cache present, last_checked null → first-run-last-checked-null" {
103
+ _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
104
+ cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<'EOF'
105
+ { "last_checked": null, "tickets": {} }
106
+ EOF
107
+ # shellcheck disable=SC1090
108
+ source "$HELPER"
109
+ run should_promote_outbound_responses_preflight "$FIXTURE"
110
+ [ "$status" -eq 0 ]
111
+ [ "$output" = "first-run-last-checked-null" ]
112
+ }
113
+
114
+ @test "case 4: cache fresh within TTL → fresh-within-ttl (silent-pass)" {
115
+ _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
116
+ # last_checked 1 hour ago — well within 24h default TTL.
117
+ local recent_iso
118
+ recent_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
119
+ cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
120
+ { "last_checked": "$recent_iso", "tickets": {} }
121
+ EOF
122
+ # shellcheck disable=SC1090
123
+ source "$HELPER"
124
+ run should_promote_outbound_responses_preflight "$FIXTURE"
125
+ [ "$status" -eq 0 ]
126
+ [ "$output" = "fresh-within-ttl" ]
127
+ }
128
+
129
+ @test "case 5: cache older than TTL → ttl-expiry with age + ttl in the reason" {
130
+ _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
131
+ # last_checked 2 days ago — past 24h default TTL.
132
+ local stale_iso
133
+ stale_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=2)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
134
+ cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
135
+ { "last_checked": "$stale_iso", "tickets": {} }
136
+ EOF
137
+ # shellcheck disable=SC1090
138
+ source "$HELPER"
139
+ run should_promote_outbound_responses_preflight "$FIXTURE"
140
+ [ "$status" -eq 0 ]
141
+ # Format: "ttl-expiry age=<N>s ttl=<M>s"
142
+ [[ "$output" =~ ^ttl-expiry\ age=[0-9]+s\ ttl=86400s$ ]]
143
+ }
144
+
145
+ @test "case 6: custom ttl_seconds in cache is honored (not hardcoded default)" {
146
+ # 1-hour TTL; last_checked 90 minutes ago → stale under the custom TTL,
147
+ # but would be FRESH under the 86400s default. Confirms the helper reads
148
+ # ttl_seconds from cache rather than hardcoding 86400.
149
+ _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
150
+ local mid_iso
151
+ mid_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=90)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
152
+ cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
153
+ { "last_checked": "$mid_iso", "tickets": {}, "ttl_seconds": 3600 }
154
+ EOF
155
+ # shellcheck disable=SC1090
156
+ source "$HELPER"
157
+ run should_promote_outbound_responses_preflight "$FIXTURE"
158
+ [ "$status" -eq 0 ]
159
+ [[ "$output" =~ ^ttl-expiry\ age=[0-9]+s\ ttl=3600s$ ]]
160
+ }
161
+
162
+ @test "case 7: missing ttl_seconds defaults to 86400 (symmetric with inbound)" {
163
+ _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
164
+ local recent_iso # 23h old: reads as fresh only when the default TTL for a missing ttl_seconds is ~24h (case 4 already covers 1h)
165
+ recent_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=23)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
166
+ cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
167
+ { "last_checked": "$recent_iso", "tickets": {} }
168
+ EOF
169
+ # shellcheck disable=SC1090
170
+ source "$HELPER"
171
+ run should_promote_outbound_responses_preflight "$FIXTURE"
172
+ [ "$status" -eq 0 ]
173
+ [ "$output" = "fresh-within-ttl" ]
174
+ }
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env bats
2
+ # tdd-review: structural-permitted (justification: the doc-lint slice below
3
+ # asserts SKILL.md / ADR-032 prose contract — SKILL.md is the contract
4
+ # document per ADR-037 Permitted Exception; these guards catch prose drift
5
+ # away from the behavioural HALT-with-advisory contract exercised by the behavioural cases in this file. The
6
+ # load-bearing core of this fixture is behavioural per ADR-052. harness-gap P012)
7
+ #
8
+ # Behavioural test: work-problems Step 5 exit-code semantics — the
9
+ # is_error:true TRANSIENT-API-ERROR HALT branch (P214). When an iter
10
+ # subprocess returns `is_error: true` with `total_cost_usd: 0` AND no staged
11
+ # work in the tree (the 529 Overloaded / 429 rate-limit / 401 auth-expired
12
+ # shape — the API call never landed; nothing was done; metadata records the
13
+ # failure), the orchestrator MUST halt the loop with a class-appropriate
14
+ # advisory line in the final summary — NOT silently treat exit-0 as success
15
+ # and try to parse a missing ITERATION_SUMMARY block.
16
+ #
17
+ # This is the HALT counterpart to the existing P261 SALVAGE branch (covered
18
+ # by work-problems-step-5-stream-timeout-salvage.bats):
19
+ # - SALVAGE: is_error:true + staged work + bats green (stream-timeout class)
20
+ # - HALT: is_error:true + nothing staged (transient-API-error class — P214)
21
+ # Both branches require the orchestrator to read `is_error` BEFORE the
22
+ # Exit-0 → parse-ITERATION_SUMMARY path; without the explicit check-order
23
+ # the loop silently miscounts and may spawn further subprocesses that fail
24
+ # identically (the AFK-promise-breaking shape P214 reports).
25
+ #
26
+ # The fake-shim below re-creates the production 529 Overloaded shape:
27
+ # is_error:true, total_cost_usd:0, no staged work, .result carrying the
28
+ # upstream error string. The harness re-implements the orchestrator's
29
+ # ordered-check decision contract (faithful to SKILL.md Step 5) and asserts
30
+ # the HALT routing + class-appropriate advisory for each transient class.
31
+ #
32
+ # @problem P214
33
+ # @jtbd JTBD-006
34
+ #
35
+ # Cross-reference:
36
+ # P214 (work-problems Step 5 exit-code rule doesn't handle is_error:true
37
+ # transient API failures) — driver ticket
38
+ # P261 (is_error:true stream-timeout salvage carve-out) — sibling SALVAGE
39
+ # branch; this fixture covers the HALT counterpart
40
+ # ADR-032 (governance skill invocation patterns — is_error:true class
41
+ # taxonomy: SALVAGE = stream-timeout; HALT = transient-API-error) — the
42
+ # amended contract this fixture pins
43
+ # ADR-013 Rule 6 (AFK fail-safe — HALT routing is non-interactive; no
44
+ # AskUserQuestion) — invariant honoured
45
+ # ADR-037 / ADR-052 (skill testing strategy — behavioural default; doc-lint
46
+ # contract assertion is the Permitted Exception, marked above)
47
+
48
+ setup() {
49
+ TEST_TMP="$(mktemp -d)"
50
+ FAKE_BIN="${TEST_TMP}/bin"
51
+ mkdir -p "$FAKE_BIN"
52
+
53
+ # Fake `claude` binary simulating the transient-API-error shape: exits 0,
54
+ # emits an is_error:true JSON envelope with total_cost_usd:0 and the
55
+ # transient-class error string in `.result`. No staged work — the API call
56
+ # never landed; nothing was done.
57
+ cat > "$FAKE_BIN/claude" <<'FAKE_EOF'
58
+ #!/usr/bin/env bash
59
+ # Test fake for work-problems Step 5 P214 transient-API-error halt fixture.
60
+ # Emits is_error:true with total_cost_usd:0 and a class-specific .result string.
61
+ # FAKE_ERROR_CLASS selects the transient class: overloaded | rate-limit | auth-expired
62
+ case "${FAKE_ERROR_CLASS:-overloaded}" in
63
+ overloaded)
64
+ RESULT='API Error (529): Overloaded'
65
+ ;;
66
+ rate-limit)
67
+ RESULT='API Error (429): Rate limit exceeded'
68
+ ;;
69
+ auth-expired)
70
+ RESULT='API Error (401): Authentication expired'
71
+ ;;
72
+ *)
73
+ RESULT='API Error: Unknown'
74
+ ;;
75
+ esac
76
+ printf '%s\n' "{\"is_error\":true,\"result\":\"${RESULT}\",\"total_cost_usd\":0,\"duration_ms\":1500,\"usage\":{\"input_tokens\":0,\"output_tokens\":0,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0}}"
77
+ FAKE_EOF
78
+ chmod +x "$FAKE_BIN/claude"
79
+ export PATH="$FAKE_BIN:$PATH"
80
+
81
+ # A throwaway git repo so staged-work detection is real (and empty — no
82
+ # staged work is the load-bearing characteristic of this class).
83
+ REPO="${TEST_TMP}/repo"
84
+ mkdir -p "$REPO"
85
+ git -C "$REPO" init -q
86
+ git -C "$REPO" config user.email "test@example.com"
87
+ git -C "$REPO" config user.name "Test"
88
+ git -C "$REPO" commit -q --allow-empty -m "root"
89
+
90
+ SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
91
+ SKILL_FILE="${SKILL_DIR}/SKILL.md"
92
+ ADR_FILE="$(cd "${SKILL_DIR}/../../../.." && pwd)/docs/decisions/032-governance-skill-invocation-patterns.proposed.md"
93
+ }
94
+
95
+ teardown() {
96
+ if [ -n "${TEST_TMP:-}" ] && [ -d "$TEST_TMP" ]; then
97
+ rm -rf "$TEST_TMP"
98
+ fi
99
+ }
100
+
101
+ # Faithful re-implementation of SKILL.md Step 5's ORDERED-CHECK decision
102
+ # contract (P214 amendment to the P261 carve-out). The orchestrator reads
103
+ # (1) exit code, (2) is_error, (3) ITERATION_SUMMARY — in that order. On
104
+ # is_error:true + nothing staged, emit a class-appropriate advisory.
105
+ ordered_check_decision() {
106
+ local exit_code="$1" # $1: exit status of the iter subprocess
107
+ local json="$2" # $2: JSON envelope text (parsed with python3 json.load below)
108
+ local repo="$3" # $3: git working tree inspected for staged changes
109
+
110
+ # (1) Non-zero exit → halt per the exit-code contract.
111
+ if [ "$exit_code" -ne 0 ]; then
112
+ printf 'DECISION=HALT reason=non-zero-exit\n'
113
+ return 0 # every path returns 0 — the decision is carried on stdout, not in the exit status
114
+ fi
115
+
116
+ # (2) Parse is_error BEFORE attempting to parse ITERATION_SUMMARY (the
117
+ # ordered-check rule P214 amends in).
118
+ local is_error result # is_error: lowercased str() of the field ("true" / "false" / "none" when absent); result: the .result field, or "" when absent
119
+ is_error=$(printf '%s' "$json" | python3 -c 'import json,sys; print(str(json.load(sys.stdin).get("is_error")).lower())')
120
+ result=$(printf '%s' "$json" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("result",""))')
121
+
122
+ if [ "$is_error" = "true" ]; then
123
+ # is_error:true with staged work → defer to existing P261 SALVAGE branch
124
+ # (covered by sibling fixture work-problems-step-5-stream-timeout-salvage.bats).
125
+ local staged # names of staged files reported by `git diff --cached --name-only`; empty when nothing is staged
126
+ staged=$(git -C "$repo" diff --cached --name-only)
127
+ if [ -n "$staged" ]; then
128
+ printf 'DECISION=DEFER_TO_SALVAGE_BRANCH\n'
129
+ return 0
130
+ fi
131
+
132
+ # is_error:true with NO staged work → HALT with class-appropriate advisory.
133
+ local advisory # set by the first case arm whose pattern matches $result (overloaded, then rate-limit, then auth, then generic)
134
+ case "$result" in
135
+ *"529"*|*"Overloaded"*|*"overloaded"*)
136
+ advisory='API overloaded; retry when service recovers'
137
+ ;;
138
+ *"429"*|*"Rate limit"*|*"rate limit"*|*"rate-limit"*)
139
+ advisory='API rate-limited; retry when limit window resets'
140
+ ;;
141
+ *"401"*|*"Authentication"*|*"auth"*)
142
+ advisory='API auth expired; refresh credentials before resuming'
143
+ ;;
144
+ *)
145
+ advisory='transient API error; inspect .result and resume manually'
146
+ ;;
147
+ esac
148
+ printf 'DECISION=HALT reason=is-error-transient advisory=%s\n' "$advisory"
149
+ return 0
150
+ fi
151
+
152
+ # (3) Exit 0 AND is_error is anything other than "true" (false, absent, or unparseable envelope) → parse ITERATION_SUMMARY.
153
+ printf 'DECISION=PARSE_SUMMARY\n'
154
+ return 0
155
+ }
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # Behavioural cases (the load-bearing core per ADR-052).
159
+ # ---------------------------------------------------------------------------
160
+
161
+ @test "P214: is_error:true + 529 Overloaded + no staged work -> HALT with API-overloaded advisory" {
162
+ export FAKE_ERROR_CLASS=overloaded
163
+ local json
164
+ json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
165
+ run ordered_check_decision 0 "$json" "$REPO"
166
+ [ "$status" -eq 0 ]
167
+ [[ "$output" == *"DECISION=HALT"* ]]
168
+ [[ "$output" == *"reason=is-error-transient"* ]]
169
+ [[ "$output" == *"API overloaded"* ]]
170
+ [[ "$output" == *"retry when service recovers"* ]]
171
+ }
172
+
173
+ @test "P214: is_error:true + 429 rate-limit + no staged work -> HALT with rate-limited advisory" {
174
+ export FAKE_ERROR_CLASS=rate-limit
175
+ local json
176
+ json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
177
+ run ordered_check_decision 0 "$json" "$REPO"
178
+ [ "$status" -eq 0 ]
179
+ [[ "$output" == *"DECISION=HALT"* ]]
180
+ [[ "$output" == *"reason=is-error-transient"* ]]
181
+ [[ "$output" == *"rate-limited"* ]]
182
+ }
183
+
184
+ @test "P214: is_error:true + 401 auth-expired + no staged work -> HALT with refresh-credentials advisory" {
185
+ export FAKE_ERROR_CLASS=auth-expired
186
+ local json
187
+ json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
188
+ run ordered_check_decision 0 "$json" "$REPO"
189
+ [ "$status" -eq 0 ]
190
+ [[ "$output" == *"DECISION=HALT"* ]]
191
+ [[ "$output" == *"reason=is-error-transient"* ]]
192
+ [[ "$output" == *"auth expired"* ]]
193
+ [[ "$output" == *"refresh credentials"* ]]
194
+ }
195
+
196
+ @test "P214: is_error MUST be checked BEFORE ITERATION_SUMMARY parse on Exit 0 (ordered-check invariant)" {
197
+ # The load-bearing P214 invariant: when exit 0 AND is_error:true, the
198
+ # decision is HALT, NOT PARSE_SUMMARY. Without the ordered-check rule the
199
+ # loop would silently route to PARSE_SUMMARY and miss the failure.
200
+ export FAKE_ERROR_CLASS=overloaded
201
+ local json
202
+ json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
203
+ run ordered_check_decision 0 "$json" "$REPO"
204
+ [ "$status" -eq 0 ]
205
+ [[ "$output" != *"DECISION=PARSE_SUMMARY"* ]]
206
+ [[ "$output" == *"DECISION=HALT"* ]]
207
+ }
208
+
209
+ @test "P214: non-zero exit takes precedence over is_error check (HALT routing)" {
210
+ # Non-zero exit halts regardless of is_error value — the exit-code rule
211
+ # is check (1) in the ordered sequence.
212
+ export FAKE_ERROR_CLASS=overloaded
213
+ local json
214
+ json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
215
+ run ordered_check_decision 1 "$json" "$REPO"
216
+ [ "$status" -eq 0 ]
217
+ [[ "$output" == *"DECISION=HALT"* ]]
218
+ [[ "$output" == *"reason=non-zero-exit"* ]]
219
+ }
220
+
221
+ @test "P214: is_error:true + staged work -> defers to existing P261 SALVAGE branch (no double-handling)" {
222
+ # When staged work exists, the transient-API-error HALT branch must NOT
223
+ # fire — it MUST defer to the P261 SALVAGE branch. This guards against
224
+ # the new branch swallowing salvage-eligible work.
225
+ export FAKE_ERROR_CLASS=overloaded
226
+ printf 'salvageable work\n' > "$REPO/salvage-me.txt"
227
+ git -C "$REPO" add salvage-me.txt
228
+ local json
229
+ json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
230
+ run ordered_check_decision 0 "$json" "$REPO"
231
+ [ "$status" -eq 0 ]
232
+ [[ "$output" == *"DECISION=DEFER_TO_SALVAGE_BRANCH"* ]]
233
+ }
234
+
235
+ # ---------------------------------------------------------------------------
236
+ # Doc-lint contract assertions (Permitted Exception per ADR-037; structural
237
+ # slice marked at top of file per ADR-052 Surface 2). These guard the SKILL.md
238
+ # / ADR-032 prose against drift away from the behavioural contract above.
239
+ # ---------------------------------------------------------------------------
240
+
241
+ @test "P214: SKILL.md Step 5 documents the ORDERED check sequence (exit-code, is_error, ITERATION_SUMMARY)" {
242
+ # The ordered-check rule must be explicit in the prose so an implementer
243
+ # reading Step 5 routes is_error:true to HALT before attempting to parse
244
+ # a missing ITERATION_SUMMARY block.
245
+ run grep -niE "(check|read|parse).{0,40}is_error.{0,80}before.{0,80}(ITERATION_SUMMARY|parse|\.result)|ordered check|check.order" "$SKILL_FILE"
246
+ [ "$status" -eq 0 ]
247
+ }
248
+
249
+ @test "P214: SKILL.md Step 5 names the transient-API-error classes (overloaded / rate-limit / auth-expired)" {
250
+ # The HALT advisory must enumerate the known transient classes so the
251
+ # final summary carries an actionable message rather than a generic
252
+ # "loop halted" line.
253
+ run grep -niE "529|Overloaded|overload" "$SKILL_FILE"
254
+ [ "$status" -eq 0 ]
255
+ run grep -niE "429|rate.?limit" "$SKILL_FILE"
256
+ [ "$status" -eq 0 ]
257
+ run grep -niE "401|auth.?expired|auth.*expir" "$SKILL_FILE"
258
+ [ "$status" -eq 0 ]
259
+ }
260
+
261
+ @test "P214: SKILL.md Step 5 cites P214 as the driver of the transient-API-error HALT branch" {
262
+ run grep -nE "P214" "$SKILL_FILE"
263
+ [ "$status" -eq 0 ]
264
+ }
265
+
266
+ @test "P214: SKILL.md HALT branch distinguishes the transient-API-error class from the P261 stream-timeout SALVAGE class" {
267
+ # The two is_error:true branches (SALVAGE vs HALT) must be cross-referenced
268
+ # so adopters reading either branch see the other.
269
+ run grep -niE "P261.{0,200}(transient|HALT|overload|class)|transient.{0,200}P261|salvage.{0,200}(transient|class)" "$SKILL_FILE"
270
+ [ "$status" -eq 0 ]
271
+ }
272
+
273
+ @test "P214: ADR-032 P261 section names the is_error:true class taxonomy (SALVAGE = stream-timeout; HALT = transient-API-error)" {
274
+ # ADR-032's P261 amendment should be extended with the broader class
275
+ # taxonomy so the SKILL prose and the ADR contract stay in sync.
276
+ run grep -niE "P214|transient.?(API.?)?error|class taxonomy|(overload|rate.?limit|auth.?expired).{0,80}HALT|HALT.{0,80}(overload|rate.?limit|auth.?expired)" "$ADR_FILE"
277
+ [ "$status" -eq 0 ]
278
+ }
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env bats
2
+ # P211 — work-problems Step 5 iteration-prompt-body must EXPLICITLY re-ground
3
+ # each iter's dispatch prompt against the CURRENT ticket only. The orchestrator
4
+ # MUST NOT inline the ticket's `## Fix Strategy` text verbatim, and MUST NOT
5
+ # leak prior-iter content (prior ticket ID, prior Fix Strategy text, prior
6
+ # outcome reason, prior commit SHA, prior retro findings) across iterations.
7
+ #
8
+ # Reported as inbound from downstream consumer bbstats (their P194) on
9
+ # 2026-05-15; covered by ADR-076 Origin field tier.
10
+ #
11
+ # Behavioural mechanism for the bug: AFK iter subprocesses inherit a stale
12
+ # design-rationale frame and may attempt fixes anchored on the wrong ticket's
13
+ # intent. Workaround the ticket names: user-in-the-loop verification after
14
+ # each iter, reading the subprocess's commit and checking whether it cites
15
+ # the correct ticket's design rationale — a manual-policing burden the AFK
16
+ # loop is meant to eliminate. JTBD-006 (Progress the Backlog While I'm Away)
17
+ # is load-bearing: the audit trail and trust in the AFK loop degrade if iters
18
+ # work the wrong ticket's design rationale.
19
+ #
20
+ # tdd-review: structural-permitted (justification: SKILL.md is the named
21
+ # contract document under ADR-052; behavioural alternative would require a
22
+ # synthetic `claude -p` iter dispatch harness that simulates multiple
23
+ # sequential iters and asserts no cross-iter prompt-body content leakage —
24
+ # that harness sits outside the skill layer and depends on the Anthropic CLI
25
+ # binary. Same Permitted Exception precedent as
26
+ # `work-problems-step-5-iter-changeset-required.bats:14-21`,
27
+ # `work-problems-step-5-delegation.bats:99-105`, and the P083 / P086 / P089
28
+ # ScheduleWakeup / retro / stdin-redirect fixtures in the same directory.
29
+ # P012 is the harness-gap ticket).
30
+ #
31
+ # @problem P211
32
+ # @problem P012
33
+ # @jtbd JTBD-006
34
+ # @jtbd JTBD-001
35
+ #
36
+ # Cross-reference:
37
+ # P211 — this ticket (orchestrator carries prior-ticket Fix Strategy into
38
+ # next iter's dispatch context — pollutes the new iter's framing)
39
+ # bbstats#194 — inbound report from downstream consumer
40
+ # ADR-014 (single-commit grain — fix lands as one coherent commit)
41
+ # ADR-032 (governance skill invocation patterns — AFK iteration-isolation
42
+ # wrapper; re-grounding is a clarification of that isolation intent)
43
+ # ADR-052 (behavioural tests default; structural-permitted with comment)
44
+ # ADR-076 (inbound-reported problems rank ahead via sort tier — Origin
45
+ # field stamping)
46
+ # JTBD-006 (Progress the Backlog While I'm Away) — load-bearing
47
+
48
+ setup() {
49
+ SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
50
+ SKILL_FILE="${SKILL_DIR}/SKILL.md"
51
+ }
52
+
53
+ @test "SKILL.md cites P211 (re-grounding driver) in Related section" {
54
+ # Self-documenting contract — a future contributor weakening the
55
+ # re-grounding constraint reads P211 and understands why it exists.
56
+ run grep -nE 'P211' "$SKILL_FILE"
57
+ [ "$status" -eq 0 ]
58
+ }
59
+
60
+ @test "SKILL.md Step 5 iteration prompt body names re-grounding per iter explicitly" {
61
+ # The "self-contained" opener at line 510 is the existing weaker form; the
62
+ # stricter "re-ground per iter" phrasing names the construction invariant
63
+ # the orchestrator MUST satisfy on each iter dispatch. P211's bug shape is
64
+ # exactly the case where "self-contained" was read as a subprocess-side
65
+ # property only, with the orchestrator-side construction leaking prior-iter
66
+ # content into the new iter's prompt body.
67
+ run grep -niE "re.?ground.{0,40}per iter|re.?grounded.{0,40}per iter|per.?iter.{0,40}re.?ground" "$SKILL_FILE"
68
+ [ "$status" -eq 0 ]
69
+ }
70
+
71
+ @test "SKILL.md Step 5 iter prompt body forbids inlining Fix Strategy verbatim" {
72
+ # The bug shape: orchestrator reads target ticket's `## Fix Strategy` and
73
+ # cites it verbatim into the iteration subprocess's prompt body. The
74
+ # SKILL.md MUST explicitly forbid this so future contributors understand
75
+ # the subprocess reads Fix Strategy from disk via manage-problem inside
76
+ # its own context.
77
+ run grep -niE "(not|never|MUST NOT|does not).{0,40}inline.{0,40}Fix Strategy|Fix Strategy.{0,40}(not|never|MUST NOT|does not).{0,40}inline|do not.{0,40}cite.{0,40}Fix Strategy.{0,40}verbatim|Fix Strategy.{0,40}verbatim.{0,40}(not|never|forbid)" "$SKILL_FILE"
78
+ [ "$status" -eq 0 ]
79
+ }
80
+
81
+ @test "SKILL.md Step 5 iter prompt body explicitly forbids prior-iter content leakage" {
82
+ # The cross-iter leakage class names: prior ticket ID, prior Fix Strategy
83
+ # text, prior outcome reason, prior commit SHA, prior retro findings. The
84
+ # SKILL.md MUST name the no-leakage invariant explicitly so the orchestrator
85
+ # main turn's prompt construction is constrained on every iter.
86
+ run grep -niE "(no prior|not.{0,20}prior|prior.?iter.{0,40}(leak|carry|inherit)|leak.{0,40}prior|carry.{0,40}prior.{0,40}iter)" "$SKILL_FILE"
87
+ [ "$status" -eq 0 ]
88
+ }
89
+
90
+ @test "SKILL.md Step 5 names template-driven reset-per-iter construction" {
91
+ # The construction shape: template-driven, reset per iter, no global
92
+ # accumulator across iters. This is the structural invariant the
93
+ # orchestrator main turn must satisfy when building each iter's prompt.
94
+ run grep -niE "template.?driven|reset per iter|reset.{0,20}per.{0,20}iter|no.{0,20}(global )?accumulator" "$SKILL_FILE"
95
+ [ "$status" -eq 0 ]
96
+ }
97
+
98
+ @test "SKILL.md Step 5 iteration prompt body cites P211 inline" {
99
+ # The re-grounding clause must cite P211 inline so the contract document
100
+ # is self-documenting — a future contributor removing the clause reads the
101
+ # P211 reference and understands why it exists before deleting it. Same
102
+ # pattern as the P083 / P086 / P146 / P232 inline citations in the same
103
+ # block.
104
+ run grep -nE "re.?ground.{0,200}P211|P211.{0,200}re.?ground|P211.{0,200}Fix Strategy|Fix Strategy.{0,200}P211" "$SKILL_FILE"
105
+ [ "$status" -eq 0 ]
106
+ }
107
+
108
+ @test "SKILL.md re-grounding clause sits inside Step 5 iteration prompt body section" {
109
+ # Structural locality: the re-grounding clause must live INSIDE Step 5's
110
+ # Iteration prompt body section (after the "self-contained" opener at
111
+ # line 510), not free-floating elsewhere in SKILL.md. Locality matters
112
+ # because the rule is read alongside the rest of the prompt-body contract,
113
+ # and a future contributor refactoring Step 5 must encounter it inline.
114
+ # Assertion shape: at least one line containing "re-ground" sits after the
115
+ # line containing "Iteration prompt body" and before the line containing
116
+ # "Return-summary contract".
117
+ local iter_line reground_line return_summary_line
118
+ iter_line=$(grep -nE '^\*\*Iteration prompt body' "$SKILL_FILE" | head -1 | cut -d: -f1)
119
+ return_summary_line=$(grep -nE '^\*\*Return-summary contract' "$SKILL_FILE" | head -1 | cut -d: -f1)
120
+ [ -n "$iter_line" ]
121
+ [ -n "$return_summary_line" ]
122
+ [ "$iter_line" -lt "$return_summary_line" ]
123
+ # Require the literal hyphenated form "re-ground" (also covers "re-grounded" /
124
+ # "re-grounding") so "foreground" does not count, and search ONLY between the two
125
+ # section markers so an earlier mention elsewhere in SKILL.md cannot cause a false failure.
126
+ reground_line=$(awk -v lo="$iter_line" -v hi="$return_summary_line" 'NR > lo && NR < hi && tolower($0) ~ /re-ground/ { print NR; exit }' "$SKILL_FILE")
127
+ [ -n "$reground_line" ]
128
+ }