@windyroad/itil 0.47.12 → 0.47.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/bin/wr-itil-check-outbound-responses-staleness +51 -0
- package/bin/wr-itil-enumerate-postrelease-kv-candidates +51 -0
- package/lib/check-outbound-responses-staleness.sh +93 -0
- package/lib/enumerate-postrelease-kv-candidates.sh +106 -0
- package/package.json +1 -1
- package/scripts/run-check-outbound-responses-staleness.sh +21 -0
- package/scripts/run-enumerate-postrelease-kv-candidates.sh +29 -0
- package/skills/check-upstream-responses/SKILL.md +5 -2
- package/skills/manage-problem/SKILL.md +5 -5
- package/skills/review-problems/SKILL.md +28 -4
- package/skills/review-problems/test/jtbd-301-verdict-shape-contract.bats +225 -0
- package/skills/transition-problem/SKILL.md +1 -1
- package/skills/work-problems/SKILL.md +121 -20
- package/skills/work-problems/test/work-problems-step-0d-outbound-responses-staleness-behavioural.bats +174 -0
- package/skills/work-problems/test/work-problems-step-5-is-error-transient-halt.bats +278 -0
- package/skills/work-problems/test/work-problems-step-5-prompt-body-re-grounding.bats +128 -0
- package/skills/work-problems/test/work-problems-step-6-5-postrelease-kv-callback.bats +209 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Step 0d behavioural fixture per P220 + ADR-062 § JTBD-006 driver:
|
|
4
|
+
# work-problems pre-flights /wr-itil:check-upstream-responses when the
|
|
5
|
+
# outbound-responses cache is stale or missing AND there exist local
|
|
6
|
+
# tickets carrying `## Reported Upstream` back-link sections. The
|
|
7
|
+
# staleness decision lives in
|
|
8
|
+
# `packages/itil/lib/check-outbound-responses-staleness.sh::should_promote_outbound_responses_preflight`
|
|
9
|
+
# so the SKILL.md Step 0d prose is a thin source-and-call wrapper
|
|
10
|
+
# around a behaviorally-testable shell function (P081 / user feedback:
|
|
11
|
+
# prefer behavioural over structural-grep tests).
|
|
12
|
+
#
|
|
13
|
+
# Cases covered (symmetric to Step 0b cases plus the back-link discovery
|
|
14
|
+
# axis that replaces channels-config):
|
|
15
|
+
# 1. No tickets with `## Reported Upstream` section → "no-back-link-tickets"
|
|
16
|
+
# (downstream-adopter non-obligation; analogue to no-channels-config).
|
|
17
|
+
# 2. Back-link ticket present, cache absent → "first-run-cache-absent".
|
|
18
|
+
# 3. Back-link ticket present, cache present, last_checked null → "first-run-last-checked-null".
|
|
19
|
+
# 4. Back-link ticket present, cache fresh within TTL → "fresh-within-ttl".
|
|
20
|
+
# 5. Back-link ticket present, cache older than TTL → "ttl-expiry" (with age + ttl in the reason).
|
|
21
|
+
# 6. Custom ttl_seconds in cache honored (not hardcoded default).
|
|
22
|
+
# 7. Missing ttl_seconds field defaults to 86400 (24h symmetric with inbound).
|
|
23
|
+
|
|
24
|
+
# Per-test fixture: locate the staleness helper and create a scratch project
# tree with an empty docs/problems directory.
#
# Globals written:
#   REPO_ROOT - directory five levels above this test file
#   HELPER    - path to lib/check-outbound-responses-staleness.sh
#   FIXTURE   - fresh temporary directory containing docs/problems
setup() {
  local test_dir pkg_root
  test_dir="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)"
  REPO_ROOT="$(cd "$test_dir/../../../../.." && pwd)"

  # Resolve the helper relative to the package root (three levels above the
  # test directory: test -> work-problems -> skills -> package). In the
  # monorepo this is the same file as $REPO_ROOT/packages/itil/lib/..., and
  # it also resolves when the package is laid out with lib/ beside skills/
  # and no surrounding packages/itil/ directory.
  pkg_root="$(cd "$test_dir/../../.." && pwd)"
  HELPER="$pkg_root/lib/check-outbound-responses-staleness.sh"

  FIXTURE="$(mktemp -d)"
  mkdir -p "$FIXTURE/docs/problems"
}
|
|
31
|
+
|
|
32
|
+
# Remove the scratch tree created by setup(). Guarded so that a setup() which
# failed before FIXTURE was assigned does not hand rm an empty path.
teardown() {
  if [ -n "${FIXTURE:-}" ] && [ -d "$FIXTURE" ]; then
    rm -rf -- "$FIXTURE"
  fi
}
|
|
35
|
+
|
|
36
|
+
# Helper: write a back-link ticket fixture under docs/problems/.
|
|
37
|
+
# Write a problem-ticket fixture that carries a `## Reported Upstream`
# back-link section.
# Arguments: $1 - destination file path (parent directory must exist)
_write_backlink_ticket() {
  local dest="$1"
  printf '%s\n' \
    '# Problem 999: example back-link fixture' \
    '' \
    '**Status**: Open' \
    '' \
    '## Description' \
    '' \
    'Fixture for Step 0d behavioural test.' \
    '' \
    '## Reported Upstream' \
    '' \
    '- **Repo**: example/upstream' \
    '- **URL**: https://github.com/example/upstream/issues/999' \
    '- **Filed**: 2026-06-08' \
    > "$dest"
}
|
|
55
|
+
|
|
56
|
+
@test "helper exists at the contracted path" {
|
|
57
|
+
[ -f "$HELPER" ]
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
@test "case 1: no back-link tickets → no-back-link-tickets" {
|
|
61
|
+
# shellcheck disable=SC1090
|
|
62
|
+
source "$HELPER"
|
|
63
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
64
|
+
[ "$status" -eq 0 ]
|
|
65
|
+
[ "$output" = "no-back-link-tickets" ]
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
@test "case 1b: tickets without ## Reported Upstream section → no-back-link-tickets" {
|
|
69
|
+
cat > "$FIXTURE/docs/problems/100-no-back-link.open.md" <<'EOF'
|
|
70
|
+
# Problem 100: no upstream link
|
|
71
|
+
|
|
72
|
+
## Description
|
|
73
|
+
|
|
74
|
+
Local-only ticket.
|
|
75
|
+
EOF
|
|
76
|
+
# shellcheck disable=SC1090
|
|
77
|
+
source "$HELPER"
|
|
78
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
79
|
+
[ "$status" -eq 0 ]
|
|
80
|
+
[ "$output" = "no-back-link-tickets" ]
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
@test "case 2: back-link ticket present, cache absent → first-run-cache-absent" {
|
|
84
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
|
|
85
|
+
# shellcheck disable=SC1090
|
|
86
|
+
source "$HELPER"
|
|
87
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
88
|
+
[ "$status" -eq 0 ]
|
|
89
|
+
[ "$output" = "first-run-cache-absent" ]
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
@test "case 2b: back-link in per-state subdir layout (RFC-002) is discovered" {
|
|
93
|
+
mkdir -p "$FIXTURE/docs/problems/known-error"
|
|
94
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/known-error/220-cadence-gap.md"
|
|
95
|
+
# shellcheck disable=SC1090
|
|
96
|
+
source "$HELPER"
|
|
97
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
98
|
+
[ "$status" -eq 0 ]
|
|
99
|
+
[ "$output" = "first-run-cache-absent" ]
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
@test "case 3: cache present, last_checked null → first-run-last-checked-null" {
|
|
103
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
|
|
104
|
+
cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<'EOF'
|
|
105
|
+
{ "last_checked": null, "tickets": {} }
|
|
106
|
+
EOF
|
|
107
|
+
# shellcheck disable=SC1090
|
|
108
|
+
source "$HELPER"
|
|
109
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
110
|
+
[ "$status" -eq 0 ]
|
|
111
|
+
[ "$output" = "first-run-last-checked-null" ]
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
@test "case 4: cache fresh within TTL → fresh-within-ttl (silent-pass)" {
|
|
115
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
|
|
116
|
+
# last_checked 1 hour ago — well within 24h default TTL.
|
|
117
|
+
local recent_iso
|
|
118
|
+
recent_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
|
|
119
|
+
cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
|
|
120
|
+
{ "last_checked": "$recent_iso", "tickets": {} }
|
|
121
|
+
EOF
|
|
122
|
+
# shellcheck disable=SC1090
|
|
123
|
+
source "$HELPER"
|
|
124
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
125
|
+
[ "$status" -eq 0 ]
|
|
126
|
+
[ "$output" = "fresh-within-ttl" ]
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
@test "case 5: cache older than TTL → ttl-expiry with age + ttl in the reason" {
|
|
130
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
|
|
131
|
+
# last_checked 2 days ago — past 24h default TTL.
|
|
132
|
+
local stale_iso
|
|
133
|
+
stale_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=2)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
|
|
134
|
+
cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
|
|
135
|
+
{ "last_checked": "$stale_iso", "tickets": {} }
|
|
136
|
+
EOF
|
|
137
|
+
# shellcheck disable=SC1090
|
|
138
|
+
source "$HELPER"
|
|
139
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
140
|
+
[ "$status" -eq 0 ]
|
|
141
|
+
# Format: "ttl-expiry age=<N>s ttl=<M>s"
|
|
142
|
+
[[ "$output" =~ ^ttl-expiry\ age=[0-9]+s\ ttl=86400s$ ]]
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
@test "case 6: custom ttl_seconds in cache is honored (not hardcoded default)" {
|
|
146
|
+
# 1-hour TTL; last_checked 90 minutes ago → stale under the custom TTL,
|
|
147
|
+
# but would be FRESH under the 86400s default. Confirms the helper reads
|
|
148
|
+
# ttl_seconds from cache rather than hardcoding 86400.
|
|
149
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
|
|
150
|
+
local mid_iso
|
|
151
|
+
mid_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=90)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
|
|
152
|
+
cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
|
|
153
|
+
{ "last_checked": "$mid_iso", "tickets": {}, "ttl_seconds": 3600 }
|
|
154
|
+
EOF
|
|
155
|
+
# shellcheck disable=SC1090
|
|
156
|
+
source "$HELPER"
|
|
157
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
158
|
+
[ "$status" -eq 0 ]
|
|
159
|
+
[[ "$output" =~ ^ttl-expiry\ age=[0-9]+s\ ttl=3600s$ ]]
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
@test "case 7: missing ttl_seconds defaults to 86400 (symmetric with inbound)" {
|
|
163
|
+
_write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
|
|
164
|
+
local recent_iso
|
|
165
|
+
recent_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
|
|
166
|
+
cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
|
|
167
|
+
{ "last_checked": "$recent_iso", "tickets": {} }
|
|
168
|
+
EOF
|
|
169
|
+
# shellcheck disable=SC1090
|
|
170
|
+
source "$HELPER"
|
|
171
|
+
run should_promote_outbound_responses_preflight "$FIXTURE"
|
|
172
|
+
[ "$status" -eq 0 ]
|
|
173
|
+
[ "$output" = "fresh-within-ttl" ]
|
|
174
|
+
}
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
# tdd-review: structural-permitted (justification: the doc-lint slice below
|
|
3
|
+
# asserts SKILL.md / ADR-032 prose contract — SKILL.md is the contract
|
|
4
|
+
# document per ADR-037 Permitted Exception; these guards catch prose drift
|
|
5
|
+
# away from the behavioural HALT-with-advisory contract exercised above. The
|
|
6
|
+
# load-bearing core of this fixture is behavioural per ADR-052; the harness gap is tracked in P012.)
|
|
7
|
+
#
|
|
8
|
+
# Behavioural test: work-problems Step 5 exit-code semantics — the
|
|
9
|
+
# is_error:true TRANSIENT-API-ERROR HALT branch (P214). When an iter
|
|
10
|
+
# subprocess returns `is_error: true` with `total_cost_usd: 0` AND no staged
|
|
11
|
+
# work in the tree (the 529 Overloaded / 429 rate-limit / 401 auth-expired
|
|
12
|
+
# shape — the API call never landed; nothing was done; metadata records the
|
|
13
|
+
# failure), the orchestrator MUST halt the loop with a class-appropriate
|
|
14
|
+
# advisory line in the final summary — NOT silently treat exit-0 as success
|
|
15
|
+
# and try to parse a missing ITERATION_SUMMARY block.
|
|
16
|
+
#
|
|
17
|
+
# This is the HALT counterpart to the existing P261 SALVAGE branch (covered
|
|
18
|
+
# by work-problems-step-5-stream-timeout-salvage.bats):
|
|
19
|
+
# - SALVAGE: is_error:true + staged work + bats green (stream-timeout class)
|
|
20
|
+
# - HALT: is_error:true + nothing staged (transient-API-error class — P214)
|
|
21
|
+
# Both branches require the orchestrator to read `is_error` BEFORE the
|
|
22
|
+
# Exit-0 → parse-ITERATION_SUMMARY path; without the explicit check-order
|
|
23
|
+
# the loop silently miscounts and may spawn further subprocesses that fail
|
|
24
|
+
# identically (the AFK-promise-breaking shape P214 reports).
|
|
25
|
+
#
|
|
26
|
+
# The fake-shim below re-creates the production 529 Overloaded shape:
|
|
27
|
+
# is_error:true, total_cost_usd:0, no staged work, .result carrying the
|
|
28
|
+
# upstream error string. The harness re-implements the orchestrator's
|
|
29
|
+
# ordered-check decision contract (faithful to SKILL.md Step 5) and asserts
|
|
30
|
+
# the HALT routing + class-appropriate advisory for each transient class.
|
|
31
|
+
#
|
|
32
|
+
# @problem P214
|
|
33
|
+
# @jtbd JTBD-006
|
|
34
|
+
#
|
|
35
|
+
# Cross-reference:
|
|
36
|
+
# P214 (work-problems Step 5 exit-code rule doesn't handle is_error:true
|
|
37
|
+
# transient API failures) — driver ticket
|
|
38
|
+
# P261 (is_error:true stream-timeout salvage carve-out) — sibling SALVAGE
|
|
39
|
+
# branch; this fixture covers the HALT counterpart
|
|
40
|
+
# ADR-032 (governance skill invocation patterns — is_error:true class
|
|
41
|
+
# taxonomy: SALVAGE = stream-timeout; HALT = transient-API-error) — the
|
|
42
|
+
# amended contract this fixture pins
|
|
43
|
+
# ADR-013 Rule 6 (AFK fail-safe — HALT routing is non-interactive; no
|
|
44
|
+
# AskUserQuestion) — invariant honoured
|
|
45
|
+
# ADR-037 / ADR-052 (skill testing strategy — behavioural default; doc-lint
|
|
46
|
+
# contract assertion is the Permitted Exception, marked above)
|
|
47
|
+
|
|
48
|
+
# Per-test fixture for the P214 transient-API-error HALT cases.
#
# Globals written:
#   TEST_TMP   - scratch directory holding everything below
#   FAKE_BIN   - directory prepended to PATH, containing the fake `claude`
#   REPO       - throwaway git repository with one empty root commit
#   SKILL_DIR  - directory one level above this test file's directory
#   SKILL_FILE - $SKILL_DIR/SKILL.md
#   ADR_FILE   - ADR-032 path, four levels above SKILL_DIR under docs/decisions
setup() {
  TEST_TMP="$(mktemp -d)"
  FAKE_BIN="${TEST_TMP}/bin"
  mkdir -p "$FAKE_BIN"

  # Fake `claude` binary simulating the transient-API-error shape: exits 0,
  # emits an is_error:true JSON envelope with total_cost_usd:0 and the
  # transient-class error string in `.result`. It stages nothing.
  cat > "$FAKE_BIN/claude" <<'FAKE_EOF'
#!/usr/bin/env bash
# Test fake for work-problems Step 5 P214 transient-API-error halt fixture.
# Emits is_error:true with total_cost_usd:0 and a class-specific .result string.
# FAKE_ERROR_CLASS selects the transient class: overloaded | rate-limit | auth-expired
case "${FAKE_ERROR_CLASS:-overloaded}" in
  overloaded)
    RESULT='API Error (529): Overloaded'
    ;;
  rate-limit)
    RESULT='API Error (429): Rate limit exceeded'
    ;;
  auth-expired)
    RESULT='API Error (401): Authentication expired'
    ;;
  *)
    RESULT='API Error: Unknown'
    ;;
esac
printf '%s\n' "{\"is_error\":true,\"result\":\"${RESULT}\",\"total_cost_usd\":0,\"duration_ms\":1500,\"usage\":{\"input_tokens\":0,\"output_tokens\":0,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0}}"
FAKE_EOF
  chmod +x "$FAKE_BIN/claude"
  export PATH="$FAKE_BIN:$PATH"

  # A throwaway git repo so staged-work detection is real (and empty — no
  # staged work is the load-bearing characteristic of this class).
  REPO="${TEST_TMP}/repo"
  mkdir -p "$REPO"
  git -C "$REPO" init -q
  git -C "$REPO" config user.email "test@example.com"
  git -C "$REPO" config user.name "Test"
  # Keep the fixture hermetic: a global commit.gpgsign=true would otherwise
  # make the root commit depend on a working signing key on the host.
  git -C "$REPO" config commit.gpgsign false
  git -C "$REPO" commit -q --allow-empty -m "root"

  SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
  SKILL_FILE="${SKILL_DIR}/SKILL.md"
  ADR_FILE="$(cd "${SKILL_DIR}/../../../.." && pwd)/docs/decisions/032-governance-skill-invocation-patterns.proposed.md"
}
|
|
94
|
+
|
|
95
|
+
# Delete the scratch directory created by setup(), if there is one.
teardown() {
  # An unset or empty TEST_TMP fails the -d test, so nothing is removed.
  [ -d "${TEST_TMP:-}" ] || return 0
  rm -rf "$TEST_TMP"
}
|
|
100
|
+
|
|
101
|
+
# Faithful re-implementation of SKILL.md Step 5's ORDERED-CHECK decision
|
|
102
|
+
# contract (P214 amendment to the P261 carve-out). The orchestrator reads
|
|
103
|
+
# (1) exit code, (2) is_error, (3) ITERATION_SUMMARY — in that order. On
|
|
104
|
+
# is_error:true + nothing staged, emit a class-appropriate advisory.
|
|
105
|
+
# Decide how the orchestrator routes one iteration result, checking in order:
# (1) exit code, (2) the envelope's is_error flag, (3) ITERATION_SUMMARY.
#
# Arguments:
#   $1 - exit code of the iteration subprocess
#   $2 - JSON envelope text emitted by the subprocess
#   $3 - path of the git work tree to inspect for staged changes
# Outputs: one DECISION=... line on stdout
# Returns: always 0; the decision is carried in the output
ordered_check_decision() {
  local rc="$1" envelope="$2" worktree="$3"
  local flag message staged_paths hint

  # (1) Any non-zero exit halts before the envelope is looked at.
  if [ "$rc" -ne 0 ]; then
    printf 'DECISION=HALT reason=non-zero-exit\n'
    return 0
  fi

  # (2) Read is_error (lower-cased) and .result from the envelope before any
  # ITERATION_SUMMARY handling.
  flag=$(python3 -c 'import json,sys; print(str(json.load(sys.stdin).get("is_error")).lower())' <<<"$envelope")
  message=$(python3 -c 'import json,sys; print(json.load(sys.stdin).get("result",""))' <<<"$envelope")

  # (3) Anything other than is_error:true proceeds to summary parsing.
  if [ "$flag" != "true" ]; then
    printf 'DECISION=PARSE_SUMMARY\n'
    return 0
  fi

  # is_error:true with staged paths belongs to the P261 SALVAGE branch.
  staged_paths=$(git -C "$worktree" diff --cached --name-only)
  if [ -n "$staged_paths" ]; then
    printf 'DECISION=DEFER_TO_SALVAGE_BRANCH\n'
    return 0
  fi

  # is_error:true with nothing staged: HALT, with an advisory chosen from the
  # error text. The first matching class wins.
  if [[ "$message" == *"529"* || "$message" == *"Overloaded"* || "$message" == *"overloaded"* ]]; then
    hint='API overloaded; retry when service recovers'
  elif [[ "$message" == *"429"* || "$message" == *"Rate limit"* || "$message" == *"rate limit"* || "$message" == *"rate-limit"* ]]; then
    hint='API rate-limited; retry when limit window resets'
  elif [[ "$message" == *"401"* || "$message" == *"Authentication"* || "$message" == *"auth"* ]]; then
    hint='API auth expired; refresh credentials before resuming'
  else
    hint='transient API error; inspect .result and resume manually'
  fi
  printf 'DECISION=HALT reason=is-error-transient advisory=%s\n' "$hint"
  return 0
}
|
|
156
|
+
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
# Behavioural cases (the load-bearing core per ADR-052).
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
@test "P214: is_error:true + 529 Overloaded + no staged work -> HALT with API-overloaded advisory" {
|
|
162
|
+
export FAKE_ERROR_CLASS=overloaded
|
|
163
|
+
local json
|
|
164
|
+
json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
|
|
165
|
+
run ordered_check_decision 0 "$json" "$REPO"
|
|
166
|
+
[ "$status" -eq 0 ]
|
|
167
|
+
[[ "$output" == *"DECISION=HALT"* ]]
|
|
168
|
+
[[ "$output" == *"reason=is-error-transient"* ]]
|
|
169
|
+
[[ "$output" == *"API overloaded"* ]]
|
|
170
|
+
[[ "$output" == *"retry when service recovers"* ]]
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
@test "P214: is_error:true + 429 rate-limit + no staged work -> HALT with rate-limited advisory" {
|
|
174
|
+
export FAKE_ERROR_CLASS=rate-limit
|
|
175
|
+
local json
|
|
176
|
+
json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
|
|
177
|
+
run ordered_check_decision 0 "$json" "$REPO"
|
|
178
|
+
[ "$status" -eq 0 ]
|
|
179
|
+
[[ "$output" == *"DECISION=HALT"* ]]
|
|
180
|
+
[[ "$output" == *"reason=is-error-transient"* ]]
|
|
181
|
+
[[ "$output" == *"rate-limited"* ]]
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
@test "P214: is_error:true + 401 auth-expired + no staged work -> HALT with refresh-credentials advisory" {
|
|
185
|
+
export FAKE_ERROR_CLASS=auth-expired
|
|
186
|
+
local json
|
|
187
|
+
json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
|
|
188
|
+
run ordered_check_decision 0 "$json" "$REPO"
|
|
189
|
+
[ "$status" -eq 0 ]
|
|
190
|
+
[[ "$output" == *"DECISION=HALT"* ]]
|
|
191
|
+
[[ "$output" == *"reason=is-error-transient"* ]]
|
|
192
|
+
[[ "$output" == *"auth expired"* ]]
|
|
193
|
+
[[ "$output" == *"refresh credentials"* ]]
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
@test "P214: is_error MUST be checked BEFORE ITERATION_SUMMARY parse on Exit 0 (ordered-check invariant)" {
|
|
197
|
+
# The load-bearing P214 invariant: when exit 0 AND is_error:true, the
|
|
198
|
+
# decision is HALT, NOT PARSE_SUMMARY. Without the ordered-check rule the
|
|
199
|
+
# loop would silently route to PARSE_SUMMARY and miss the failure.
|
|
200
|
+
export FAKE_ERROR_CLASS=overloaded
|
|
201
|
+
local json
|
|
202
|
+
json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
|
|
203
|
+
run ordered_check_decision 0 "$json" "$REPO"
|
|
204
|
+
[ "$status" -eq 0 ]
|
|
205
|
+
[[ "$output" != *"DECISION=PARSE_SUMMARY"* ]]
|
|
206
|
+
[[ "$output" == *"DECISION=HALT"* ]]
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
@test "P214: non-zero exit takes precedence over is_error check (HALT routing)" {
|
|
210
|
+
# Non-zero exit halts regardless of is_error value — the exit-code rule
|
|
211
|
+
# is check (1) in the ordered sequence.
|
|
212
|
+
export FAKE_ERROR_CLASS=overloaded
|
|
213
|
+
local json
|
|
214
|
+
json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
|
|
215
|
+
run ordered_check_decision 1 "$json" "$REPO"
|
|
216
|
+
[ "$status" -eq 0 ]
|
|
217
|
+
[[ "$output" == *"DECISION=HALT"* ]]
|
|
218
|
+
[[ "$output" == *"reason=non-zero-exit"* ]]
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
@test "P214: is_error:true + staged work -> defers to existing P261 SALVAGE branch (no double-handling)" {
|
|
222
|
+
# When staged work exists, the transient-API-error HALT branch must NOT
|
|
223
|
+
# fire — it MUST defer to the P261 SALVAGE branch. This guards against
|
|
224
|
+
# the new branch swallowing salvage-eligible work.
|
|
225
|
+
export FAKE_ERROR_CLASS=overloaded
|
|
226
|
+
printf 'salvageable work\n' > "$REPO/salvage-me.txt"
|
|
227
|
+
git -C "$REPO" add salvage-me.txt
|
|
228
|
+
local json
|
|
229
|
+
json=$( cd "$REPO" && claude -p --output-format json "TEST" < /dev/null )
|
|
230
|
+
run ordered_check_decision 0 "$json" "$REPO"
|
|
231
|
+
[ "$status" -eq 0 ]
|
|
232
|
+
[[ "$output" == *"DECISION=DEFER_TO_SALVAGE_BRANCH"* ]]
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
# ---------------------------------------------------------------------------
|
|
236
|
+
# Doc-lint contract assertions (Permitted Exception per ADR-037; structural
|
|
237
|
+
# slice marked at top of file per ADR-052 Surface 2). These guard the SKILL.md
|
|
238
|
+
# / ADR-032 prose against drift away from the behavioural contract above.
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
|
|
241
|
+
@test "P214: SKILL.md Step 5 documents the ORDERED check sequence (exit-code, is_error, ITERATION_SUMMARY)" {
|
|
242
|
+
# The ordered-check rule must be explicit in the prose so an implementer
|
|
243
|
+
# reading Step 5 routes is_error:true to HALT before attempting to parse
|
|
244
|
+
# a missing ITERATION_SUMMARY block.
|
|
245
|
+
run grep -niE "(check|read|parse).{0,40}is_error.{0,80}before.{0,80}(ITERATION_SUMMARY|parse|\.result)|ordered check|check.order" "$SKILL_FILE"
|
|
246
|
+
[ "$status" -eq 0 ]
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
@test "P214: SKILL.md Step 5 names the transient-API-error classes (overloaded / rate-limit / auth-expired)" {
|
|
250
|
+
# The HALT advisory must enumerate the known transient classes so the
|
|
251
|
+
# final summary carries an actionable message rather than a generic
|
|
252
|
+
# "loop halted" line.
|
|
253
|
+
run grep -niE "529|Overloaded|overload" "$SKILL_FILE"
|
|
254
|
+
[ "$status" -eq 0 ]
|
|
255
|
+
run grep -niE "429|rate.?limit" "$SKILL_FILE"
|
|
256
|
+
[ "$status" -eq 0 ]
|
|
257
|
+
run grep -niE "401|auth.?expired|auth.*expir" "$SKILL_FILE"
|
|
258
|
+
[ "$status" -eq 0 ]
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
@test "P214: SKILL.md Step 5 cites P214 as the driver of the transient-API-error HALT branch" {
|
|
262
|
+
run grep -nE "P214" "$SKILL_FILE"
|
|
263
|
+
[ "$status" -eq 0 ]
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
@test "P214: SKILL.md HALT branch distinguishes the transient-API-error class from the P261 stream-timeout SALVAGE class" {
|
|
267
|
+
# The two is_error:true branches (SALVAGE vs HALT) must be cross-referenced
|
|
268
|
+
# so adopters reading either branch see the other.
|
|
269
|
+
run grep -niE "P261.{0,200}(transient|HALT|overload|class)|transient.{0,200}P261|salvage.{0,200}(transient|class)" "$SKILL_FILE"
|
|
270
|
+
[ "$status" -eq 0 ]
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
@test "P214: ADR-032 P261 section names the is_error:true class taxonomy (SALVAGE = stream-timeout; HALT = transient-API-error)" {
|
|
274
|
+
# ADR-032's P261 amendment should be extended with the broader class
|
|
275
|
+
# taxonomy so the SKILL prose and the ADR contract stay in sync.
|
|
276
|
+
run grep -niE "P214|transient.?(API.?)?error|class taxonomy|(overload|rate.?limit|auth.?expired).{0,80}HALT|HALT.{0,80}(overload|rate.?limit|auth.?expired)" "$ADR_FILE"
|
|
277
|
+
[ "$status" -eq 0 ]
|
|
278
|
+
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
# P211 — work-problems Step 5 iteration-prompt-body must EXPLICITLY re-ground
|
|
3
|
+
# each iter's dispatch prompt against the CURRENT ticket only. The orchestrator
|
|
4
|
+
# MUST NOT inline the ticket's `## Fix Strategy` text verbatim, and MUST NOT
|
|
5
|
+
# leak prior-iter content (prior ticket ID, prior Fix Strategy text, prior
|
|
6
|
+
# outcome reason, prior commit SHA, prior retro findings) across iterations.
|
|
7
|
+
#
|
|
8
|
+
# Reported as inbound from downstream consumer bbstats (their P194) on
|
|
9
|
+
# 2026-05-15; covered by ADR-076 Origin field tier.
|
|
10
|
+
#
|
|
11
|
+
# Behavioural mechanism for the bug: AFK iter subprocesses inherit a stale
|
|
12
|
+
# design-rationale frame and may attempt fixes anchored on the wrong ticket's
|
|
13
|
+
# intent. Workaround the ticket names: user-in-the-loop verification after
|
|
14
|
+
# each iter, reading the subprocess's commit and checking whether it cites
|
|
15
|
+
# the correct ticket's design rationale — a manual-policing burden the AFK
|
|
16
|
+
# loop is meant to eliminate. JTBD-006 (Progress the Backlog While I'm Away)
|
|
17
|
+
# is load-bearing: the audit trail and trust in the AFK loop degrade if iters
|
|
18
|
+
# work the wrong ticket's design rationale.
|
|
19
|
+
#
|
|
20
|
+
# tdd-review: structural-permitted (justification: SKILL.md is the named
|
|
21
|
+
# contract document under ADR-052; behavioural alternative would require a
|
|
22
|
+
# synthetic `claude -p` iter dispatch harness that simulates multiple
|
|
23
|
+
# sequential iters and asserts no cross-iter prompt-body content leakage —
|
|
24
|
+
# that harness sits outside the skill layer and depends on the Anthropic CLI
|
|
25
|
+
# binary. Same Permitted Exception precedent as
|
|
26
|
+
# `work-problems-step-5-iter-changeset-required.bats:14-21`,
|
|
27
|
+
# `work-problems-step-5-delegation.bats:99-105`, and the P083 / P086 / P089
|
|
28
|
+
# ScheduleWakeup / retro / stdin-redirect fixtures in the same directory.
|
|
29
|
+
# P012 is the harness-gap ticket).
|
|
30
|
+
#
|
|
31
|
+
# @problem P211
|
|
32
|
+
# @problem P012
|
|
33
|
+
# @jtbd JTBD-006
|
|
34
|
+
# @jtbd JTBD-001
|
|
35
|
+
#
|
|
36
|
+
# Cross-reference:
|
|
37
|
+
# P211 — this ticket (orchestrator carries prior-ticket Fix Strategy into
|
|
38
|
+
# next iter's dispatch context — pollutes the new iter's framing)
|
|
39
|
+
# bbstats#194 — inbound report from downstream consumer
|
|
40
|
+
# ADR-014 (single-commit grain — fix lands as one coherent commit)
|
|
41
|
+
# ADR-032 (governance skill invocation patterns — AFK iteration-isolation
|
|
42
|
+
# wrapper; re-grounding is a clarification of that isolation intent)
|
|
43
|
+
# ADR-052 (behavioural tests default; structural-permitted with comment)
|
|
44
|
+
# ADR-076 (inbound-reported problems rank ahead via sort tier — Origin
|
|
45
|
+
# field stamping)
|
|
46
|
+
# JTBD-006 (Progress the Backlog While I'm Away) — load-bearing
|
|
47
|
+
|
|
48
|
+
# Locate the skill's contract document relative to this test file.
# Globals written:
#   SKILL_DIR  - absolute path of the directory above this file's directory
#   SKILL_FILE - $SKILL_DIR/SKILL.md
setup() {
  local here
  here="$(dirname "$BATS_TEST_FILENAME")"
  SKILL_DIR="$(cd "$here/.." && pwd)"
  SKILL_FILE="${SKILL_DIR}/SKILL.md"
}
|
|
52
|
+
|
|
53
|
+
@test "SKILL.md cites P211 (re-grounding driver) in Related section" {
|
|
54
|
+
# Self-documenting contract — a future contributor weakening the
|
|
55
|
+
# re-grounding constraint reads P211 and understands why it exists.
|
|
56
|
+
run grep -nE 'P211' "$SKILL_FILE"
|
|
57
|
+
[ "$status" -eq 0 ]
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
@test "SKILL.md Step 5 iteration prompt body names re-grounding per iter explicitly" {
|
|
61
|
+
# The "self-contained" opener at line 510 is the existing weaker form; the
|
|
62
|
+
# stricter "re-ground per iter" phrasing names the construction invariant
|
|
63
|
+
# the orchestrator MUST satisfy on each iter dispatch. P211's bug shape is
|
|
64
|
+
# exactly the case where "self-contained" was read as a subprocess-side
|
|
65
|
+
# property only, with the orchestrator-side construction leaking prior-iter
|
|
66
|
+
# content into the new iter's prompt body.
|
|
67
|
+
run grep -niE "re.?ground.{0,40}per iter|re.?grounded.{0,40}per iter|per.?iter.{0,40}re.?ground" "$SKILL_FILE"
|
|
68
|
+
[ "$status" -eq 0 ]
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
@test "SKILL.md Step 5 iter prompt body forbids inlining Fix Strategy verbatim" {
|
|
72
|
+
# The bug shape: orchestrator reads target ticket's `## Fix Strategy` and
|
|
73
|
+
# cites it verbatim into the iteration subprocess's prompt body. The
|
|
74
|
+
# SKILL.md MUST explicitly forbid this so future contributors understand
|
|
75
|
+
# the subprocess reads Fix Strategy from disk via manage-problem inside
|
|
76
|
+
# its own context.
|
|
77
|
+
run grep -niE "(not|never|MUST NOT|does not).{0,40}inline.{0,40}Fix Strategy|Fix Strategy.{0,40}(not|never|MUST NOT|does not).{0,40}inline|do not.{0,40}cite.{0,40}Fix Strategy.{0,40}verbatim|Fix Strategy.{0,40}verbatim.{0,40}(not|never|forbid)" "$SKILL_FILE"
|
|
78
|
+
[ "$status" -eq 0 ]
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
@test "SKILL.md Step 5 iter prompt body explicitly forbids prior-iter content leakage" {
|
|
82
|
+
# The cross-iter leakage class names: prior ticket ID, prior Fix Strategy
|
|
83
|
+
# text, prior outcome reason, prior commit SHA, prior retro findings. The
|
|
84
|
+
# SKILL.md MUST name the no-leakage invariant explicitly so the orchestrator
|
|
85
|
+
# main turn's prompt construction is constrained on every iter.
|
|
86
|
+
run grep -niE "(no prior|not.{0,20}prior|prior.?iter.{0,40}(leak|carry|inherit)|leak.{0,40}prior|carry.{0,40}prior.{0,40}iter)" "$SKILL_FILE"
|
|
87
|
+
[ "$status" -eq 0 ]
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
@test "SKILL.md Step 5 names template-driven reset-per-iter construction" {
|
|
91
|
+
# The construction shape: template-driven, reset per iter, no global
|
|
92
|
+
# accumulator across iters. This is the structural invariant the
|
|
93
|
+
# orchestrator main turn must satisfy when building each iter's prompt.
|
|
94
|
+
run grep -niE "template.?driven|reset per iter|reset.{0,20}per.{0,20}iter|no.{0,20}(global )?accumulator" "$SKILL_FILE"
|
|
95
|
+
[ "$status" -eq 0 ]
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
@test "SKILL.md Step 5 iteration prompt body cites P211 inline" {
|
|
99
|
+
# The re-grounding clause must cite P211 inline so the contract document
|
|
100
|
+
# is self-documenting — a future contributor removing the clause reads the
|
|
101
|
+
# P211 reference and understands why it exists before deleting it. Same
|
|
102
|
+
# pattern as the P083 / P086 / P146 / P232 inline citations in the same
|
|
103
|
+
# block.
|
|
104
|
+
run grep -nE "re.?ground.{0,200}P211|P211.{0,200}re.?ground|P211.{0,200}Fix Strategy|Fix Strategy.{0,200}P211" "$SKILL_FILE"
|
|
105
|
+
[ "$status" -eq 0 ]
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
@test "SKILL.md re-grounding clause sits inside Step 5 iteration prompt body section" {
|
|
109
|
+
# Structural locality: the re-grounding clause must live INSIDE Step 5's
|
|
110
|
+
# Iteration prompt body section (after the "self-contained" opener at
|
|
111
|
+
# line 510), not free-floating elsewhere in SKILL.md. Locality matters
|
|
112
|
+
# because the rule is read alongside the rest of the prompt-body contract,
|
|
113
|
+
# and a future contributor refactoring Step 5 must encounter it inline.
|
|
114
|
+
# Assertion shape: the line containing "re-ground" sits after the line
|
|
115
|
+
# containing "Iteration prompt body" and before the line containing
|
|
116
|
+
# "Return-summary contract".
|
|
117
|
+
iter_line=$(grep -nE '^\*\*Iteration prompt body' "$SKILL_FILE" | head -1 | cut -d: -f1)
|
|
118
|
+
# Tightened regex: require the literal hyphenated form "re-ground" /
|
|
119
|
+
# "re-grounded" / "re-grounding" so partial-substring matches like
|
|
120
|
+
# "foreground" (line 33) don't satisfy the assertion.
|
|
121
|
+
reground_line=$(grep -niE "re-ground(ed|ing)?" "$SKILL_FILE" | head -1 | cut -d: -f1)
|
|
122
|
+
return_summary_line=$(grep -nE '^\*\*Return-summary contract' "$SKILL_FILE" | head -1 | cut -d: -f1)
|
|
123
|
+
[ -n "$iter_line" ]
|
|
124
|
+
[ -n "$reground_line" ]
|
|
125
|
+
[ -n "$return_summary_line" ]
|
|
126
|
+
[ "$reground_line" -gt "$iter_line" ]
|
|
127
|
+
[ "$reground_line" -lt "$return_summary_line" ]
|
|
128
|
+
}
|