npm - @windyroad/itil - Versions diffs - 0.47.12-preview.598 → 0.47.12-preview.617 - Mend

@windyroad/itil 0.47.12-preview.598 → 0.47.12-preview.617

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/skills/review-problems/test/jtbd-301-verdict-shape-contract.bats ADDED Viewed

@@ -0,0 +1,225 @@
+#!/usr/bin/env bats
+# Contract assertions for /wr-itil:review-problems Step 4.5d + Step 4.5e
+# verdict-shape ack-comment templates (P229 / JTBD-301).
+#
+# Structural assertions — Permitted Exception to the source-grep ban
+# per ADR-005 / P011 / ADR-037 / ADR-052 § Surface 2. SKILL.md prose
+# governs LLM-driven runtime behaviour; behavioural-replay testing
+# requires a synthetic agent harness (P012 master ticket; P176 follow-up
+# for the SKILL.md surface; P324 review-problems agent-prose harness gap).
+# Until that harness lands, contract bats assert the load-bearing
+# template elements are present so future edits don't silently strip
+# the JTBD-301 verdict vocabulary and re-introduce framework-vocab leakage.
+#
+# @problem P229
+# @problem P012 (master harness ticket — justification for structural exception)
+# @problem P176 (SKILL.md surface follow-up)
+# @problem P324 (review-problems agent-prose harness gap)
+# @jtbd JTBD-301 (verdict-shape acknowledgement contract — non-negotiable)
+# @adr ADR-024 (report-upstream contract — symmetry mirror)
+# @adr ADR-036 (downstream-scaffold contract — adopter inheritance)
+# @adr ADR-052 (behavioural-tests default + Permitted Exception)
+# @adr ADR-062 (inbound-discovery + assessment pipeline)
+# Per-test fixture: resolve the skill directory (the parent of the directory
+# holding this .bats file) to an absolute path, and point SKILL_FILE at the
+# SKILL.md inside it. Every @test below greps SKILL_FILE.
+setup() {
+  SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
+  SKILL_FILE="${SKILL_DIR}/SKILL.md"
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Verdict-shape contract subsection exists at the head of Step 4.5e
+# ──────────────────────────────────────────────────────────────────────────────
+@test "4.5e-comment-shape subsection exists (briefs JTBD-301 contract upstream of branch templates)" {
+  run grep -nE '^#### 4\.5e-comment-shape' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection names all four JTBD-301 verdict words verbatim" {
+  # JTBD-301 Desired Outcome row 6 names exactly four verdicts.
+  # The subsection MUST name all four so a reader sees the vocabulary
+  # before reading the per-branch templates that implement it.
+  run grep -nE 'fix released' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+  # `.` stands in for the apostrophe; `[- ]` accepts both the hyphenated
+  # ("won't-fix") and the spaced ("won't fix") spelling. A doubled
+  # backslash inside single quotes reaches grep as a literal backslash,
+  # so `won\\.t fix` would only match text containing a real `\`.
+  run grep -nE 'won.t[- ]fix' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+  run grep -nE 'duplicate' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+  run grep -nE 'parked' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection explicitly forbids framework-vocab leakage in ack-comment bodies" {
+  # Load-bearing rule: maintainer-internal jargon (Step IDs, branch
+  # names, classification tokens) MUST NOT appear in reporter-facing
+  # comment bodies. Audit-log at 4.5f keeps the tokens.
+  run grep -inE 'framework.vocab|maintainer.internal|reporter.facing' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection cites the report-upstream symmetry (ADR-024 / ADR-036)" {
+  # JTBD-301 line 23 + ADR-024 / ADR-036 establish the inbound/outbound
+  # symmetry: outbound `/wr-itil:report-upstream` posts structured
+  # human-language; inbound ack mirrors that shape.
+  run grep -nE 'symmetry|mirror' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection documents the C2 gate-substitution caveat (architect condition C2)" {
+  # The external-comms gate (ADR-028) fires on the SUBSTITUTED body,
+  # not the template — template authors must ensure no maintainer
+  # jargon leaks via P<NNN> title substitution or <reason> expansion.
+  run grep -inE 'substituted body|gate.fires.on.the.*substituted|template.author' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Step 4.5d — matched-local-ticket cross-reference uses verdict-shape
+# (duplicate verdict)
+# ──────────────────────────────────────────────────────────────────────────────
+@test "Step 4.5d cross-reference comment uses 'duplicate' verdict language" {
+  # Replaces the bureaucratic "Tracked locally as docs/problems/..." boilerplate
+  # with verdict-shape "we're tracking this as a duplicate of P<NNN>".
+  run grep -inE 'duplicate of P.NNN.|tracking.*duplicate' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "Step 4.5d cross-reference comment template is documented inline (not just referenced)" {
+  # SKILL prose must carry the actual template body so a single-pass
+  # reader sees the shape, not just a cross-reference to JTBD-301.
+  run grep -nE '4\.5d.*[Cc]omment template|matched-local-ticket.*template' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Step 4.5e Step 4 — above-threshold-pushback verdict template (won't-fix)
+# ──────────────────────────────────────────────────────────────────────────────
+@test "Step 4.5e Step 4 pushback template uses 'we don't plan to fix this' verdict language" {
+  run grep -inE "we don.t plan to fix|don.t plan to fix this" "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Step 4.5e Step 5 — clear-malicious verdict template (policy-violation close)
+# Architect condition C4: name this as fifth implicit verdict, not conflated
+# with won't-fix
+# ──────────────────────────────────────────────────────────────────────────────
+@test "Step 4.5e Step 5 clear-malicious template uses 'we're closing this report' verdict language" {
+  run grep -inE "we.re closing this|closing this report" "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection names clear-malicious as fifth implicit verdict (architect C4)" {
+  # The four documented JTBD-301 verdicts are fix-released / parked /
+  # duplicate / won't-fix. clear-malicious is a stronger close
+  # (policy-violation) — name it precisely in the subsection prose
+  # rather than conflating with won't-fix.
+  run grep -inE 'policy.violation close|fifth.*verdict|implicit.*verdict' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "clear-malicious classification gloss is plain-language (JTBD non-blocking advisory)" {
+  # JTBD advisory: <classification> in the clear-malicious template
+  # MUST be a plain-language gloss, NOT the raw wr-risk-scorer verdict
+  # token. SKILL prose must specify this so the reporter sees
+  # human language, not "out-of-scope-for-documented-personas".
+  run grep -inE 'plain.language gloss|plain-language.*classification' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Step 4.5e Step 6 — safe-and-valid verdict template (accepted-into-backlog)
+# Architect condition C3: name as "accepted into backlog", not fix-released
+# ──────────────────────────────────────────────────────────────────────────────
+@test "Step 4.5e Step 6 safe-and-valid template uses 'we're tracking this as a real bug' verdict language" {
+  run grep -inE "tracking this as a real|tracking.*real bug" "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "Step 4.5e Step 6 template does NOT include framework vocab 'safe-and-valid branch' in comment body" {
+  # The 31-comment leak: "classified via /wr-itil:review-problems Step
+  # 4.5e safe-and-valid branch with safe-low-fix-risk" appeared in
+  # comment bodies. The new template prose must show the reporter-facing
+  # body and not include the framework-vocab phrasing inside a comment-body block.
+  # Note: the steps section header itself names "Safe-and-valid branch" —
+  # that's the maintainer prose and fine. This test checks the COMMENT-BODY
+  # template (which lives in a fenced code block under Step 6) does not include
+  # the leak phrase "safe-low-fix-risk".
+  #
+  # Token may appear in maintainer prose / classifier docs — but MUST NOT
+  # appear inside a fenced comment-body template block. We assert the token
+  # appears AT MOST in step-3 dual-axis-risk-classifier prose and the audit-log
+  # surface; not in a quoted comment template body. We use a structural proxy:
+  # the new 4.5e-comment-shape subsection MUST explicitly call out that the
+  # token belongs in maintainer-side audit-log only, not in the user-facing comment.
+  #
+  # A bare presence grep for 'safe-low-fix-risk' is deliberately NOT run here:
+  # the token is allowed in maintainer prose, so its presence or absence
+  # proves nothing, and an unasserted `run` would only look like a check.
+  run grep -inE "safe-low-fix-risk.*audit.log only|safe.low.fix.risk.*maintainer.side" "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection names Step 6 verdict as 'accepted into backlog' (architect C3)" {
+  # JTBD-301 'fix released' is the post-release verdict — the Step 6
+  # ack fires at accept-into-backlog time. Name the verdict precisely
+  # in the subsection prose.
+  run grep -inE 'accepted into backlog|accept.into.backlog' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "Step 4.5e Step 6 template references release-notes / status surface for future updates" {
+  # JTBD-301 desired outcome: reporter knows where to watch for updates.
+  # The template must point to a stable surface (release notes / status
+  # page / linked issue) so the reporter has actionable expectation.
+  run grep -inE 'release notes|release-notes|watch this issue' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Architect condition A1 — gate-denial sub-branches preserved across all four
+# verdict-shape templates (no silent-skip regression)
+# ──────────────────────────────────────────────────────────────────────────────
+@test "All four ack-comment branches preserve gate-denial sub-branches (architect A1)" {
+  # Step 4 (pushback), Step 5 (clear-malicious), Step 6 (safe-and-valid)
+  # already have gate-denial sub-branches in the current SKILL. The
+  # P229 fix preserves them; this assertion catches a regression where
+  # a template rewrite accidentally drops the sub-branch.
+  run grep -nE 'gate-denied-pushback' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+  run grep -nE 'gate-denied-clear-malicious' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+  run grep -nE 'gate-denied-safe-and-valid' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Architect condition A2 — audit-log preserves classification tokens verbatim
+# ──────────────────────────────────────────────────────────────────────────────
+@test "Audit-log surface at 4.5f preserves classification tokens (architect A2)" {
+  # The user-side template change strips framework vocab from comment
+  # bodies; the audit-log surface (4.5f) MUST continue receiving the
+  # raw classification tokens for replay determinism per ADR-062.
+  run grep -nE '4\.5f.*[Aa]udit-log' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+  run grep -nE 'safe-and-valid-local-ticket-created|above-threshold-pushback|clear-malicious-closed' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+# ──────────────────────────────────────────────────────────────────────────────
+# Traceability — SKILL.md cites the originating ticket (P229) and JTBD-301 by ID
+# ──────────────────────────────────────────────────────────────────────────────
+@test "P229 root-cause ticket cross-referenced from verdict-shape subsection" {
+  # Audit-trail grounding per ADR-026: the SKILL prose change cites
+  # the originating ticket so the rationale stays discoverable.
+  run grep -nE 'P229' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}
+@test "verdict-shape subsection cites JTBD-301 by ID" {
+  run grep -nE 'JTBD-301' "$SKILL_FILE"
+  [ "$status" -eq 0 ]
+}

package/skills/work-problems/SKILL.md CHANGED Viewed

@@ -236,7 +236,50 @@ The annotation pre-empts the "surprise heavy iter" perception JTBD-006 expects a
 <!-- @jtbd JTBD-006 (Progress the Backlog While I'm Away — AFK orchestrator pre-flights review-problems so iters dispatch against fresh WSJF rankings) -->
-After Step 0c completes (whether dispatched or silent-passed), proceed to Step 1.
+After Step 0c completes (whether dispatched or silent-passed), proceed to Step 0d.
+### Step 0d: Outbound upstream-responses pre-flight (per JTBD-006 AFK driver + JTBD-004 cross-repo coordination)
+After Step 0c's deferred-placeholder pre-flight and before Step 1's backlog scan, check whether the outbound-responses cache is fresh. P249 Phase 1 shipped `/wr-itil:check-upstream-responses` as a manual skill (the outbound symmetric counterpart to Step 0b's inbound pipeline); P220 names the cadence gap: without an auto-fire trigger, upstream responses to issues we filed via `/wr-itil:report-upstream` go unread until the maintainer remembers to invoke the skill. This step closes that gap with the same pre-flight shape Step 0b uses for the inbound axis.
+**Mechanism:**
+```bash
+preflight_reason="$(wr-itil-check-outbound-responses-staleness "$PWD")"
+```
+`wr-itil-check-outbound-responses-staleness` is the ADR-049 + ADR-080 `$PATH` shim (adopter-safe — resolves `lib/check-outbound-responses-staleness.sh` relative to the script, NOT cwd; P317/RFC-009) that internalises `should_promote_outbound_responses_preflight "$PWD"` and echoes the result. NEVER `source packages/...` repo-relative from a SKILL — those paths only resolve in the source monorepo, not adopter installs.
+The helper returns one of five outcomes (contract documented at `packages/itil/lib/check-outbound-responses-staleness.sh` + asserted by `packages/itil/skills/work-problems/test/work-problems-step-0d-outbound-responses-staleness-behavioural.bats`):
+| `preflight_reason`                | Action                                                                                                |
+|-----------------------------------|--------------------------------------------------------------------------------------------------------|
+| `no-back-link-tickets`            | Silent-pass. No local tickets carry a `## Reported Upstream` section; nothing to poll. Downstream-adopter non-obligation analogue to Step 0b's `no-channels-config`. Proceed to Step 1. |
+| `first-run-cache-absent`          | Dispatch `/wr-itil:check-upstream-responses` as a pre-flight iter via the standard `claude -p` subprocess wrapper (same shape as Step 0b / Step 0c / Step 5). |
+| `first-run-last-checked-null`     | Same as `first-run-cache-absent` — cache schema present but never populated.                          |
+| `ttl-expiry age=<N>s ttl=<M>s`    | Dispatch `/wr-itil:check-upstream-responses` as a pre-flight iter. Cache stale; the skill polls each back-linked upstream URL, diffs against the cache, and emits STATE / NEW / LABEL / NONE / FAIL per back-link ticket. |
+| `fresh-within-ttl`                | Silent-pass per ADR-013 Rule 5 + P132 mechanical-stage carve-out. Proceed to Step 1.                  |
+**Pre-flight dispatch shape**: when promoted, dispatch a single `claude -p --permission-mode bypassPermissions --output-format json` subprocess that invokes `/wr-itil:check-upstream-responses` (per P084 + ADR-032 subprocess isolation). Reuse the Step 5 subprocess wrapper verbatim — same flag set, same idle-timeout SIGTERM poll loop. The subprocess runs the full check-upstream-responses Step 1 + Step 2 + Step 3 pipeline; the cache file `docs/problems/.outbound-responses-cache.json` + audit-log `docs/audits/outbound-responses-log.md` are refreshed in its own commit per ADR-014 (check-upstream-responses' SKILL.md Step 3 commit grain). After the subprocess completes, the orchestrator proceeds to Step 1.
+**Iter-summary annotation**:
+- No back-link tickets: `Step 0d skipped — no tickets carry ## Reported Upstream (downstream-adopter non-obligation)`.
+- Cache fresh: `Step 0d skipped — outbound-responses cache fresh within TTL`.
+- Pre-flight ran: `Step 0d pre-flighted /wr-itil:check-upstream-responses — reason=<preflight_reason>, <N> back-link tickets polled, <M> STATE/NEW deltas surfaced`.
+The annotation pre-empts the "surprise heavy iter" perception JTBD-006 expects auditability for — a maintainer running multiple short AFK loops within a 24h window will hit `fresh-within-ttl` on subsequent invocations and see the cache-fresh annotation, confirming the system's silent-pass discipline rather than wondering whether the check ran at all.
+**AFK authorisation per ADR-013 Rule 6**: check-upstream-responses is itself AFK-safe by construction — read-only externally (`gh issue view` only; no `gh issue comment` / `gh issue create`), so does NOT trip ADR-028's external-comms gate; zero `AskUserQuestion` calls (flag-based knobs per CLAUDE.md P085); partial-failure exit code 2 distinguishes "some upstream URLs unreachable" from "everything broke" so AFK orchestrators can branch correctly. No new user-attention surface introduced at the Step 0d promotion point.
+**Compose-with**: ADR-013 Rule 5/6 (silent-pass + AFK fail-safe), ADR-044 category 4 (silent-framework — the trigger is policy + observable evidence), ADR-014 (check-upstream-responses' commit grain holds — the pre-flight subprocess emits its own commit), ADR-024 (back-link `## Reported Upstream` section is the source-of-truth scanned by the helper and read by the dispatched skill), ADR-049 / ADR-080 (PATH shim grammar + highest-version-wins wrapper), ADR-062 § Step 0b (precedent staleness-pre-flight shape — Step 0d is the outbound symmetric counterpart), P084 + P077 (subprocess isolation reuse — same `claude -p` wrapper as Step 5), P132 (mechanical-stage carve-out — no `AskUserQuestion` at the promotion point), P170 / RFC-002 (dual-tolerant glob — the helper handles both layouts), P317 / RFC-009 (adopter-safe PATH shim), P249 Phase 1 (the manual skill this step wires into a cadence).
+**Staleness contract drift**: the staleness comparison MUST stay symmetric with the check-upstream-responses SKILL's Confirmation surface (TTL semantics + outcome shape). Drift here re-opens the outbound-responses staleness contract — any change to TTL semantics MUST update this Step 0d, the lib helper, AND the check-upstream-responses SKILL.md Confirmation section in the same commit. <!-- OUTBOUND-RESPONSES-STALENESS-CONTRACT-SOURCE: packages/itil/skills/check-upstream-responses/SKILL.md ## Confirmation -->
+<!-- @jtbd JTBD-006 (Progress the Backlog While I'm Away — AFK orchestrator pre-flights check-upstream-responses so outbound STATE/NEW deltas surface without manual polling) -->
+<!-- @jtbd JTBD-004 (Connect Agents Across Repos to Collaborate — closes the outbound symmetric feedback loop) -->
+After Step 0d completes (whether dispatched or silent-passed), proceed to Step 1.
 ### Step 1: Scan the backlog
@@ -626,17 +669,25 @@ Do NOT extract `session_id`, `model`, `stop_reason`, `permission_denials`, `uuid
 Aggregation rule: sum `.total_cost_usd` into the session total and trust it; sum `.usage.*` into the session totals for cache-reuse ratio reasoning but label them best-effort in the Session Cost table. This asymmetry is correct-by-CLI-contract (cost is a session cumulative; usage is a per-response envelope); the orchestrator documents the asymmetry so adopters do not silently under-count tokens. First observed AFK-iter-7 iter 5 (2026-04-21): 1071s wall-clock / 60+ tool-use subprocess returned `duration_ms: 8546, num_turns: 1, usage.* ≈ 137K tokens, total_cost_usd: 6.08` — cost cumulative and correct, tokens reflecting only the final ack turn.
-**Exit-code semantics.** `claude -p` exits non-zero when the subprocess fails hard — subprocess crash, auth failure, unresolvable permission denial, API/quota exhaustion. The orchestrator reads the exit code BEFORE parsing `.result`:
+**Exit-code semantics — ordered check (P214 amendment to the P261 carve-out).** `claude -p` exits non-zero when the subprocess fails hard — subprocess crash, auth failure, unresolvable permission denial, API/quota exhaustion. Orthogonally, the `--output-format json` envelope carries an `is_error` field that fires `true` on transient API failures (529 Overloaded / 429 rate-limit / 401 auth-expired) where the subprocess exits 0 with `total_cost_usd: 0` — the API call never landed; no work was done; no `ITERATION_SUMMARY` was emitted. Before P214, the prose presented the exit-code rule first and the `is_error` carve-out as "orthogonal", which let an implementer silently route exit 0 + `is_error: true` to the `ITERATION_SUMMARY` parse path and miscount the failure as success. The orchestrator MUST instead read both fields in this explicit order, BEFORE parsing `.result`:
-- Exit 0 → parse `ITERATION_SUMMARY` from `.result` field; proceed to Step 6.
-- Non-zero exit → halt the loop; report the exit code, stderr, and any partial `.result` in the final summary. Do NOT spawn the next iteration. The user returns to a stopped loop with a clear failure reason (e.g. "quota exhausted — resume when quota resets").
+1. **Read the exit code.** Non-zero → halt the loop; report the exit code, stderr, and any partial `.result` in the final summary. Do NOT spawn the next iteration. The user returns to a stopped loop with a clear failure reason (e.g. "quota exhausted — resume when quota resets"). Exit-code check fires FIRST in the ordered sequence — non-zero exit takes precedence over the `is_error` branch below.
+2. **Parse `is_error` from the JSON stdout BEFORE attempting to parse `ITERATION_SUMMARY`.** When `is_error: true`, route to the SALVAGE-vs-HALT decision contract below (the existing P261 carve-out, extended by P214 with the transient-API-error HALT advisory). The check MUST happen before the Exit-0 → `ITERATION_SUMMARY` parse path — the load-bearing P214 invariant is that `is_error: true` never silently falls through to the parse path.
+3. **Exit 0 AND `is_error: false`** → parse `ITERATION_SUMMARY` from `.result` field; proceed to Step 6.
-**`is_error: true` stream-timeout salvage carve-out (P261).** Orthogonal to the process exit code, the `claude -p --output-format json` envelope carries an `is_error` field. An iter that returns `is_error: true` with `API Error: Stream idle timeout - partial response received` in `.result` AFTER staging coherent work but BEFORE `git commit` leaves that work intact in the working tree (the staged files survive; the JSON metadata is preserved — unlike the P147 stuck-before-emit class). This is a NEW recovery branch, not a replacement for the halt rule above. Deterministic SALVAGE-vs-HALT decision contract:
+**`is_error: true` class taxonomy (P261 SALVAGE branch + P214 HALT branch).** Two sub-classes of `is_error: true` route differently inside the ordered check above. Deterministic SALVAGE-vs-HALT decision contract:
-- **IF** `is_error: true` AND staged files exist in the working tree (`git diff --cached --name-only` non-empty) AND any iter-authored bats fixtures pass → the orchestrator MAY apply the documented **4-step salvage path**: (1) run the iter's bats as a structural sanity check; (2) inspect the changeset + diffs for quality; (3) commit the staged work from the orchestrator main turn with explicit iter-attribution in the message (e.g. "iter hit API stream timeout before commit — committed staged work from orchestrator main turn"); (4) **the commit gate fires fresh** on the salvage commit, so architect / JTBD / risk-scorer validate the work cleanly on the orchestrator's own SESSION_ID (never reusing the dead subprocess's gate markers, per ADR-009 line 89). The salvage commit IS the iteration's one commit per ADR-014 (amend-folding is inapplicable — no iter commit exists to amend).
-- **ELSE** (staged work incoherent / bats fail / nothing staged) → halt per the existing exit-code contract above.
+- **SALVAGE branch (P261 — stream-timeout class).** **IF** `is_error: true` AND staged files exist in the working tree (`git diff --cached --name-only` non-empty) AND any iter-authored bats fixtures pass → the orchestrator MAY apply the documented **4-step salvage path**: (1) run the iter's bats as a structural sanity check; (2) inspect the changeset + diffs for quality; (3) commit the staged work from the orchestrator main turn with explicit iter-attribution in the message (e.g. "iter hit API stream timeout before commit — committed staged work from orchestrator main turn"); (4) **the commit gate fires fresh** on the salvage commit, so architect / JTBD / risk-scorer validate the work cleanly on the orchestrator's own SESSION_ID (never reusing the dead subprocess's gate markers, per ADR-009 line 89). The salvage commit IS the iteration's one commit per ADR-014 (amend-folding is inapplicable — no iter commit exists to amend). Production shape: `API Error: Stream idle timeout - partial response received` in `.result` after staging coherent work but before `git commit` — staged files survive; JSON metadata preserved (unlike the P147 stuck-before-emit class).
+- **HALT branch (P214 — transient-API-error class).** **ELSE IF** `is_error: true` AND nothing staged (`git diff --cached --name-only` empty) → halt the loop with a class-appropriate advisory line in the final summary. The transient-API-error class fires when the API call never landed; `total_cost_usd: 0`; no work was done. Map `.result` substrings to the advisory:
+  - `529` / `Overloaded` → `"API overloaded; retry when service recovers"`
+  - `429` / `rate limit` → `"API rate-limited; retry when limit window resets"`
+  - `401` / `Authentication` / `auth expired` → `"API auth expired; refresh credentials before resuming"`
+  - any other `is_error: true` shape → `"transient API error; inspect .result and resume manually"`
-The decision is deterministic and non-interactive — no `AskUserQuestion` (Rule 6, mirroring the P121 SIGTERM precedent at line 154 of ADR-032). **Distinct class** from: P121 (SIGTERM idle-timeout — `is_error: false` clean exit-flush; subprocess HAD committed before going idle), P147 (SIGTERM stuck-before-emit — exit 143 + 0-byte JSON, metadata lost), and P146 (bash-polling antipattern — the deadlock mechanism behind P147). Here the iter exits on its own with `is_error: true`; no SIGTERM involved; metadata AND staged files survive. Full contract: ADR-032 § "is_error:true stream-timeout salvage (P261 amendment)". Behavioural fixture: `test/work-problems-step-5-stream-timeout-salvage.bats`.
+  Do NOT spawn the next iteration; the loop has no recoverable state to advance from. Retry policy for the transient classes (e.g. exponential backoff on 529 Overloaded, max-N attempts) is deferred to a Phase 2 amendment per P214's Investigation Tasks — Phase 1 is HALT-with-advisory only.
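+- **ELSE** (`is_error: true` with files staged, but the staged work is incoherent or the iter-authored bats fail) → do not salvage; halt the loop as in the non-zero-exit rule (step 1 of the ordered check above) and do NOT spawn the next iteration.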
+The decision is deterministic and non-interactive — no `AskUserQuestion` (Rule 6, mirroring the P121 SIGTERM precedent at line 154 of ADR-032). **Distinct classes** within the `is_error: true` taxonomy: P261 SALVAGE (stream-timeout — staged work survives) vs P214 HALT (transient API error — nothing staged). **Distinct from** sibling subprocess-failure classes: P121 (SIGTERM idle-timeout — `is_error: false` clean exit-flush; subprocess HAD committed before going idle), P147 (SIGTERM stuck-before-emit — exit 143 + 0-byte JSON, metadata lost), and P146 (bash-polling antipattern — the deadlock mechanism behind P147). Here the iter exits on its own with `is_error: true`; no SIGTERM involved; metadata survives in the JSON envelope. Full contract: ADR-032 § "is_error:true stream-timeout salvage (P261 amendment)" + § P214 transient-API-error HALT extension. Behavioural fixtures: `test/work-problems-step-5-stream-timeout-salvage.bats` (SALVAGE branch — P261), `test/work-problems-step-5-is-error-transient-halt.bats` (HALT branch — P214).
 **Quota as the natural stop.** The AFK loop runs until quota is exhausted or a stop-condition from Step 2 fires. There is no per-iteration dollar cap; running iterations until quota is actually exhausted maximises backlog progress per quota cycle. Quota-exhaust on a `claude -p` invocation surfaces as a non-zero exit and the orchestrator halts cleanly per the rule above.
@@ -754,9 +805,38 @@ After the iteration's commit lands but before starting the next iteration, check
 1. Run `npm run push:watch` (push + wait for CI to pass).
 2. If `.changeset/` is non-empty after push, run `npm run release:watch` (merge the release PR + wait for npm publish).
 3. Resume the loop only after the release lands on npm.
-4. **Post-release cache refresh (P233)**: if step 2 actually ran AND succeeded (a release shipped to npm), chain `/install-updates` to refresh the plugin cache before the next iter dispatches. Skipped when step 2 was a no-op (empty `.changeset/` after push; no new plugin version exists). See the **Post-release cache refresh** subsection below for the full contract.
+4. **Post-release K→V auto-transition (P228)**: if step 2 actually ran AND succeeded (a release shipped to npm), fire the K→V auto-transition callback for `.known-error.md` tickets whose Release-vehicle citation matches a just-shipped changeset. See the **Post-release K→V auto-transition** subsection below for the full contract.
+5. **Post-release cache refresh (P233)**: if step 2 actually ran AND succeeded (a release shipped to npm), chain `/install-updates` to refresh the plugin cache before the next iter dispatches. Skipped when step 2 was a no-op (empty `.changeset/` after push; no new plugin version exists). See the **Post-release cache refresh** subsection below for the full contract.
+**Post-release K→V auto-transition (P228) — fires only after within-appetite Drain action step 2 (release:watch) succeeded:**
+ADR-022 prescribes that Known Error tickets transition to Verification Pending on release, but until P228 there was no auto-fire surface to back-fill the transition once a fix ships. Iter subprocesses MUST NOT release (the orchestrator owns Step 6.5 per the iter dispatch constraints), so a fix that lands in iter N stays in `.known-error.md` until the orchestrator drains release in Step 6.5 — and prior to this callback, the K→V transition was silently deferred to "the next session" citing a misapplied P143 amendment. The 2026-06-08 P220 empirical witness — `## Fix Released` populated with no K→V transition — confirmed the gap.
+**Mechanism:**
+1. Invoke `wr-itil-enumerate-postrelease-kv-candidates` (ADR-049 PATH shim resolving to `packages/itil/scripts/run-enumerate-postrelease-kv-candidates.sh` / `packages/itil/lib/enumerate-postrelease-kv-candidates.sh`). The helper walks `docs/problems/known-error/*.md`, invokes `wr-itil-derive-release-vehicle <NNN>` per ticket, and emits one `KV_CANDIDATE: P<NNN> | <changeset>` line per ticket whose changeset has been shipped (derive exit 0). Tickets with no `**Release vehicle**: .changeset/<name>.md` reference (derive exit 2 — legacy pre-P330) and tickets whose changeset is still in the working tree (derive exit 3 — unreleased) are skipped silently. Final line: `KV_CANDIDATES_SUMMARY: total=<N>`.
+2. Parse `KV_CANDIDATE:` lines from stdout.
+3. For each candidate `P<NNN>`, dispatch `/wr-itil:transition-problem <NNN> verifying` via the Skill tool. The dispatched transition-problem skill is the authoritative executor for K→V per ADR-010 amended "Split-skill execution ownership" (P093) — orchestrator dispatch is the documented forwarder pattern, NOT a round-trip. The dispatched skill rides its OWN ADR-014 commit through architect / JTBD / risk-scorer gates per its existing Step 8 contract (rename + Status edit + `## Fix Released` write + README refresh + commit). The orchestrator does NOT re-implement the transition mechanics; it dispatches and reads the outcome.
+4. After all candidates dispatched: emit one per-ticket transition outcome line to the iter summary in the form `K→V: P<NNN> | commit=<sha> | release=<vehicle>` (read from the dispatched transition-problem's `RELEASE_VEHICLE` block or Report-the-outcome stdout per Step 9 of transition-problem).
+5. Push the resulting K→V commits via `git push` (the release itself has already shipped — these are post-release audit-trail commits and do NOT require a second release:watch round-trip).
+**Conditional on actual release**: only fires when `release:watch` actually published (step 2 of the Drain action above ran AND returned success). Skipped when `push:watch` ran alone (empty `.changeset/`; no new plugin version). Without this guard, the enumerator would scan `docs/problems/known-error/` on every iter with no shipped changeset to match — wasted reads.
+**Non-blocking on individual transition failure**: if a dispatched `/wr-itil:transition-problem` fails (pre-flight reject, gate rejection, P057 staging trap, derive helper transient error), the orchestrator logs the failure for that ticket and continues to the next candidate. A single transition failure MUST NOT halt the loop or block siblings in the same cohort. Persistent failures across multiple iters surface as accumulated `outstanding_questions` entries per the standard Step 2.5b discipline.
+**Policy authorisation (ADR-013 Rule 5)**: rides the same Rule 5 silent-proceed that already covers `push:watch` / `release:watch` / `/install-updates` in the drain — the K→V auto-transition is mechanically downstream of release and shares its authorisation. The derive-helper-citation match against the just-shipped changeset is deterministic (filename equality), not a judgment call — squarely in the safe-default tier per JTBD-006 "Decisions that would normally require my input are resolved using safe defaults".
+**Mid-loop ask discipline (P130) preserved**: the dispatched transition-problem skill is wired to skip `AskUserQuestion` when invoked under AFK orchestrator context per its own ADR-013 Rule 6 fail-safe (transition-problem SKILL.md Step 8 risk-above-appetite branch). The orchestrator MUST NOT introduce any `AskUserQuestion` call at the callback site — the per-candidate routing is framework-resolved per ADR-044, and the callback fires in a mechanical-stage transition between drain step 2 and step 5 (cache refresh).
+**V→C remains the maintainer's surface (persona constraint per JTBD-006)**: this callback fires ONLY for K→V (`known-error → verifying` — "fix released, awaiting verification"). It explicitly does NOT auto-fire V→C — the maintainer's judgment-reserved "fix actually works" closure remains untouched and continues to require their return per the existing transition-problem Step 4 `Verification Pending → Closed` precondition ("the user has explicitly confirmed the fix works in production").
+**Composition with the Above-appetite branch (below)**: the K→V callback is anchored to the within-appetite Drain action step 4 — it does NOT fire after the above-appetite Rule 5 halt (no release shipped → nothing to match) and it does NOT fire mid-loop in the above-appetite auto-apply loop. When the auto-apply loop converges and re-enters the within-appetite Drain action, the K→V callback fires there per step 4.
+**Composition with Cohort-graduation pre-check (P246)**: the cohort-graduation pre-check (step 2a above) fires BEFORE the Drain action; its `git mv` operations from `docs/changesets-holding/` to `.changeset/` happen BEFORE release:watch and ship as part of the same release. The K→V callback fires AFTER release:watch and consumes the set of just-shipped changesets — so graduated cohorts that ship in the same release are correctly matched by the enumerator (the deleted-from-tree changeset has the graduated basename; the K-ticket's `**Release vehicle**: .changeset/<basename>.md` reference matches).
+Per ADR-022 (Verifying lifecycle) + ADR-018 (release-cadence host) + ADR-010 amended P093 (transition-problem authoritative executor) + ADR-014 (per-transition commit grain) + ADR-013 Rule 5 (policy-authorised silent-proceed) + ADR-044 (framework-resolution boundary) + P228 (this ticket) + P233 (sibling callback) + P267 (derive-release-vehicle composed helper) + P330 (Release vehicle seed reference — input signal).
-**Post-release cache refresh (P233) — fires only after within-appetite Drain action step 4 (above):**
+**Post-release cache refresh (P233) — fires only after within-appetite Drain action step 5 (above):**
 After a successful release-cadence drain has shipped a new plugin version to npm, the orchestrator chains `/install-updates` to refresh the plugin cache before the next iter dispatches. Empirical evidence in `docs/briefing/afk-subprocess.md` ("Just-shipped gate-class hooks DON'T protect the immediate-next iter" entry) confirms iter subprocesses re-resolve plugin cache on spawn — so a just-shipped gate-class hook is inactive in the next iter unless the cache is refreshed first. The orchestrator IS the "restart" boundary for the next iter subprocess (each subprocess is a fresh `claude -p` per ADR-032 + `afk-subprocess-mechanics.md`); the cache refresh between release:watch and next-iter dispatch is the load-bearing step.
@@ -765,7 +845,7 @@ After a successful release-cadence drain has shipped a new plugin version to npm
 - **Policy authorisation (ADR-013 Rule 5)**: rides the same Rule 5 silent-proceed that already covers `push:watch` / `release:watch` in the drain — the post-release cache refresh is mechanically downstream of release and shares its authorisation. Composes with P106's claude-plugin-install no-op-when-already-installed factor (the chained `/install-updates` handles the uninstall+install dance per P106).
 - **Mid-loop ask discipline (P130) preserved**: if `/install-updates` Step 5b/5c consent gate fires (cache miss / scope delta / `INSTALL_UPDATES_RECONFIRM=1`), the orchestrator main turn treats this AS the **Non-interactive fallback** documented in `scripts/repo-local-skills/install-updates/SKILL.md` "Non-interactive fallback" subsection — log the dry-run output, do not interrupt the loop. The orchestrator's `.claude/.install-updates-consent` is normally present (install-updates Step 5a cache hit) so the gate fires silently. **ADR-044 framework-resolution boundary** authorises this AskUserQuestion-available-but-forbidden routing: invocation between iters is a mechanical-stage transition the framework has resolved; surfacing it to the user would dilute the Step 2.5b accumulated-question discipline.
-**Composition with the Above-appetite branch (below)**: the cache refresh is anchored to the within-appetite Drain action step 4 — it does NOT fire after the above-appetite Rule 5 halt (no release shipped → nothing to refresh) and it does NOT fire mid-loop in the above-appetite auto-apply loop. When the auto-apply loop converges and re-enters the within-appetite Drain action, the cache refresh fires there per step 4. The chain's site is the Drain action only.
+**Composition with the Above-appetite branch (below)**: the cache refresh is anchored to the within-appetite Drain action step 5 — it does NOT fire after the above-appetite Rule 5 halt (no release shipped → nothing to refresh) and it does NOT fire mid-loop in the above-appetite auto-apply loop. When the auto-apply loop converges and re-enters the within-appetite Drain action, the cache refresh fires there per step 5. The chain's site is the Drain action only.
 **Failure handling (P140)**: When `push:watch` or `release:watch` reports a CI failure or publish failure, the orchestrator follows a diagnose-then-classify routing — fix-and-continue for the documented mechanically-fixable allow-list, halt for everything else. The previous uniform halt rule converted mechanically-fixable failures (1-line stale-grep-string updates, transient flakes) into ~45min queue stalls, regressing JTBD-006 "Progress the Backlog While I'm Away" without any governance benefit.
@@ -893,6 +973,7 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
 | Cohort-graduation pre-check fires before Drain action (within-appetite branch, `docs/changesets-holding/` non-empty) — evaluator returns `status=resolved` | Route to Rule 4 evidence-floor judgement (LLM-owned per ADR-061 Rule 4 + ADR-044 framework-resolution boundary). Evaluator's `status=resolved` is necessary-but-not-sufficient (P308 — evaluator script disclaims Rule 4 at lines 19-22). Interactive: per-held-entry `AskUserQuestion` with inline evidence summary (P350 brief-before-ID) + 3 options (Graduate / Defer / Reject). AFK: queue per-held-entry `outstanding_question` to `.afk-run-state/outstanding-questions.jsonl` (P352 / ADR-013 Rule 6 queue-and-continue universal default) — do NOT graduate, continue Drain for any pre-existing `.changeset/` entries. On Graduate verdict: `git mv docs/changesets-holding/<basename> .changeset/<basename>`, append README "Recently reinstated" entry citing the user's Rule 4 verdict, amend the iter's main commit per ADR-042 Rule 3. For class=3b cohorts, all cohort members graduate atomically on any-member Graduate verdict (Rule 3b cohort propagation); any Defer/Reject keeps entire cohort held. Per ADR-061 Rule 4 + Rule 5 + Rule 6 + Rule 7 + ADR-013 Rule 6 + P246 + P308 + P350 + P352 (Step 6.5 Cohort-graduation pre-check; step 2a Rule 4 evidence-floor judgement). Graduation criterion is evidence-of-working-as-desired (Rule 4 evidence floor), not elapsed wall-clock time — user direction 2026-05-17: "Dogfooding makes sense, but it shouldn't be time based, it should be until we are happy that it's working as desired." |
 | Cohort-graduation pre-check — evaluator returns `status=vp-blocked` | Skip. Per ADR-061 Rule 2 Verification Pending carve-out (symmetric to ADR-042 Rule 2b). Do NOT graduate; held entry stays. `.verifying.md` → `.closed.md` transition auto-clears the carve-out at a later pass. Per ADR-061 Rule 2 + P246. |
 | Cohort-graduation pre-check — evaluator returns `status=halt-no-resolution` | Halt at the framework-prescribed "Step 6.5 cohort-graduation halt-no-resolution" halt point. Per ADR-061 Rule 1a terminal: ambiguous join is a user-decision surface, not an agent-decision surface. Halt-with-batched-questions per the Step 2.5b cross-reference. Per ADR-061 Rule 1a + P246. |
+| Post-release K→V auto-transition between iters (P228) | After a successful within-appetite Drain action shipped a release to npm, invoke `wr-itil-enumerate-postrelease-kv-candidates` to enumerate `.known-error.md` tickets whose `**Release vehicle**: .changeset/<name>.md` citation matches a just-shipped (deleted-from-tree) changeset, and dispatch `/wr-itil:transition-problem <NNN> verifying` per emitted `KV_CANDIDATE` line. Conditional on actual release (skipped when `push:watch` ran alone with no changeset); non-blocking on individual transition failure (logs per-ticket, continues to next candidate; persistent failures route to Step 2.5b accumulated questions). V→C remains a maintainer-only surface — this callback fires K→V only. Per ADR-022 + ADR-018 + ADR-010 amended P093 + ADR-014 + ADR-013 Rule 5 + ADR-044 + P228 + P233 + P267 + P330 (Step 6.5 Post-release K→V auto-transition subsection). |
 | Post-release plugin cache refresh between iters (P233) | After a successful within-appetite Drain action shipped a release to npm, chain `/install-updates` to refresh the plugin cache before the next iter dispatches. Conditional on actual release (skipped when `push:watch` ran alone with no changeset); non-blocking on `/install-updates` failure (degrades to cache-stays-stale, equivalent to pre-amendment behaviour). Mid-loop ask discipline preserved by treating any `/install-updates` AskUserQuestion surface AS the Non-interactive fallback dry-run path. Per ADR-013 Rule 5 + ADR-044 + P130 + P106 + P233 (Step 6.5 Post-release cache refresh subsection). |
 | CI failure during Step 6.5 drain (within-appetite branch) | Diagnose via `gh run view --log-failed`, classify against the closed fixable-in-iter allow-list (P081-class stale-grep-string, hook stub mismatch, test ID drift, environmental flake), fix-and-continue for fixable classes (each retry rides its own ADR-014 commit gate), 3-retry cap per iteration, halt for unrecoverable classes. Ambiguous classification defaults to halt. ADR-013 Rule 5 policy-authorised. Per ADR-026 grounding + ADR-044 framework-resolution boundary + P140 (Step 6.5 Failure handling). |
 | Pipeline risk above appetite (push or release >= 5/25) | Auto-apply scorer remediations incrementally (ADR-042 Rule 2). The agent reads suggestions and decides what to do. Re-score after each apply; drain when within appetite. **Never release above appetite** (ADR-042 Rule 1) — no AskUserQuestion shortcut. Halt the loop with `outcome: halted-above-appetite` if the loop exhausts without convergence (ADR-042 Rule 5). Verification Pending commits excluded from auto-revert (Rule 2b). Per ADR-042 (Step 6.5 Above-appetite branch). |

package/skills/work-problems/test/work-problems-step-0d-outbound-responses-staleness-behavioural.bats ADDED Viewed

@@ -0,0 +1,174 @@
+#!/usr/bin/env bats
+# Step 0d behavioural fixture per P220 + ADR-062 § JTBD-006 driver:
+# work-problems pre-flights /wr-itil:check-upstream-responses when the
+# outbound-responses cache is stale or missing AND there exist local
+# tickets carrying `## Reported Upstream` back-link sections. The
+# staleness decision lives in
+# `packages/itil/lib/check-outbound-responses-staleness.sh::should_promote_outbound_responses_preflight`
+# so the SKILL.md Step 0d prose is a thin source-and-call wrapper
+# around a behaviourally testable shell function (P081 / user feedback:
+# prefer behavioural over structural-grep tests).
+#
+# Cases covered (symmetric to Step 0b cases plus the back-link discovery
+# axis that replaces channels-config):
+#   1. No tickets with `## Reported Upstream` section → "no-back-link-tickets"
+#      (downstream-adopter non-obligation; analogue to no-channels-config).
+#   1b. Tickets exist but none has a `## Reported Upstream` section →
+#      "no-back-link-tickets".
+#   2. Back-link ticket present, cache absent → "first-run-cache-absent".
+#   2b. Same as 2, with the ticket in a per-state subdirectory (RFC-002 layout).
+#   3. Back-link ticket present, cache present, last_checked null → "first-run-last-checked-null".
+#   4. Back-link ticket present, cache fresh within TTL → "fresh-within-ttl".
+#   5. Back-link ticket present, cache older than TTL → "ttl-expiry" (with age + ttl in the reason).
+#   6. Custom ttl_seconds in cache honored (not hardcoded default).
+#   7. Missing ttl_seconds field defaults to 86400 (24h symmetric with inbound).
+setup() {
+  # Resolve the repository root (five levels above this test file's
+  # directory) and point HELPER at the library under test.
+  local here
+  here="$(dirname "$BATS_TEST_FILENAME")"
+  REPO_ROOT="$(cd "${here}/../../../../.." && pwd)"
+  HELPER="${REPO_ROOT}/packages/itil/lib/check-outbound-responses-staleness.sh"
+  # Throwaway project tree; each test drops tickets and a cache file into it.
+  FIXTURE="$(mktemp -d)"
+  mkdir -p "${FIXTURE}/docs/problems"
+}
+teardown() {
+  # Discard the per-test fixture tree that setup created.
+  rm -rf "${FIXTURE}"
+}
+# Helper: create a minimal problem ticket at the path given in $1. The body
+# carries a `## Reported Upstream` section, i.e. the back-link marker.
+_write_backlink_ticket() {
+  local dest="$1"
+  cat <<'EOF' > "$dest"
+# Problem 999: example back-link fixture
+**Status**: Open
+## Description
+Fixture for Step 0d behavioural test.
+## Reported Upstream
+- **Repo**: example/upstream
+- **URL**: https://github.com/example/upstream/issues/999
+- **Filed**: 2026-06-08
+EOF
+}
+@test "helper exists at the contracted path" {
+  [ -f "$HELPER" ]  # fails if the library is not a regular file at the path setup computed
+}
+@test "case 1: no back-link tickets → no-back-link-tickets" {
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"  # docs/problems/ exists but is empty (see setup)
+  [ "$status" -eq 0 ]
+  [ "$output" = "no-back-link-tickets" ]  # exact match on the whole captured output
+}
+@test "case 1b: tickets without ## Reported Upstream section → no-back-link-tickets" {
+  cat > "$FIXTURE/docs/problems/100-no-back-link.open.md" <<'EOF'
+# Problem 100: no upstream link
+## Description
+Local-only ticket.
+EOF
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"  # the only ticket has no Reported Upstream heading
+  [ "$status" -eq 0 ]
+  [ "$output" = "no-back-link-tickets" ]  # same reason string as the empty-directory case
+}
+@test "case 2: back-link ticket present, cache absent → first-run-cache-absent" {
+  _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"  # flat layout: ticket directly under docs/problems/
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"  # no cache file has been written in this test
+  [ "$status" -eq 0 ]
+  [ "$output" = "first-run-cache-absent" ]
+}
+@test "case 2b: back-link in per-state subdir layout (RFC-002) is discovered" {
+  mkdir -p "$FIXTURE/docs/problems/known-error"  # ticket lives one level down, in a per-state directory
+  _write_backlink_ticket "$FIXTURE/docs/problems/known-error/220-cadence-gap.md"
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"
+  [ "$status" -eq 0 ]
+  [ "$output" = "first-run-cache-absent" ]  # same outcome as the flat-layout case 2: ticket found, no cache
+}
+@test "case 3: cache present, last_checked null → first-run-last-checked-null" {
+  _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
+  cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<'EOF'
+{ "last_checked": null, "tickets": {} }
+EOF
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"  # cache file exists but last_checked is JSON null
+  [ "$status" -eq 0 ]
+  [ "$output" = "first-run-last-checked-null" ]
+}
+@test "case 4: cache fresh within TTL → fresh-within-ttl (silent-pass)" {
+  _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
+  # last_checked 1 hour ago — well within 24h default TTL.
+  local recent_iso  # UTC timestamp in %Y-%m-%dT%H:%M:%SZ form, computed via python3 below
+  recent_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
+  cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
+{ "last_checked": "$recent_iso", "tickets": {} }
+EOF
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"  # cache carries no ttl_seconds field
+  [ "$status" -eq 0 ]
+  [ "$output" = "fresh-within-ttl" ]
+}
+@test "case 5: cache older than TTL → ttl-expiry with age + ttl in the reason" {
+  _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
+  # last_checked 2 days ago — past 24h default TTL.
+  local stale_iso  # UTC timestamp in %Y-%m-%dT%H:%M:%SZ form, computed via python3 below
+  stale_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=2)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
+  cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
+{ "last_checked": "$stale_iso", "tickets": {} }
+EOF
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"  # cache carries no ttl_seconds field
+  [ "$status" -eq 0 ]
+  # Format: "ttl-expiry age=<N>s ttl=<M>s"
+  [[ "$output" =~ ^ttl-expiry\ age=[0-9]+s\ ttl=86400s$ ]]  # age is only shape-checked; ttl must be exactly 86400
+}
+@test "case 6: custom ttl_seconds in cache is honored (not hardcoded default)" {
+  # 1-hour TTL; last_checked 90 minutes ago → stale under the custom TTL,
+  # but would be FRESH under the 86400s default. Confirms the helper reads
+  # ttl_seconds from cache rather than hardcoding 86400.
+  _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
+  local mid_iso  # UTC timestamp in %Y-%m-%dT%H:%M:%SZ form, computed via python3 below
+  mid_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=90)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
+  cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
+{ "last_checked": "$mid_iso", "tickets": {}, "ttl_seconds": 3600 }
+EOF
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"
+  [ "$status" -eq 0 ]
+  [[ "$output" =~ ^ttl-expiry\ age=[0-9]+s\ ttl=3600s$ ]]  # reported ttl must echo the 3600 written to the cache
+}
+@test "case 7: missing ttl_seconds defaults to 86400 (symmetric with inbound)" {
+  # The cache below deliberately omits ttl_seconds. last_checked is 23 hours
+  # old: inside an 86400s (24h) fallback, but outside any materially shorter
+  # one. With a 1-hour-old timestamp this fixture was identical to case 4 and
+  # could not detect a shrunken default; at 23 hours it can.
+  _write_backlink_ticket "$FIXTURE/docs/problems/100-back-link.open.md"
+  local recent_iso
+  recent_iso="$(python3 -c "import datetime; print((datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=23)).strftime('%Y-%m-%dT%H:%M:%SZ'))")"
+  cat > "$FIXTURE/docs/problems/.outbound-responses-cache.json" <<EOF
+{ "last_checked": "$recent_iso", "tickets": {} }
+EOF
+  # shellcheck disable=SC1090
+  source "$HELPER"
+  run should_promote_outbound_responses_preflight "$FIXTURE"
+  [ "$status" -eq 0 ]
+  # Still fresh: 23h (82800s) is below the 86400s fallback TTL.
+  [ "$output" = "fresh-within-ttl" ]
+}