npm - @windyroad/itil - Versions diffs - 0.47.9-preview.556 → 0.47.9-preview.558 - Mend

@windyroad/itil 0.47.9-preview.556 → 0.47.9-preview.558

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/bin/wr-itil-check-fail-soft-skip-discipline +51 -0
package/package.json +1 -1
package/scripts/check-fail-soft-skip-discipline.sh +141 -0
package/scripts/test/check-fail-soft-skip-discipline.bats +172 -0
package/skills/manage-problem/SKILL.md +6 -1
package/skills/report-upstream/SKILL.md +14 -14
package/skills/report-upstream/test/report-upstream-contract.bats +50 -0
package/skills/review-problems/SKILL.md +33 -1
package/skills/work-problems/SKILL.md +17 -6
package/skills/work-problems/test/work-problems-step-6-5-cohort-graduation.bats +48 -12

package/bin/wr-itil-check-fail-soft-skip-discipline ADDED Viewed

@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# Generated by scripts/sync-shim-wrappers.sh from
+# packages/shared/lib/shim-wrapper-template.sh. DO NOT EDIT individual
+# shim files in packages/*/bin/wr-* directly; edit the template + run
+# `npm run sync:shim-wrappers` to regenerate.
+#
+# Purpose: locate and exec scripts/check-fail-soft-skip-discipline.sh,
+# forwarding all arguments unchanged.
+#
+# Resolution (ADR-080):
+#   1. If the wrapper's parent dir is semver-shaped, treat as installed-
+#      cache execution and resolve to the highest-version sibling's
+#      scripts/ entry below.
+#   2. Otherwise (parent dir is e.g. `architect`), treat as source-
+#      monorepo execution and dispatch to own scripts/. The source-repo-
+#      guard `exec` is the anchor parsed by
+#      packages/retrospective/scripts/check-tarball-shipped-shims.sh.
+#   3. If the cache parent contains zero semver-shaped siblings, exit
+#      127 with a stderr message naming the cache parent (per SQ-080-2).
+#
+# @adr ADR-080 (highest-version-wins shim wrapper plugin scaffold)
+# @adr ADR-049 (plugin-bundled scripts resolve via bin/ on $PATH — amended)
+# @problem P343 (mid-session staleness window)
+set -euo pipefail
+# Absolute path of the directory holding this wrapper, derived from $0.
+# NOTE(review): $0 is not symlink-resolved here — confirm the wrapper is
+# never invoked through a symlink, otherwise the dirs below are computed
+# relative to the link's location.
+SHIM_DIR="$(cd "$(dirname "$0")" && pwd)"
+# Parent of the wrapper dir, its basename, and the grandparent dir whose
+# children are scanned for version-named siblings.
+OWN_VERSION_DIR="$(dirname "$SHIM_DIR")"
+OWN_VERSION_NAME="$(basename "$OWN_VERSION_DIR")"
+CACHE_PARENT="$(dirname "$OWN_VERSION_DIR")"
+# MAJOR.MINOR.PATCH with an optional -prerelease or +build suffix.
+SEMVER_RE='^[0-9]+\.[0-9]+\.[0-9]+([-+][0-9A-Za-z.-]+)?$'
+# Source-repo guard: own parent dir is NOT semver → dispatch to own scripts/.
+if ! [[ "$OWN_VERSION_NAME" =~ $SEMVER_RE ]]; then
+  exec "$SHIM_DIR/../scripts/check-fail-soft-skip-discipline.sh" "$@"
+fi
+# Cache execution: pick the highest-semver sibling under CACHE_PARENT.
+HIGHEST=""
+while IFS= read -r dir; do
+  name="$(basename "$dir")"
+  # Ignore siblings whose name is not version-shaped.
+  [[ "$name" =~ $SEMVER_RE ]] || continue
+  # Adopt the candidate when it sorts last (i.e. >= current) under `sort -V`.
+  # NOTE(review): `sort -V` is a generic version sort, not a semver
+  # comparator — confirm a pre-release such as X.Y.Z-preview.N is not
+  # ranked above the plain X.Y.Z release.
+  if [[ -z "$HIGHEST" ]] || [[ "$(printf '%s\n%s\n' "$HIGHEST" "$name" | sort -V | tail -1)" == "$name" ]]; then
+    HIGHEST="$name"
+  fi
+# find errors are discarded; an unreadable CACHE_PARENT therefore falls
+# through to the "no cached versions" exit below.
+done < <(find "$CACHE_PARENT" -mindepth 1 -maxdepth 1 -type d 2>/dev/null)
+if [[ -z "$HIGHEST" ]]; then
+  printf 'wr-shim: no cached versions in %s\n' "$CACHE_PARENT" >&2
+  exit 127
+fi
+# Replace this process with the selected version's script.
+exec "$CACHE_PARENT/$HIGHEST/scripts/check-fail-soft-skip-discipline.sh" "$@"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@windyroad/itil",
-  "version": "0.47.9-preview.556",
+  "version": "0.47.9-preview.558",
   "description": "ITIL-aligned IT service management for Claude Code (problem, and future incident/change skills)",
   "bin": {
     "windyroad-itil": "./bin/install.mjs"

package/scripts/check-fail-soft-skip-discipline.sh ADDED Viewed

@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+# check-fail-soft-skip-discipline.sh — advisory lint for P351 (skills
+# fail-soft-skip when their precondition config is missing — should
+# auto-bootstrap with user input as needed rather than silently
+# skipping).
+#
+# Flags SKILL.md prose that names the fail-soft-skip pattern. It does
+# NOT check whether a hit is paired with an auto-bootstrap routine:
+# every raw pattern hit is reported and SKILL.md authors disambiguate.
+# The default behaviour of skills with
+# config-file preconditions has been "fail-soft skip" when the config is
+# missing; the right behaviour (per P351) is to recognise the gap and
+# auto-bootstrap the missing config — invoking AskUserQuestion for any
+# required user input, then proceeding with the original pass. In AFK
+# mode where AskUserQuestion is unavailable, queue a config-direction
+# outstanding_question and continue other work rather than silently
+# skipping a desired capability.
+#
+# This lint walks `packages/*/skills/*/SKILL.md` files for matching
+# patterns and emits one WARN line per match. Per architect review
+# (a6747bd57c7953b14): the broad `skipping` pattern false-positives on
+# legitimate per-channel skip prose; we tighten to the
+# `skipping.*config|skipping.*not configured` shape that is the
+# load-bearing signal class.
+#
+# Usage:
+#   check-fail-soft-skip-discipline.sh [<repo-root>]
+#     <repo-root> defaults to the current working directory.
+#
+# Environment:
+#   WR_FAIL_SOFT_SKIP_WARN_ONLY=1   Phase 1 advisory (default) — exit 0
+#                                    even when violations exist.
+#   WR_FAIL_SOFT_SKIP_WARN_ONLY=0   Phase 2 load-bearing — exit 1 when
+#                                    violations exist.
+#   Any value other than the literal `0` is treated as advisory.
+#
+# Exit codes:
+#   0 = clean OR Phase 1 advisory with violations
+#   1 = Phase 2 load-bearing with violations
+#   2 = usage / path error
+#
+# Output format (one line per violation, to stderr):
+#   WARN  <relpath>:<line>  <matched-pattern>: <line-snippet>
+#
+# Promotion criteria (Phase 1 → Phase 2):
+#   Promote `WR_FAIL_SOFT_SKIP_WARN_ONLY=0` once every affected SKILL.md
+#   has been migrated to the auto-bootstrap pattern.
+#
+# @adr ADR-040 (advisory-then-load-bearing reusable pattern)
+# @adr ADR-049 (plugin-bundled scripts; PATH shim)
+# @adr ADR-052 (behavioural-tests-default)
+# @adr ADR-013 Rule 6 (non-interactive fail-safe — advisory exit 0)
+# @jtbd JTBD-001 (Enforce Governance Without Slowing Down)
+# @jtbd JTBD-101 (Extend the Suite with New Plugins)
+# @problem P351 (skills fail-soft-skip when precondition config missing
+#                — should auto-bootstrap with user input)
+# No `-e`: grep legitimately exits 1 on "no match" throughout this script.
+set -uo pipefail
+# Pin a UTF-8 locale for the text tools below (find/sort/grep) and for
+# bash's own substring trim of the snippet.
+# NOTE(review): assumes the en_US.UTF-8 locale is installed on the host —
+# confirm; C.UTF-8 may be the more portable choice.
+export LC_ALL=en_US.UTF-8
+REPO_ROOT="${1:-$(pwd)}"
+WARN_ONLY="${WR_FAIL_SOFT_SKIP_WARN_ONLY:-1}"
+if [ ! -d "$REPO_ROOT" ]; then
+  echo "check-fail-soft-skip-discipline: not a directory: $REPO_ROOT" >&2
+  exit 2
+fi
+if [ ! -d "$REPO_ROOT/packages" ]; then
+  echo "check-fail-soft-skip-discipline: no packages/ subdir under $REPO_ROOT" >&2
+  exit 2
+fi
+# Patterns tightened per architect review to load-bearing signal class
+# (avoid false-positives on legitimate per-channel skip prose like
+# "skipping the failing channel/report"):
+#   1. `fail-soft skip`            — the canonical name of the pattern.
+#   2. `silently skip`             — direct synonym.
+#   3. `skipping.*config`          — skipping tied to a config artefact.
+#   4. `skipping.*not configured`  — explicit precondition-config skip.
+#   5. `not configured.*skip`      — the reverse phrasing.
+# Order matters: it is the first-match-wins order used to label a hit.
+PATTERNS=(
+  'fail-soft skip'
+  'silently skip'
+  'skipping.*config'
+  'skipping.*not configured'
+  'not configured.*skip'
+)
+# Combined extended-regex for a single grep pass. Derived from PATTERNS
+# (joined with `|`) so the two can never drift apart.
+PATTERN_RE="$(IFS='|'; printf '%s' "${PATTERNS[*]}")"
+# Collect candidate files, sorted for stable output. Depth is measured
+# from packages/: 4 is packages/<pkg>/skills/<skill>/SKILL.md; -maxdepth 5
+# additionally admits one extra nesting level, provided a skills/ segment
+# is on the path. A read loop is used instead of `mapfile` so the script
+# also runs on bash versions that lack that builtin.
+TARGETS=()
+while IFS= read -r target; do
+  TARGETS+=("$target")
+done < <(
+  find "$REPO_ROOT/packages" \
+    -mindepth 4 -maxdepth 5 \
+    -type f -name 'SKILL.md' \
+    -path '*/skills/*' \
+    2>/dev/null | sort
+)
+violations=0
+scanned=0
+# The `${arr[@]+"${arr[@]}"}` form expands to nothing for an empty array;
+# a bare "${TARGETS[@]}" aborts under `set -u` on bash older than 4.4.
+for file in ${TARGETS[@]+"${TARGETS[@]}"}; do
+  scanned=$((scanned + 1))
+  rel="${file#"$REPO_ROOT"/}"
+  # grep -n -E emits "<line_no>:<line>" per match. Read each match and
+  # emit a WARN line. We deliberately do NOT attempt to detect a paired
+  # auto-bootstrap routine in the same file — Phase 1 emits one WARN per
+  # raw pattern hit; SKILL.md authors disambiguate. The promotion
+  # criterion for Phase 2 is "every WARN'd file migrated to the
+  # auto-bootstrap pattern".
+  while IFS= read -r match; do
+    [ -z "$match" ] && continue
+    line_no="${match%%:*}"
+    line_text="${match#*:}"
+    # Derive which pattern hit. First-match-wins ordering. A here-string
+    # replaces `echo | grep -q`: under pipefail, grep -q exiting early
+    # could SIGPIPE the writer and turn a real match into a false miss.
+    matched_pattern=''
+    for pat in "${PATTERNS[@]}"; do
+      if grep -E -i -q -e "$pat" <<<"$line_text"; then
+        matched_pattern="$pat"
+        break
+      fi
+    done
+    # Trim the snippet to keep WARN lines readable.
+    snippet="${line_text:0:120}"
+    printf '%s\n' "WARN  $rel:$line_no  $matched_pattern: $snippet" >&2
+    violations=$((violations + 1))
+  # grep exits 1 when a file has no hits; `|| true` keeps that benign.
+  done < <(grep -E -i -n -e "$PATTERN_RE" -- "$file" 2>/dev/null || true)
+done
+if [ "$violations" -gt 0 ]; then
+  # Label the summary with the phase that is actually in force; the
+  # Phase 1 wording is unchanged from the advisory default.
+  if [ "$WARN_ONLY" = "0" ]; then
+    phase_label='Phase 2 load-bearing'
+  else
+    phase_label='Phase 1 advisory'
+  fi
+  echo "" >&2
+  echo "check-fail-soft-skip-discipline: $violations potential fail-soft-skip site(s) across $scanned SKILL.md file(s)" >&2
+  echo "$phase_label (WR_FAIL_SOFT_SKIP_WARN_ONLY=$WARN_ONLY). Authors should pair each site with an auto-bootstrap routine per P351." >&2
+  if [ "$WARN_ONLY" = "0" ]; then
+    exit 1
+  fi
+fi
+exit 0

package/scripts/test/check-fail-soft-skip-discipline.bats ADDED Viewed

@@ -0,0 +1,172 @@
+#!/usr/bin/env bats
+# @problem P351 — skills fail-soft-skip when their precondition config is
+#                 missing — should auto-bootstrap with user input as needed
+#                 rather than silently skipping.
+#
+# Contract: `check-fail-soft-skip-discipline.sh <repo-root>` walks
+# `<repo-root>/packages/*/skills/*/SKILL.md`, greps each file for the
+# tightened fail-soft-skip pattern set
+# (fail-soft skip|silently skip|skipping.*config|skipping.*not configured|
+#  not configured.*skip), emits one `WARN  <relpath>:<line>  <pat>: ...`
+# line per match on stderr, and exits 0 in Phase 1 advisory mode
+# regardless of whether violations were found.
+#
+# @adr ADR-049 (bin/ on PATH shim — adopter-safe script resolution)
+# @adr ADR-052 (Behavioural bats default)
+# @adr ADR-040 (Advisory-then-load-bearing reusable pattern)
+# @adr ADR-013 Rule 6 (Non-interactive fail-safe — advisory exit 0)
+# @jtbd JTBD-001 (Enforce Governance — pattern-discipline lint)
+# @jtbd JTBD-101 (Extend the Suite — extensible pattern per skill site)
+# Per-test fixture: resolve the script under test (one directory above
+# this test file) and create a fresh scratch repo root.
+setup() {
+  local test_dir
+  test_dir="$(dirname "$BATS_TEST_FILENAME")"
+  SCRIPTS_DIR="$(cd "${test_dir}/.." && pwd)"
+  SCRIPT="${SCRIPTS_DIR}/check-fail-soft-skip-discipline.sh"
+  FIXTURE_ROOT="$(mktemp -d)"
+}
+# Remove the scratch repo root created by setup().
+teardown() {
+  rm -rf "$FIXTURE_ROOT"
+}
+# ── Existence + executable ──────────────────────────────────────────────────
+# Sanity checks: the remaining tests invoke "$SCRIPT" directly, so it must
+# be a regular file that carries the executable bit.
+@test "check-fail-soft-skip-discipline: script exists" {
+  [ -f "$SCRIPT" ]
+}
+@test "check-fail-soft-skip-discipline: script is executable" {
+  [ -x "$SCRIPT" ]
+}
+# ── Phase 1 advisory exit-code contract ─────────────────────────────────────
+# With WR_FAIL_SOFT_SKIP_WARN_ONLY left unset, the exit status must be 0
+# both for a fixture with no pattern hits and for one that has a hit.
+@test "Phase 1 advisory: exits 0 on a clean fixture" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/clean-skill"
+  cat > "$FIXTURE_ROOT/packages/sample/skills/clean-skill/SKILL.md" <<'EOF'
+# Sample clean skill
+This skill auto-bootstraps any missing precondition config via
+AskUserQuestion. No fail-soft pattern present.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+}
+@test "Phase 1 advisory: exits 0 even when violations present (default WR_FAIL_SOFT_SKIP_WARN_ONLY=1)" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/dirty-skill"
+  # The fixture line below contains the literal "fail-soft skip".
+  cat > "$FIXTURE_ROOT/packages/sample/skills/dirty-skill/SKILL.md" <<'EOF'
+# Sample dirty skill
+This skill performs a fail-soft skip when the config is missing.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+}
+# ── WARN-on-fixture: behavioural ────────────────────────────────────────────
+# The script writes its WARN lines to stderr; these tests read them back
+# from bats' "$output" after `run`.
+@test "WARN-on-fixture: matches 'fail-soft skip' literal" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/dirty-skill"
+  cat > "$FIXTURE_ROOT/packages/sample/skills/dirty-skill/SKILL.md" <<'EOF'
+# Dirty skill
+When the file is missing emit a fail-soft skip and continue.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+  # Expect the WARN marker, the repo-relative path, and the pattern label.
+  [[ "$output" == *"WARN"* ]]
+  [[ "$output" == *"packages/sample/skills/dirty-skill/SKILL.md"* ]]
+  [[ "$output" == *"fail-soft skip"* ]]
+}
+@test "WARN-on-fixture: matches 'silently skip' synonym" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/silent-skill"
+  cat > "$FIXTURE_ROOT/packages/sample/skills/silent-skill/SKILL.md" <<'EOF'
+# Silent skill
+If config missing, silently skip the pass.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"silently skip"* ]]
+}
+@test "WARN-on-fixture: matches tightened 'skipping.*config' shape" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/skipper-skill"
+  # Capitalised "Skipping" exercises the script's case-insensitive match.
+  cat > "$FIXTURE_ROOT/packages/sample/skills/skipper-skill/SKILL.md" <<'EOF'
+# Skipper skill
+Skipping the pass because the channels config is absent.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+  # Only the file path is asserted here, not which pattern label was printed.
+  [[ "$output" == *"skipper-skill/SKILL.md"* ]]
+}
+# ── CLEAN-on-fixture: behavioural negative ──────────────────────────────────
+# A SKILL.md that describes the auto-bootstrap behaviour and contains none
+# of the flagged phrases must produce no WARN output at all.
+@test "CLEAN-on-fixture: no WARN emitted when SKILL.md is fail-soft-skip free" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/auto-bootstrap-skill"
+  cat > "$FIXTURE_ROOT/packages/sample/skills/auto-bootstrap-skill/SKILL.md" <<'EOF'
+# Auto-bootstrap skill
+When the config file is missing, auto-bootstrap it via AskUserQuestion
+(interactive mode) or queue a config-direction outstanding_question
+(AFK mode). The skill resumes the original pass after bootstrap.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+  # Negated match is the last command, so its status decides the test.
+  ! [[ "$output" == *"WARN"* ]]
+}
+@test "CLEAN-on-fixture: does NOT false-positive on legitimate per-channel skip prose (architect tightening)" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/per-channel-skill"
+  cat > "$FIXTURE_ROOT/packages/sample/skills/per-channel-skill/SKILL.md" <<'EOF'
+# Per-channel skill
+On rate-limit, log advisory and skip that channel only — skipping the
+failing channel/report is fine because other channels proceed. This
+prose is the inverse of fail-soft-skip-on-missing-config and must NOT
+fire a WARN per the architect-tightened pattern set.
+EOF
+  run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 0 ]
+  # No fixture line matches the tightened pattern set: "skipping the" is
+  # never followed by "config" on the same line, and the hyphenated
+  # "fail-soft-skip-on-missing-config" is not the space-separated
+  # "fail-soft skip" literal. The lint works line by line, so the phrase
+  # "skipping the failing channel" (which wraps across two fixture lines)
+  # can never appear inside a single WARN line — grepping the output for
+  # it would pass even if the lint misfired. Assert instead that no WARN
+  # line is emitted at all for this fixture.
+  warn_lines=$(printf '%s\n' "$output" | grep -c '^WARN' || true)
+  [ "$warn_lines" = "0" ]
+}
+# ── Phase 2 promotion: behavioural ──────────────────────────────────────────
+# With WR_FAIL_SOFT_SKIP_WARN_ONLY=0 a pattern hit must turn into exit 1.
+@test "Phase 2 promotion: exits 1 when WR_FAIL_SOFT_SKIP_WARN_ONLY=0 + violations present" {
+  mkdir -p "$FIXTURE_ROOT/packages/sample/skills/dirty-skill"
+  # Capitalised "Fail-soft skip" relies on the case-insensitive match.
+  cat > "$FIXTURE_ROOT/packages/sample/skills/dirty-skill/SKILL.md" <<'EOF'
+# Dirty skill
+Fail-soft skip when missing.
+EOF
+  # The variable is supplied as a one-shot assignment on the `run` line.
+  WR_FAIL_SOFT_SKIP_WARN_ONLY=0 run "$SCRIPT" "$FIXTURE_ROOT"
+  [ "$status" -eq 1 ]
+}
+# ── Usage / path-error contract ─────────────────────────────────────────────
+# Exit status 2 is reserved for a bad <repo-root> argument: either the
+# path is not a directory, or it has no packages/ subdirectory.
+@test "Usage: exits 2 on missing repo-root directory" {
+  run "$SCRIPT" "$FIXTURE_ROOT/does-not-exist"
+  [ "$status" -eq 2 ]
+}
+@test "Usage: exits 2 on repo-root without packages/ subdir" {
+  mkdir -p "$FIXTURE_ROOT/no-packages-here"
+  run "$SCRIPT" "$FIXTURE_ROOT/no-packages-here"
+  [ "$status" -eq 2 ]
+}

package/skills/manage-problem/SKILL.md CHANGED Viewed

@@ -666,7 +666,12 @@ Detection is intentionally **strict** (explicit label or scoped-npm package only
   2. `Defer and note in ticket` — append a pending-upstream-report line to the ticket's `## Related` section using the stable marker `- **Upstream report pending** — external dependency identified; invoke /wr-itil:report-upstream when ready`. The marker wording is fixed so subsequent runs (and the work-problems `upstream-blocked` skip path) can detect "already noted" without re-firing.
   3. `Not actually upstream` — proceed without invocation; append the same marker with text `- **Upstream report pending** — false positive; detection misfire` so the prompt does not re-fire on later reviews.
-**Non-interactive (AFK) branch** (per ADR-013 Rule 6): when `AskUserQuestion` is unavailable, default to option 2 — append the pending-upstream-report line with the stable `- **Upstream report pending** —` marker. Do NOT auto-invoke `/wr-itil:report-upstream`; its Step 6 security-path branch is interactive and would halt the orchestrator anyway (per ADR-024 Consequences). The appended line lets the user see the deferred action when they return.
+**Non-interactive (AFK) branch** (per ADR-013 Rule 6 + ADR-024 2026-06-04 (P270) amendment): when `AskUserQuestion` is unavailable, **auto-invoke `/wr-itil:report-upstream`** instead of deferring with the marker. The skill composes the report draft via its own Steps 1–5/4b/5c/6 then scores the drafted prose via the `wr-risk-scorer:external-comms` agent (ADR-028) per the ADR-024 2026-06-04 amendment's orchestrator-side pre-fire gate. Branches:
+- **Below external-comms appetite** → the skill proceeds (public-issue path Step 5, comment path Step 5c, or security path Step 6 per the existing classification routing); commits the `## Reported Upstream` back-write per Step 7 / Step 8.
+- **Above appetite** → the skill takes risk-reducing measures (per ADR-042 within-axis precedent generalised to the external-comms risk class — the measures vocabulary is **open-ended LLM judgement** per ADR-024 2026-06-04 second-amendment ratification leaf (a): the `wr-risk-scorer:external-comms` agent's own scoring picks the remedy case-by-case, matching ADR-042's open-vocabulary precedent — NOT a bounded enumeration); re-scores; if within appetite → sends; else → **queues** an `outstanding_questions` entry naming the local ticket ID + queued report path + risk-reduce attempts + residual band + remedy ("review the queued report at `/wr-itil:report-upstream <NNN> <upstream-repo-url>` on return"). The orchestrator continues (P352 queue-and-continue). The `## Queued Upstream Report` section (renamed from `## Drafted Upstream Report` per ADR-024 2026-06-04 second-amendment leaf (c) — same shape; new name reflects the queue-for-review-on-return semantics) carries the report content for the queued question's reference. Security-path routing follows leaf (b) ratification: upstream-with-`SECURITY.md` + below-appetite → file via the declared channel; upstream-without-`SECURITY.md` but with another disclosure channel → external-comms-gated assessment considering impact to (i) our repository, (ii) our reputation, (iii) the party we are reporting to.
+The legacy `- **Upstream report pending** —` marker append (the pre-2026-06-04 AFK default) is **superseded** by this auto-invoke branch for all classifications including security. Tickets that already carry the marker from prior sessions are still handled correctly by the work-problems Step 4 classifier — the new path's "already-noted check" matches the legacy marker shape and routes to the report-upstream invocation. The marker shape is retained for backward compatibility on the parking + interactive fallback paths (interactive option 2 still appends it; see options 1/2/3 above).
 **Scope**: this detection block fires at two points —

package/skills/report-upstream/SKILL.md CHANGED Viewed

@@ -149,7 +149,7 @@ fi
   2. `Comment on the existing upstream report` — route to Step 5c with the existing URL's issue number; appropriate when new evidence has emerged since the previous report.
   3. `File a new upstream issue anyway (override)` — explicit override after user has reviewed the existing record and judged the second filing warranted (e.g. previous report was closed without resolution and a fresh tracker is needed).
-**AFK / non-interactive branch** — apply the **interim static heuristic** (no subagent dispatch; the maintainer-annoyance risk evaluator is deferred per ADR-028 line 117 — see "AFK static heuristic" below). Default action: halt and save the drafted report to the local ticket's `## Drafted Upstream Report` section; do NOT auto-comment. The static heuristic remains in place until `wr-risk-scorer:external-comms` ships, at which point the AFK branch wires the gate combination (maintainer-annoyance + leak gate, both within appetite) per the ticket Direction decision (2026-04-21).
+**AFK / non-interactive branch** — apply the **interim static heuristic** (no subagent dispatch; the maintainer-annoyance risk evaluator is deferred per ADR-028 line 117 — see "AFK static heuristic" below). Default action: halt and save the drafted report to the local ticket's `## Queued Upstream Report` section; do NOT auto-comment. The static heuristic remains in place until `wr-risk-scorer:external-comms` ships, at which point the AFK branch wires the gate combination (maintainer-annoyance + leak gate, both within appetite) per the ticket Direction decision (2026-04-21).
 #### 4b.2. Third-party search
@@ -197,20 +197,20 @@ If Stage 2 produces one or more `same-problem` matches, surface them to the user
 `uncertain` matches surface alongside `same-problem` matches with their verdict labelled, so the user can review. The skill never auto-resolves an `uncertain` verdict.
-**AFK / non-interactive branch** — apply the same interim static heuristic as 4b.1: halt and save the drafted report to the local ticket's `## Drafted Upstream Report` section. The third-party-match auto-comment path requires the deferred `wr-risk-scorer:external-comms` gate (maintainer-annoyance + leak), so the AFK branch must NOT auto-comment under the static heuristic.
+**AFK / non-interactive branch** — apply the same interim static heuristic as 4b.1: halt and save the drafted report to the local ticket's `## Queued Upstream Report` section. The third-party-match auto-comment path requires the deferred `wr-risk-scorer:external-comms` gate (maintainer-annoyance + leak), so the AFK branch must NOT auto-comment under the static heuristic.
 #### AFK static heuristic (interim, until `wr-risk-scorer:external-comms` ships)
 The Direction decision (2026-04-21) pins the AFK auto-comment branch on **two gates passing together**: the maintainer-annoyance risk evaluator AND the P064 external-comms leak gate, both within RISK-POLICY.md's commit-layer appetite (Low, ≤4/25). Neither gate exists yet — ADR-028 declares the `wr-risk-scorer:external-comms` subagent type but P064's implementation is open at WSJF 3.0 (Effort L), and the maintainer-annoyance evaluator was deferred by architect review on P070 to compose with the same subagent rather than ship as a separate evaluator (per ADR-028 line 117 — *"Third evaluator (licence-compliance, etc.) adding to the same gate — when it emerges, amend this ADR's evaluator list and the composite marker's `evaluator_set` component; no new ADR expected."*).
-**Static heuristic, valid until both gates ship**: in AFK mode, both 4b.1 and 4b.2 default to **halt and save the drafted report**. No auto-comment, no auto-file. The drafted report is appended to the local ticket's `## Drafted Upstream Report` section so the user can review and act manually on return. This matches JTBD-006's "does not trust the agent to make judgement calls" stance — the conservative default is the right interim behaviour.
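+**Static heuristic, valid until both gates ship**: in AFK mode, both 4b.1 and 4b.2 default to **halt and save the drafted report**. No auto-comment, no auto-file. The drafted report is appended to the local ticket's `## Queued Upstream Report` section so the user can review and act manually on return. This matches JTBD-006's "does not trust the agent to make judgement calls" stance — the conservative default is the right interim behaviour. **Note**: the "AFK behaviour summary" table later in this file states that this interim deferral has been lifted (the `wr-risk-scorer:external-comms` subagent has shipped) and that the halt semantics are superseded by queue-and-continue per the ADR-024 2026-06-04 (P270) amendment; read this section together with that table.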
 **Re-wire trigger**: when `wr-risk-scorer:external-comms` lands (ADR-028 implementation, P064 closure), amend this section to invoke both evaluators and proceed with auto-comment ONLY when both verdicts return PASS within appetite. Update the AFK behaviour summary table accordingly. Until then, the static heuristic stands.
-**Drafted Upstream Report save format** (used by both 4b.1 and 4b.2 AFK halts; mirrors the security-path halt pattern from Step 6 per ADR-024 Consequences lines 116, 123):
+**Queued Upstream Report save format** (used by both 4b.1 and 4b.2 AFK halts; mirrors the security-path halt pattern from Step 6 per ADR-024 Consequences lines 116, 123. **Renamed from `## Drafted Upstream Report` per ADR-024 2026-06-04 second-amendment ratification (c)** — same shape; new name reflects post-amendment semantics, queue-for-review-on-return not loop-stopping halt-and-save):
 ```markdown
-## Drafted Upstream Report
+## Queued Upstream Report
 - **Drafted**: <YYYY-MM-DD>
 - **Target upstream**: <upstream-repo-url>
@@ -460,7 +460,7 @@ Parse for a disclosure channel:
   }
   EOF
   ```
-- **`security@` mailbox** (or any `mailto:` link): **halt** and surface the mailbox + drafted report to the user. Do NOT auto-send email — out of scope, no infra. Save the drafted report to `docs/problems/<NNN>-<title>.<status>.md`'s `## Drafted Upstream Report` appendage section so the user can copy + send.
+- **`security@` mailbox** (or any `mailto:` link): **halt** and surface the mailbox + drafted report to the user. Do NOT auto-send email — out of scope, no infra. Save the drafted report to `docs/problems/<NNN>-<title>.<status>.md`'s `## Queued Upstream Report` appendage section so the user can copy + send.
 - **Other documented channel** (Tidelift, HackerOne, vendor-specific URL): halt and surface the channel + drafted report.
 If upstream has **NO `SECURITY.md`** (404):
@@ -468,9 +468,9 @@ If upstream has **NO `SECURITY.md`** (404):
   - `(a) Open a private GitHub Security Advisory` — uses `gh api repos/.../security-advisories` against the upstream if it's GitHub-hosted.
   - `(b) Contact the maintainer out-of-band first` — halt, no automated action.
   - `(c) Downgrade the classification (your judgement)` — re-route via the public-issue path in Step 5.
-- **AFK / non-interactive context**: do NOT auto-resolve. Save the drafted report to the local ticket's `## Drafted Upstream Report` section and **halt the orchestrator** — this is a loop-stopping event per ADR-024 Consequences. AFK orchestrators must never auto-report a security-classified ticket.
+- **AFK / non-interactive context** (per ADR-024 2026-06-04 (P270) amendment — **supersedes the pre-amendment "must never auto-report a security-classified ticket" rule**): score the drafted prose via `wr-risk-scorer:external-comms` (ADR-028). **Per ADR-024 2026-06-04 second-amendment leaf (b) — ratified 2026-06-04**: if upstream has a `SECURITY.md` declaring a channel AND scored below-appetite → file via that channel (Step 6 routing). If upstream has NO `SECURITY.md` but another disclosure channel exists → run the external-comms risk assessment considering impact to (i) our repository, (ii) our reputation, (iii) the party we are reporting to. Below-appetite → proceed (save the report to `## Queued Upstream Report` if the channel is non-automatable like a mailbox — the channel-action stays user-side per the no-infra-for-email constraint) + queue `outstanding_questions` entry naming the channel. Above-appetite → risk-reduce + re-score then queue per the same shape (the risk-reducing-measures vocabulary is **open-ended LLM judgement** per leaf (a) — the `wr-risk-scorer:external-comms` agent's own scoring picks the remedy case-by-case; not a bounded enumeration). Orchestrator continues per P352 queue-and-continue. The pre-amendment "AFK orchestrators must never auto-report" / "Never auto-open a public issue for a security-classified ticket" rules are **superseded** — both are now external-comms-gated per the 2026-06-04 amendment.
-**Never auto-open a public issue for a security-classified ticket.**
+**[Legacy phrase retained for traceability — superseded by ADR-024 2026-06-04 amendment]** Never auto-open a public issue for a security-classified ticket — the pre-amendment ban is now subordinated to the external-comms-gated per-classification branching above.
 ### 7. Cross-reference back-write
@@ -496,7 +496,7 @@ After the upstream issue or advisory is created (or drafted-and-saved in the sec
 Follow the ADR-014 ordering:
-1. `git add docs/problems/<NNN>-<title>.<status>.md` (and any `## Drafted Upstream Report` appendage if security-path halt fired).
+1. `git add docs/problems/<NNN>-<title>.<status>.md` (and any `## Queued Upstream Report` appendage if security-path halt fired).
 2. Score commit/push/release risk via `wr-risk-scorer:pipeline` subagent (or fall back to `/wr-risk-scorer:assess-release` skill per ADR-015).
 3. `git commit -m "docs(problems): P<NNN> reported upstream — <one-line summary>"`.
@@ -504,14 +504,14 @@ If the cumulative pipeline risk lands above appetite and `AskUserQuestion` is un
 ## AFK behaviour summary
-Five distinct AFK branches per the architect reviews of ADR-024, ADR-013 Rule 6, and the P070 dedup amendment:
+Five distinct AFK branches; **per the ADR-024 2026-06-04 (P270) amendment, ALL pre-commit branches now route through the `wr-risk-scorer:external-comms` gate** (ADR-028) — below-appetite proceeds, above-appetite risk-reduces then queues. The legacy "halt the orchestrator" semantics for dedup-match and security-path-without-declared-channel are **superseded** by queue-and-continue per P352:
 | Branch | AFK behaviour | Authority |
 |---|---|---|
-| Public-issue path (Step 5) | Proceeds. Voice-tone gate per ADR-028 may delegate-and-retry; that is the expected extra turn. | ADR-028 line 126 |
-| Dedup match — Step 4b halt (own re-run OR third-party `same-problem`) | Save drafted report to local ticket's `## Drafted Upstream Report` section. **Halt the orchestrator** — loop-stopping event. Interim static heuristic; auto-comment branch deferred until `wr-risk-scorer:external-comms` ships (ADR-028 line 117). | ADR-024 amendment 2026-04-25 (P070); Direction decision 2026-04-21 |
-| Security path with declared channel (Step 6, GitHub Advisories) | Proceeds via `gh api .../security-advisories`. | ADR-024 Decision Outcome step 6 |
-| Security path with `security@` / other / missing-SECURITY.md (Step 6) | Save drafted report to local ticket's `## Drafted Upstream Report` section. **Halt the orchestrator** — loop-stopping event. AFK orchestrators must never auto-report a security-classified ticket. | ADR-024 Consequences lines 116, 123 |
+| Public-issue path (Step 5) | Score drafted prose via `wr-risk-scorer:external-comms` (ADR-028). Below-appetite → proceed via `gh issue create`. Above-appetite → risk-reduce + re-score; if within → proceed; else → save draft to `## Queued Upstream Report` + queue `outstanding_questions` entry; orchestrator continues. Risk-reducing measures vocabulary is **open-ended LLM judgement** per ADR-024 2026-06-04 second-amendment leaf (a) — `wr-risk-scorer:external-comms` picks the remedy case-by-case. Voice-tone gate per ADR-028 may also delegate-and-retry on the proceed path. | ADR-024 2026-06-04 amendment (P270); ADR-024 2026-06-04 second-amendment (leaf a — open vocabulary); ADR-028 line 126 |
+| Dedup match — Step 4b (own re-run OR third-party `same-problem`) | Score the proposed comment body via `wr-risk-scorer:external-comms`. Below-appetite → proceed via `gh issue comment` (Step 5c). Above-appetite → risk-reduce + re-score (open-ended LLM judgement per leaf (a)); if within → proceed; else → save draft to `## Queued Upstream Report` + queue `outstanding_questions` entry; orchestrator continues. **The 2026-04-25 (P070) "interim static heuristic in force until that subagent ships" deferral is LIFTED** — the subagent (`packages/risk-scorer/agents/external-comms.md`) has shipped. | ADR-024 2026-06-04 amendment (P270); ADR-024 2026-04-25 amendment (P070); ADR-024 2026-06-04 second-amendment (leaf a) |
+| Security path with declared channel (Step 6, GitHub Advisories — upstream has `SECURITY.md`) | Per ADR-024 2026-06-04 second-amendment leaf (b) — ratified: if upstream has `SECURITY.md` AND below-appetite → **file** via the SECURITY.md-declared channel. Above-appetite → risk-reduce + re-score (open-ended LLM judgement per leaf (a)); if within → proceed; else → save draft + queue; orchestrator continues. | ADR-024 2026-06-04 amendment (P270); ADR-024 2026-06-04 second-amendment (leaf b); ADR-024 Decision Outcome step 6 |
+| Security path with `security@` / other / missing-SECURITY.md (Step 6) | Per ADR-024 2026-06-04 second-amendment leaf (b) — ratified: when upstream has NO `SECURITY.md` but another disclosure channel exists, score drafted prose via `wr-risk-scorer:external-comms` considering impact to (i) our repository, (ii) our reputation, (iii) the party we are reporting to. Below-appetite → save drafted report to `## Queued Upstream Report` + queue `outstanding_questions` entry naming the channel the user must follow on return (the no-infra-for-email constraint still holds — the channel-action remains user-side, but the queue surface replaces the loop-stopping halt). Above-appetite → risk-reduce + re-score (open-ended LLM judgement per leaf (a)) then queue per the same shape. Orchestrator continues. The pre-2026-06-04 "AFK orchestrators must never auto-report a security-classified ticket" rule is **superseded** by the external-comms-gated per-classification branching. | ADR-024 2026-06-04 amendment (P270); ADR-024 2026-06-04 second-amendment (leaf b); ADR-024 Consequences lines 116, 123 (superseded) |
 | Above-appetite commit (Step 8) | Skip the commit, report uncommitted state. | ADR-013 Rule 6 |
 ## References

package/skills/report-upstream/test/report-upstream-contract.bats CHANGED Viewed

@@ -82,6 +82,56 @@ setup() {
   [ "$status" -eq 0 ]
 }
+# ─── ADR-024 2026-06-04 (P270) amendment: external-comms-gated AFK auto-fire ──
+#
+# The 2026-06-04 amendment reverses the pre-amendment blanket-defer in AFK
+# (line 137 "AFK orchestrators should never auto-report a security-classified
+# ticket") to a per-classification external-comms gated branching. Below-
+# appetite proceeds; above-appetite risk-reduces then queues per P352.
+@test "report-upstream: SKILL.md AFK behaviour summary cites external-comms gate as authority (ADR-024 2026-06-04 P270 amendment)" {
+  # The external-comms gate must appear in the AFK summary table as the
+  # per-classification routing authority — at minimum on the public-issue,
+  # dedup, security-with-channel, and security-without-channel rows.
+  run grep -iE 'wr-risk-scorer:external-comms' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+  # At least 4 matching lines in SKILL.md (`grep -c` counts lines, not
+  # occurrences) — AFK summary rows plus at least one mention in the Step 6 prose.
+  hits=$(grep -ic 'wr-risk-scorer:external-comms' "$SKILL_MD")
+  [ "$hits" -ge 4 ]
+}
+@test "report-upstream: SKILL.md AFK summary cites P270 amendment as authority (ADR-024 2026-06-04)" {
+  run grep -iE '2026-06-04.*P270|P270.*2026-06-04' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "report-upstream: SKILL.md AFK above-appetite branch queues per P352 queue-and-continue (P270 amendment)" {
+  # The above-appetite branch on ANY AFK row must reference queue-and-continue
+  # (P352) rather than the halt-the-orchestrator semantics of the pre-amendment
+  # blanket-defer rule.
+  run grep -iE 'P352|queue-and-continue' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "report-upstream: SKILL.md marks the pre-amendment never-auto-public ban as superseded (P270 amendment)" {
+  # The legacy "Never auto-open a public issue for a security-classified
+  # ticket" prose is retained for traceability but must be marked as
+  # superseded by the 2026-06-04 amendment so readers do not act on the
+  # superseded rule.
+  run grep -iE 'superseded.*2026-06-04|2026-06-04.*supersed' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "report-upstream: SKILL.md drops the 'interim static heuristic' deferral on the dedup branch (P070 lift-condition met per P270 amendment)" {
+  # The 2026-04-25 (P070) amendment named the dedup-AFK as an "interim static
+  # heuristic in force until wr-risk-scorer:external-comms ships". The agent
+  # has shipped; the 2026-06-04 (P270) amendment lifts the deferral. The
+  # AFK summary's dedup-path row must cite the LIFT, not the deferral.
+  run grep -iE 'interim static heuristic.*lifted|deferral.*lifted|LIFTED' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
 # ─── ADR-033 problem-first classifier contract (P067) ──────────────────────────
 #
 # ADR-033 partially supersedes ADR-024 Decision Outcome Steps 3 + 5 with a

package/skills/review-problems/SKILL.md CHANGED Viewed

@@ -125,7 +125,39 @@ Per ADR-062 (peer of ADR-024). Polls configured upstream channels, runs each unm
 #### 4.5a. Read channel config + parse invocation flags
-Read `docs/problems/.upstream-channels.json`. If missing or malformed: log an advisory note (`channel config absent or malformed; inbound-discovery skipped this pass`) and skip Step 4.5 entirely. Adopters who don't ship this file inherit zero ceremony tax — the downstream-adopter non-obligation per ADR-062 § Downstream-adopter contract + JTBD-101.
+Read `docs/problems/.upstream-channels.json`. Branch on state:
+- **File exists and parses cleanly** → continue to 4.5b with the parsed `channels[]` list.
+- **File exists but is malformed JSON** → log an advisory note (`channel config malformed JSON; inbound-discovery skipped this pass — fix the file then re-invoke`) and skip Step 4.5. This is genuine fail-soft because the user already SHIPPED a config and the malformation is an editing artefact best resolved by the user reading the parse error; auto-rewriting their file would destroy their work.
+- **File does NOT exist** → run the **auto-bootstrap routine** below per P351 / JTBD-101 / JTBD-007. **Adopters who never want to configure inbound-discovery** can keep `.upstream-channels.json` absent by answering `decline` at the interactive prompt OR allowing the AFK-mode outstanding-question to lapse; the absence stays zero-ceremony-tax (ADR-062 § Downstream-adopter non-obligation), but the silent skip is replaced by an explicit one-time-per-session prompt so the adopter has a visible signal that the inbound-discovery capability exists. <!-- @jtbd JTBD-101 (Extend the Suite — deliver-installed-features signal) --> <!-- @jtbd JTBD-007 (Keep Plugins Current — process reports what configured) --> <!-- @problem P351 (auto-bootstrap on missing precondition config) -->
+**Auto-bootstrap routine (P351)**: replaces the prior "missing file → silent skip" behaviour. The routine branches on AskUserQuestion availability per ADR-013 Rule 6 + ADR-044 category 1 (direction-setting):
+- **Interactive mode** (AskUserQuestion available):
+  1. Fire **one** `AskUserQuestion` per skill invocation (NOT per pass — adopters who decline at run 1 are not re-prompted within the same session) with options: `Bootstrap now (recommended)` / `Decline (skip inbound-discovery this session)` / `Decline permanently (write empty channels stub)`.
+  2. On `Bootstrap now`: fire a second `AskUserQuestion` for channel-type (single-select: `github-issues` / `github-discussions` / `github-security-advisories`); then a third for the per-channel coordinates (repo `<owner>/<name>` for all three; `label` for github-issues; `category` for github-discussions). For the per-coordinate prompt fire ONE multi-part `AskUserQuestion` (multiple Question objects in a single call per ADR-013 Rule 1 batched ≤4) — do NOT serialise to N round-trips.
+  3. **Preview before write** (JTBD persona-fit constraint from review): emit the planned JSON contents to the agent's user-visible output so the adopter can read it before the write fires. Default `ttl_seconds: 86400` (24h — matches ADR-062's documented TTL). Channel schema mirrors the polled-channels list at 4.5c (`type` + `repo` + per-type identifier).
+  4. Write `docs/problems/.upstream-channels.json` with the bootstrapped channel + the defaulted TTL.
+  5. **Resume the original pass** at 4.5b with the freshly-written config.
+  - On `Decline (skip inbound-discovery this session)`: log advisory (`inbound-discovery bootstrap declined this session; will re-offer next invocation`) and skip Step 4.5.
+  - On `Decline permanently (write empty channels stub)`: write `{"channels": [], "ttl_seconds": 86400, "declined_at": "<ISO>"}` so future invocations parse cleanly + skip silently. Per ADR-062 § Downstream-adopter non-obligation — the empty-channels stub IS the documented "I never want this" surface.
+- **AFK mode** (AskUserQuestion unavailable, e.g. invoked from `/wr-itil:work-problems`):
+  1. Log advisory (`inbound-discovery: channel config absent; queued config-direction outstanding_question, skipping THIS pass to allow other passes to proceed`).
+  2. Queue a `direction` entry per `/wr-itil:work-problems` SKILL.md Step 5 `outstanding_questions` schema (ADR-044 category 1):
+     ```
+     {
+       category: "direction",
+       question: "Configure inbound-discovery channels in docs/problems/.upstream-channels.json? (channel-type: github-issues | github-discussions | github-security-advisories; per-channel: repo + label/category; ttl_seconds default 86400)",
+       context: "/wr-itil:review-problems Step 4.5a: precondition config missing — auto-bootstrap blocked by AFK AskUserQuestion unavailability per ADR-013 Rule 6; deferring to loop-end Step 2.5 batched AskUserQuestion",
+       ticket_id: "<the iter's ticket>"
+     }
+     ```
+  3. **Skip Step 4.5 for THIS pass and continue the review** — the missing-channels skip applies to inbound-discovery only; other passes in the review proceed normally per the Fail-soft contract at 4.5 head. Do NOT halt the iter — the loop-end Step 2.5 batched `AskUserQuestion` is the documented surfacing point per the AFK contract.
+The routine preserves the ADR-062 § Downstream-adopter non-obligation (adopters CAN decline) while honouring P351 (adopters can no longer be silently under-delivered). Phase 1 lint at `wr-itil-check-fail-soft-skip-discipline` flags this site's adjacent siblings for follow-on remediation.
 Parse `$ARGUMENTS` as a whitespace-separated token list. Recognised invocation flags for inbound-discovery:

package/skills/work-problems/SKILL.md CHANGED Viewed

@@ -335,7 +335,7 @@ Read the problem file and apply these deterministic rules:
 | Problem previously attempted twice without progress in this session | **Skip** — mark as stuck, needs interactive attention | user-answerable (direction) |
 | Open problem with outstanding user-answerable design question (naming, direction, pacing, scope) | **Skip** — surface the question at stop (Step 2.5) | user-answerable (design) |
 | Open problem needing architect design judgment (new-ADR-level question) | **Skip** — note the architect-design blocker; Step 2.5 may elevate via a pre-triggered architect call in `--deep-stop` mode | architect-design |
-| Open problem blocked on upstream dependency or Claude Code capability gap | **Skip** — but first append the pending-upstream-report marker to the ticket's `## Related` section (see P063 — run the manage-problem SKILL.md external-root-cause detection AFK fallback before skipping). The marker wording is fixed: `- **Upstream report pending** — external dependency identified; invoke /wr-itil:report-upstream when ready`. Use the already-noted check to avoid duplicates. | upstream-blocked |
+| Open problem blocked on upstream dependency or Claude Code capability gap | **Auto-invoke `/wr-itil:report-upstream` via the AFK fallback** (per ADR-024 2026-06-04 (P270) amendment — manage-problem Step 6 external-root-cause detection AFK fallback owns the actual invocation; this row routes through it). The report-upstream skill composes the draft then scores the prose via `wr-risk-scorer:external-comms` (ADR-028); below-appetite → sends; above-appetite → risk-reduces (open-ended LLM judgement per ADR-024 2026-06-04 second-amendment leaf (a)) then re-scores → sends-or-queues. Security routing per leaf (b): upstream-with-`SECURITY.md` + below-appetite → files via declared channel; upstream-without-`SECURITY.md` → external-comms-gated impact assessment to (i) our repo, (ii) our reputation, (iii) reported party. Queued reports save to `## Queued Upstream Report` (renamed from `## Drafted Upstream Report` per leaf (c)). Queue does NOT halt — outstanding_question surfaces at Step 2.4 / Step 2.5b end-of-loop per P352. Iter still classifies the ticket as `upstream-blocked` (the local ticket itself is still blocked on the upstream fix) and **skips work on it** after the report-upstream invocation completes — the report-upstream call is the action this row takes; classification stays `upstream-blocked` so Step 4 routes to skip-rather-than-work. Tickets already carrying `- **Upstream report pending** —` from prior sessions are detected via the already-noted check and routed to the report-upstream invocation (the marker shape is retained as the detection substrate per the 2026-06-04 amendment). | upstream-blocked |
 The default is to work the problem. Only skip when the rule explicitly says so. This is an AFK loop — forward progress matters more than avoiding dead ends, because dead ends are cheap (findings are saved) and interactive input is expensive (user is absent).
@@ -343,7 +343,7 @@ The default is to work the problem. Only skip when the rule explicitly says so.
 - **user-answerable** — the user can answer directly (verification, naming, direction, pacing, scope). Step 2.5 surfaces these as questions (interactive) or in the Outstanding Design Questions table (non-interactive / AFK).
 - **architect-design** — requires architect judgment first; may escalate to a new ADR. Step 2.5 can optionally pre-trigger the architect agent in `--deep-stop` mode to produce a concrete user-answerable question. Otherwise noted as "pending architect review".
-- **upstream-blocked** — external dependency, Claude Code capability gap, or waiting on third-party fix. Truly terminal for this loop — no user question would change anything. Report the blocker and move on. **Before skipping, run the manage-problem external-root-cause detection AFK fallback** (per P063): grep the ticket for the stable marker `- **Upstream report pending** —` or `- **Reported Upstream:**` / a `## Reported Upstream` section; if none is present, append `- **Upstream report pending** — external dependency identified; invoke /wr-itil:report-upstream when ready` to the ticket's `## Related` section. This preserves the outbound audit trail across AFK iterations so the user can see the deferred action on return.
+- **upstream-blocked** — external dependency, Claude Code capability gap, or waiting on third-party fix. Truly terminal for this loop — no user question would change anything. Report the blocker (now via auto-invoke of `/wr-itil:report-upstream`, per ADR-024 2026-06-04 (P270) amendment) and move on. **Before skipping, run the manage-problem external-root-cause detection AFK fallback** (per P063 amended 2026-06-04): the fallback now invokes `/wr-itil:report-upstream` rather than only appending the marker. The report-upstream skill scores the drafted prose via `wr-risk-scorer:external-comms` (ADR-028); below-appetite branches send (public-issue Step 5 / comment Step 5c / security Step 6 per classification); above-appetite branches risk-reduce + re-score; if-still-above queue an `outstanding_questions` entry per P352 queue-and-continue (orchestrator does NOT halt). Existing tickets carrying `- **Upstream report pending** —` or `- **Reported Upstream:**` / a `## Reported Upstream` section are detected via the already-noted check; the marker shape is retained for backward compatibility and as the detection substrate. The outbound audit trail across AFK iterations now reflects ACTUAL filings (or queued-for-review drafts), not just deferred intents.
 Record the category alongside the skip reason in the iteration report so Step 2.5 can read the categories deterministically.
@@ -666,14 +666,24 @@ After the iteration's commit lands but before starting the next iteration, check
 **Cohort-graduation pre-check (per ADR-061 Rule 5; P246):** when the within-appetite-with-releasable-material branch fires AND `docs/changesets-holding/` is non-empty, invoke the graduation evaluator BEFORE the Drain action. The evaluator is the deterministic Rule 1a join + Rule 2 VP carve-out + Rule 3b cohort-grouping pass shipped in `@windyroad/risk-scorer` Phase 2a/2b. **The graduation criterion is evidence-of-working-as-desired (Rule 4 per-class evidence floor), not elapsed wall-clock time** — per user direction 2026-05-17: *"Dogfooding makes sense, but it shouldn't be time based, it should be until we are happy that it's working as desired."* + *"Why are we waiting? That seems to go against the principles if you ask me."* Calendar predicates are NEVER a primary graduation trigger; the evaluator's `status=resolved` is the necessary precondition (necessary but not sufficient), and the Rule 4 evidence-floor judgement (step 2a) is what ratifies graduation.
 1. Run the shim: `wr-risk-scorer-evaluate-graduation` (resolves to `packages/risk-scorer/scripts/evaluate-graduation.sh` per ADR-049 naming grammar). The script enumerates `docs/changesets-holding/*.md` (excluding README), applies ADR-061 Rule 1a join + Rule 2 VP carve-out + Rule 3b cohort grouping, and emits one `GRADUATION_CANDIDATE:` line per held entry plus a final `GRADUATION_SUMMARY:` line.
+   **Evaluator scope (load-bearing per P308)**: the evaluator script implements ONLY the deterministic Rule 1a + Rule 2 + Rule 3b passes. It does NOT compute release-risk and does NOT apply Rule 4 evidence-floor judgement — those are LLM-judgement surfaces owned by the orchestrator + the `wr-risk-scorer:pipeline` agent. Therefore evaluator `status=resolved` means *"the ticket-join succeeded and the entry is not VP-blocked"* only — it is **necessary but not sufficient** for graduation. The Rule 4 evidence-floor judgement (per-class evidence per ADR-061 Rule 4) MUST run as a separate orchestrator-side step before any `git mv` (P308 amendment).
 2. Parse each `GRADUATION_CANDIDATE: changeset=<basename> | ticket=<P-id> | priority=<N> | class=<3a|3b> | [cohort=<id> |] status=<resolved|vp-blocked|halt-no-resolution>` line. Branch on `status`:
-   - **`status=resolved`** — graduate. Per ADR-061 Rule 5 + ADR-013 Rule 5, this is policy-authorised silent proceed (no `AskUserQuestion`). Perform `git mv docs/changesets-holding/<basename> .changeset/<basename>`. Append the entry to `docs/changesets-holding/README.md` "Recently reinstated" with the citation: `<basename> — graduation criterion met (status=resolved per Rule 1a join to <P-id>, Priority <N>); class <3a|3b>; evidence cited`. For class=3b cohorts, ALL members of the cohort with `status=resolved` graduate together atomically (Rule 3b cohort propagation — entire cohort ships or none does). Amend the iter's main commit per ADR-042 Rule 3 amend-based folding to preserve the ADR-032 one-commit-per-iteration invariant.
+   - **`status=resolved`** — route to **Rule 4 evidence-floor judgement** (P308 amendment; see step 2a below). The evaluator's `status=resolved` is necessary-but-not-sufficient; Rule 4 judgement is the LLM-owned surface that ratifies the *evidence floor* per ADR-061 Rule 4 + ADR-044 framework-resolution boundary. Do NOT auto-graduate at this point. Per ADR-061 Rule 5: the graduation criterion authorises the *intent*; Rule 4 judgement is the precondition that admits *evaluation* of that criterion.
    - **`status=vp-blocked`** — skip. Per ADR-061 Rule 2 (Verification Pending carve-out; symmetric to ADR-042 Rule 2b). Do NOT graduate; held entry stays. The `.verifying.md` → `.closed.md` transition auto-clears the carve-out at a later Step 6.5 graduation pass.
    - **`status=halt-no-resolution`** — halt. Per ADR-061 Rule 1a terminal: when neither filename-convention join nor body-grep fallback resolves a ticket, OR the resolved ticket file is missing/unreadable, the orchestrator MUST NOT auto-graduate. Route to the **Step 6.5 cohort-graduation halt-no-resolution** halt point (framework-prescribed halt — see Mid-loop ask discipline subsection); halt-with-batched-questions per the Step 2.5b cross-reference.
+2a. **Rule 4 evidence-floor judgement (P308 — LLM-owned, NOT framework-resolved)**. For each `status=resolved` candidate, the orchestrator MUST evaluate whether the class-specific evidence floor has been met before graduating. The evidence floor (ADR-061 Rule 4) is per-class: PreToolUse:Bash gates need ≥1 gate-fire trace; UserPromptSubmit detectors need ≥1 detector firing; commit-hook-with-auto-fix needs ≥1 correctness-verified auto-fix commit; SessionStart additionalContext needs ≥1 session-trail entry. The orchestrator reads the held entry's `Reinstate criterion (evidence-based, ...)` line from `docs/changesets-holding/README.md` Currently held entry + cross-references the on-disk evidence artefact named in the criterion. Per ADR-044, Rule 4 judgement is LLM-owned (not framework-resolved). Route per AskUserQuestion availability:
+    - **Interactive (`AskUserQuestion` available)**: fire `AskUserQuestion` **per held entry** (NOT batched across cohort — each entry's evidence is independent, except for class=3b cohorts which graduate atomically per Rule 3b — when any cohort member's `AskUserQuestion` returns Graduate, ALL same-cohort members graduate together; when any returns Defer/Reject, the entire cohort stays held). Question text MUST inline (P350 brief-before-ID discipline): the held entry's basename + ticket + Priority, the per-entry evidence summary (from the README's Currently held entry — Rule 4 class-specific shape + cited artefact), and the candidate verdict. Options: **Graduate (evidence cited and met)** / **Defer (evidence not yet met — preserve hold)** / **Reject (manual intervention — surface back to user)**. Per ADR-013 Rule 1 cap (≤4 per call, sequential if >4). On `Graduate`, fall through to step 2b graduate-action. On `Defer`/`Reject`, the held entry stays in `docs/changesets-holding/`; emit one Auto-apply-trail line per ADR-061 Rule 6 citing the user's verdict + reason; proceed to next candidate. This per-entry AskUserQuestion is a framework-prescribed user-interaction surface — see Mid-loop ask discipline subsection.
+    - **AFK (`AskUserQuestion` forbidden — iter subprocess / non-interactive context per P352 / ADR-013 Rule 6)**: queue one `outstanding_question` entry (category: `direction`) per held entry to `.afk-run-state/outstanding-questions.jsonl`. The entry's `question` field inlines the same evidence summary the interactive `AskUserQuestion` would surface (P350 brief-before-ID — do NOT use opaque IDs alone). Do NOT graduate. Continue Drain action: any pre-existing `.changeset/` entries still drain per the within-appetite contract (those entries were not graduations and are unaffected). Per P352 user-ratified universal AFK default: queue-and-continue (NOT halt, NOT silent skip, NOT auto-default). Loop-end Step 2.5 surfaces the queued questions as batched `AskUserQuestion` per the existing accumulated-question discipline.
+2b. **Graduate-action (Rule 4-ratified path)**. When Rule 4 evidence-floor judgement returns `Graduate` (interactive `AskUserQuestion` Graduate verdict) for a held entry: perform `git mv docs/changesets-holding/<basename> .changeset/<basename>`. Append the entry to `docs/changesets-holding/README.md` "Recently reinstated" with the citation: `<basename> — graduation criterion met (status=resolved per Rule 1a join to <P-id>, Priority <N>; Rule 4 evidence-floor ratified per user verdict <verbatim>); class <3a|3b>; evidence cited`. For class=3b cohorts: when any cohort member's Rule 4 judgement returns `Graduate`, ALL same-cohort members with `status=resolved` graduate together atomically (Rule 3b cohort propagation — entire cohort ships or none does); when any returns Defer/Reject, the entire cohort stays held. Amend the iter's main commit per ADR-042 Rule 3 amend-based folding to preserve the ADR-032 one-commit-per-iteration invariant.
 3. After processing all candidates: if anything graduated, the just-moved entries are now in `.changeset/` and ride the existing Drain action (no separate re-entry needed — the Drain action's `release:watch` step picks them up when `.changeset/` is non-empty). Proceed to the Drain action below.
 4. **Governance gates apply (ADR-061 Rule 7)**: every graduation reinstate goes through the standard ADR-014 commit flow — architect / JTBD / risk-scorer gates ride the amend commit; gate rejection routes to ADR-042 Rule 5 halt with the rejection reason logged. The graduation criterion authorises the *intent*; the gates authorise the *action*.
-**Idempotency**: safe to invoke when holding-area is empty (script exits 1 with `GRADUATION_SUMMARY: total=0` — orchestrator skips graduation, proceeds to Drain action). Safe when no candidates resolve (all `vp-blocked`) — no `git mv` operations, no README mutation, no commit amendment.
+**Idempotency**: safe to invoke when holding-area is empty (script exits 1 with `GRADUATION_SUMMARY: total=0` — orchestrator skips graduation, proceeds to Drain action). Safe when no candidates resolve (all `vp-blocked`) — no `git mv` operations, no README mutation, no commit amendment. Safe when AFK + all `status=resolved` route to queue (no graduation performed; outstanding_questions accumulate for loop-end surfacing per P352 / ADR-013 Rule 6).
 **Audit trail (ADR-061 Rule 6)**: every `reinstate-from-holding` graduation appends one Auto-apply-trail line to the iter report AND one "Recently reinstated" line to `docs/changesets-holding/README.md` with the resolved problem-ticket ID, Priority value, graduation class (3a or 3b), and the evidence citation. The audit trail is the load-bearing artefact for ADR-026 cite + persist + uncertainty grounding.
@@ -818,7 +828,7 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
 | Commit when risk above appetite | Skip commit, report uncommitted state |
 | Pipeline risk within appetite (≤ 4/25) with releasable material (any unpushed commits OR any `.changeset/` entries OR any graduation-eligible held entries per ADR-061 Rule 1) | Drain release queue (`push:watch` then, if releasable changesets exist, `release:watch`) before next iteration — per ADR-018 (Step 6.5) as amended by P250. Trigger is *presence of releasable material*, not residual band reaching appetite. User direction 2026-05-17: "If it's low risk, you should release." |
 | Pipeline risk within appetite (≤ 4/25) AND empty queue (no unpushed commits AND no `.changeset/` AND no graduation-eligible held entries) | No drain — literally nothing to release. Proceed directly to Step 6.75. The genuine no-op fast-path per P250. |
-| Cohort-graduation pre-check fires before Drain action (within-appetite branch, `docs/changesets-holding/` non-empty) — evaluator returns `status=resolved` | Graduate. Per ADR-061 Rule 5 + ADR-013 Rule 5 policy-authorised silent proceed (no `AskUserQuestion`): `git mv docs/changesets-holding/<basename> .changeset/<basename>`, append README "Recently reinstated" entry, amend the iter's main commit per ADR-042 Rule 3. For class=3b cohorts, all cohort members with `status=resolved` graduate atomically (Rule 3b cohort propagation). Per ADR-061 Rule 5 + Rule 6 + Rule 7 + P246 (Step 6.5 Cohort-graduation pre-check). Graduation criterion is evidence-of-working-as-desired (Rule 4 evidence floor), not elapsed wall-clock time — user direction 2026-05-17: "Dogfooding makes sense, but it shouldn't be time based, it should be until we are happy that it's working as desired." |
+| Cohort-graduation pre-check fires before Drain action (within-appetite branch, `docs/changesets-holding/` non-empty) — evaluator returns `status=resolved` | Route to Rule 4 evidence-floor judgement (LLM-owned per ADR-061 Rule 4 + ADR-044 framework-resolution boundary). Evaluator's `status=resolved` is necessary-but-not-sufficient (P308 — evaluator script disclaims Rule 4 at lines 19-22). Interactive: per-held-entry `AskUserQuestion` with inline evidence summary (P350 brief-before-ID) + 3 options (Graduate / Defer / Reject). AFK: queue per-held-entry `outstanding_question` to `.afk-run-state/outstanding-questions.jsonl` (P352 / ADR-013 Rule 6 queue-and-continue universal default) — do NOT graduate, continue Drain for any pre-existing `.changeset/` entries. On Graduate verdict: `git mv docs/changesets-holding/<basename> .changeset/<basename>`, append README "Recently reinstated" entry citing the user's Rule 4 verdict, amend the iter's main commit per ADR-042 Rule 3. For class=3b cohorts, all cohort members graduate atomically on any-member Graduate verdict (Rule 3b cohort propagation); any Defer/Reject keeps entire cohort held. Per ADR-061 Rule 4 + Rule 5 + Rule 6 + Rule 7 + ADR-013 Rule 6 + P246 + P308 + P350 + P352 (Step 6.5 Cohort-graduation pre-check; step 2a Rule 4 evidence-floor judgement). Graduation criterion is evidence-of-working-as-desired (Rule 4 evidence floor), not elapsed wall-clock time — user direction 2026-05-17: "Dogfooding makes sense, but it shouldn't be time based, it should be until we are happy that it's working as desired." |
 | Cohort-graduation pre-check — evaluator returns `status=vp-blocked` | Skip. Per ADR-061 Rule 2 Verification Pending carve-out (symmetric to ADR-042 Rule 2b). Do NOT graduate; held entry stays. `.verifying.md` → `.closed.md` transition auto-clears the carve-out at a later pass. Per ADR-061 Rule 2 + P246. |
 | Cohort-graduation pre-check — evaluator returns `status=halt-no-resolution` | Halt at the framework-prescribed "Step 6.5 cohort-graduation halt-no-resolution" halt point. Per ADR-061 Rule 1a terminal: ambiguous join is a user-decision surface, not an agent-decision surface. Halt-with-batched-questions per the Step 2.5b cross-reference. Per ADR-061 Rule 1a + P246. |
 | Post-release plugin cache refresh between iters (P233) | After a successful within-appetite Drain action shipped a release to npm, chain `/install-updates` to refresh the plugin cache before the next iter dispatches. Conditional on actual release (skipped when `push:watch` ran alone with no changeset); non-blocking on `/install-updates` failure (degrades to cache-stays-stale, equivalent to pre-amendment behaviour). Mid-loop ask discipline preserved by treating any `/install-updates` AskUserQuestion surface AS the Non-interactive fallback dry-run path. Per ADR-013 Rule 5 + ADR-044 + P130 + P106 + P233 (Step 6.5 Post-release cache refresh subsection). |
@@ -833,7 +843,7 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
 | Unexpected dirty state between iterations | Halt the loop. Report the `git status --porcelain` output, the last iteration's reported outcome, and the divergence — per P036 (Step 6.75). Run Step 2.5b before emitting the halt summary if ≥1 accumulated user-answerable skip from prior iters (P126). Do NOT attempt non-interactive recovery of the dirty state itself. |
 | Iter committed cleanly + claim contradicts on-disk ADR Confirmation state (P335) | Halt the loop with `outcome: halted-iter-over-claim`. Include the `wr-itil-verify-iter-summary` stdout (the `OVER-CLAIM: ADR-NNN has N unchecked Confirmation item(s)...` lines) as the divergence detail. Run Step 2.5b before emitting the halt summary if ≥1 accumulated user-answerable skip from prior iters. Do NOT auto-correct the iter's claim — the orchestrator cannot retroactively make a false claim true; the user adjudicates on return (re-dispatch / accept partial / amend). Per ADR-013 Rule 6 + ADR-032 subprocess-boundary trust contract + P335 (Step 6.75 verify-iter-claims sub-step). |
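 | External root cause detected at Open → Known Error, or at park with `upstream-blocked` reason | Auto-invoke `/wr-itil:report-upstream` via the manage-problem external-root-cause detection AFK fallback (per ADR-024 2026-06-04 (P270) amendment, which supersedes the earlier "do NOT auto-invoke" rule). The skill scores the drafted prose via `wr-risk-scorer:external-comms`: below-appetite → sends; above-appetite → risk-reduce + re-score, then send or queue an `outstanding_questions` entry per P352 (orchestrator does NOT halt). The stable `- **Upstream report pending** — external dependency identified; invoke /wr-itil:report-upstream when ready` marker shape is retained as the detection substrate; use the already-noted grep check to avoid duplicate lines. Per P063 (amended 2026-06-04) + ADR-013 Rule 6 + ADR-024 2026-06-04 amendment (P270). |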
-| Mid-loop ask between iters in the orchestrator's main turn | Forbidden except at framework-prescribed halt points (Step 0 session-continuity / fetch-failure halt; Step 2.5 / 2.5b loop-end emit; Step 6.5 above-appetite Rule 5 halt; Step 6.5 CI-failure / release:watch halt; Step 6.5 cohort-graduation halt-no-resolution halt; Step 6.75 dirty-for-unknown-reason halt). The loop's purpose is **progress + accumulation**; mechanical-stage transitions between iters are framework-resolved and MUST NOT prompt the user. Per ADR-044 framework-resolution boundary + ADR-013 Rule 1 (as amended by ADR-044) + P130. |
+| Mid-loop ask between iters in the orchestrator's main turn | Forbidden except at framework-prescribed user-interaction points (Step 0 session-continuity / fetch-failure halt; Step 2.5 / 2.5b loop-end emit; Step 6.5 above-appetite Rule 5 halt; Step 6.5 CI-failure / release:watch halt; Step 6.5 cohort-graduation halt-no-resolution halt; Step 6.5 cohort-graduation per-entry Rule 4 evidence-floor judgement (P308 — interactive only; AFK queues per P352); Step 6.75 dirty-for-unknown-reason halt). The loop's purpose is **progress + accumulation**; mechanical-stage transitions between iters are framework-resolved and MUST NOT prompt the user. Per ADR-044 framework-resolution boundary + ADR-013 Rule 1 (as amended by ADR-044) + P130. |
 ### Mid-loop ask discipline (orchestrator main turn) — P130
@@ -847,6 +857,7 @@ The orchestrator MUST NOT call `AskUserQuestion` between iterations except at th
 - **Step 6.5 above-appetite Rule 5 halt** — auto-apply loop exhausted without convergence; halt-with-batched-questions per the Step 2.5b cross-reference (Step 2.5b surfaces *prior-iter accumulated user-answerable skips only* — the halt-causing scorer-gap remains a halt-with-bug-signal per ADR-042 Rule 5).
 - **Step 6.5 CI-failure / `release:watch` failure halt** — push:watch or release:watch failed AND the failure is genuinely-unrecoverable (outside the fixable-in-iter allow-list, or 3-retry cap reached); halt-with-batched-questions per the Step 2.5b cross-reference. Failures inside the closed allow-list route to fix-and-continue per Step 6.5 Failure handling (P140), not this halt point.
 - **Step 6.5 cohort-graduation halt-no-resolution halt (P246)** — graduation evaluator returned `status=halt-no-resolution` for one or more held candidates (Rule 1a terminal: neither filename-convention join nor body-grep fallback resolved a problem ticket, OR the resolved ticket file is missing/unreadable). The orchestrator MUST NOT auto-graduate under ambiguity per ADR-061 Rule 1a; halt-with-batched-questions per the Step 2.5b cross-reference. The halt-causing ambiguity itself remains a halt-with-bug-signal (the held entry stays in `docs/changesets-holding/`; manual reinstate or ticket-file correction required); Step 2.5b surfaces *prior-iter accumulated user-answerable skips only* and does NOT ask the user to resolve the ambiguity itself.
+- **Step 6.5 cohort-graduation per-entry Rule 4 evidence-floor judgement (P308) — interactive only** — graduation evaluator returned `status=resolved` for ≥1 held candidate AND `AskUserQuestion` is available. Per ADR-061 Rule 4 + ADR-044 framework-resolution boundary, Rule 4 evidence-floor judgement is LLM-owned (not framework-resolved); the user ratifies per held entry with Graduate / Defer / Reject before any `git mv`. This is NOT a halt — the orchestrator continues the loop after the user verdict (graduate path performs git mv + README append + ADR-042 Rule 3 amend; defer/reject paths preserve the hold). When `AskUserQuestion` is unavailable (AFK path), the orchestrator queues `outstanding_question` entries per held candidate per P352 / ADR-013 Rule 6 queue-and-continue universal default — does NOT halt, does NOT silently proceed, does NOT auto-default. The held entries' user ratifications then surface at Step 2.5 loop-end via the existing accumulated-questions discipline.
 - **Step 6.75 dirty-for-unknown-reason halt** — `git status --porcelain` divergence; halt-with-batched-questions per the Step 2.5b cross-reference.
 - **Step 6.75 iter-over-claim halt (P335)** — `wr-itil-verify-iter-summary` detected the iter's commit message or `ITERATION_SUMMARY.notes` contains completion-claim language for an ADR whose `## Confirmation` section still has unchecked `- [ ]` items; halt-with-batched-questions per the Step 2.5b cross-reference. The over-claim itself remains a halt-with-bug-signal — Step 2.5b surfaces *prior-iter accumulated user-answerable skips only*; it does NOT ask the user how to remediate the false claim (re-dispatch / accept partial / amend the commit remains a user decision on return).

package/skills/work-problems/test/work-problems-step-6-5-cohort-graduation.bats CHANGED Viewed

@@ -88,19 +88,22 @@ setup() {
 # ── Three-status branching contract ───────────────────────────────────────
-@test "work-problems P246: pre-check branches on status=resolved → graduate" {
-  # The load-bearing positive contract: status=resolved means graduate
-  # via git mv + README append + ADR-042 Rule 3 amend.
-  run grep -nE '`status=resolved`.*graduate' "$SKILL_MD"
+@test "work-problems P246/P308: status=resolved routes to Rule 4 evidence-floor judgement (NOT auto-graduate)" {
+  # P308 amendment: status=resolved is necessary-but-not-sufficient.
+  # The evaluator script disclaims Rule 4 (LLM-owned). SKILL prose must
+  # surface Rule 4 evidence-floor judgement before any git mv.
+  run grep -nE '`status=resolved`.*route to.*Rule 4 evidence-floor judgement' "$SKILL_MD"
   [ "$status" -eq 0 ]
 }
-@test "work-problems P246: status=resolved branch performs git mv from holding to .changeset/" {
+@test "work-problems P308: Rule 4-ratified graduate path performs git mv from holding to .changeset/" {
+  # The git mv is downstream of the Rule 4 evidence-floor ratification,
+  # not directly on the evaluator's status=resolved.
   run grep -nE 'git mv docs/changesets-holding/<basename> \.changeset/<basename>' "$SKILL_MD"
   [ "$status" -eq 0 ]
 }
-@test "work-problems P246: status=resolved branch amends the iter commit per ADR-042 Rule 3" {
+@test "work-problems P308: Rule 4-ratified graduate path amends the iter commit per ADR-042 Rule 3" {
   run grep -nE 'Amend the iter.s main commit per ADR-042 Rule 3 amend-based folding' "$SKILL_MD"
   [ "$status" -eq 0 ]
 }
@@ -194,23 +197,49 @@ setup() {
   [ "$status" -eq 0 ]
 }
-# ── Policy authorisation (ADR-013 Rule 5 + ADR-061 Rule 5) ────────────────
+# ── Policy authorisation (ADR-013 Rules 1/5/6 + ADR-061 Rules 4/5 + P352) ─
-@test "work-problems P246: resolved-branch is policy-authorised silent proceed (no AskUserQuestion)" {
-  run grep -nE 'policy-authorised silent proceed.*no.*AskUserQuestion' "$SKILL_MD"
+@test "work-problems P308: pre-check cites ADR-061 Rule 4 as the LLM-owned evidence-floor surface" {
+  # Rule 4 evidence-floor judgement is class-specific and LLM-owned per
+  # ADR-044 framework-resolution boundary; surfacing it as AskUserQuestion
+  # (interactive) or queue (AFK) is the load-bearing delegation surface.
+  run grep -nE 'ADR-061 Rule 4' "$SKILL_MD"
   [ "$status" -eq 0 ]
 }
-@test "work-problems P246: pre-check cites ADR-013 Rule 5" {
-  run grep -nE 'ADR-013 Rule 5' "$SKILL_MD"
+@test "work-problems P308: pre-check cites ADR-013 Rule 6 as the AFK queue-and-continue fail-safe" {
+  # Per P352 user-ratified universal default: when AskUserQuestion is
+  # unavailable (AFK iter), queue the question + continue — do NOT halt,
+  # do NOT silently fail-soft, do NOT auto-default.
+  run grep -nE 'ADR-013 Rule 6.*P352|P352.*ADR-013 Rule 6' "$SKILL_MD"
   [ "$status" -eq 0 ]
 }
-@test "work-problems P246: pre-check cites ADR-061 Rule 5" {
+@test "work-problems P308: pre-check cites ADR-061 Rule 5 (graduation criterion authorises intent, gates authorise action)" {
   run grep -nE 'ADR-061 Rule 5' "$SKILL_MD"
   [ "$status" -eq 0 ]
 }
+@test "work-problems P308: interactive branch fires AskUserQuestion per-held-entry (not silent proceed)" {
+  # The amendment lifts the prior silent-proceed clause — Rule 4 judgement
+  # is LLM-owned; the user must ratify before the git mv.
+  run grep -nE 'interactive.*AskUserQuestion.*per[- ]held[- ]entry|per[- ]held[- ]entry.*AskUserQuestion' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P308: AFK branch queues outstanding_question per held entry (queue-and-continue)" {
+  run grep -nE 'AFK.*queue.*outstanding_question.*per held entry|queue.*outstanding_question.*per held entry.*AFK|AFK.*queue.*outstanding_question|outstanding_question.*do NOT graduate' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P308: AskUserQuestion option labels inline evidence + Rule 4 criterion (P350 brief-before-ID)" {
+  # P350 empathy-gap correction: AskUserQuestion text must inline the
+  # per-held-entry evidence summary + Rule 4 class-specific criterion;
+  # opaque P-ID / ADR-ID references alone are non-compliant.
+  run grep -nE 'P350|brief.*before.*ID|inline.*evidence' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
 # ── Governance gates apply (ADR-061 Rule 7) ───────────────────────────────
 @test "work-problems P246: pre-check governance gates apply per ADR-061 Rule 7" {
@@ -288,3 +317,10 @@ setup() {
   run grep -nE 'P246' "$HOLDING_README"
   [ "$status" -eq 0 ]
 }
+# ── P308 self-identification (ticket-trace) ───────────────────────────────
+@test "work-problems P308: SKILL.md self-identifies the Rule 4 evidence-floor amendment as P308" {
+  run grep -nE 'P308' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}