npm - @windyroad/itil - Versions diffs - 0.23.0 → 0.23.1-preview.252 - Mend

@windyroad/itil 0.23.0 → 0.23.1-preview.252

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/.claude-plugin/plugin.json +1 -1
package/hooks/manage-problem-enforce-create.sh +16 -1
package/hooks/test/manage-problem-enforce-create.bats +61 -0
package/package.json +1 -1
package/skills/manage-problem/SKILL.md +41 -0
package/skills/manage-problem/test/manage-problem-p119-recovery-path.bats +165 -0
package/skills/work-problems/SKILL.md +31 -3
package/skills/work-problems/test/work-problems-step-6-5-fix-and-continue.bats +254 -0

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
   "name": "wr-itil",
-  "version": "0.23.0",
+  "version": "0.23.1",
   "description": "ITIL-aligned IT service management for Claude Code"
 }

package/hooks/manage-problem-enforce-create.sh CHANGED Viewed

@@ -117,5 +117,20 @@ if check_create_gate "$SESSION_ID"; then
   exit 0
 fi
-create_gate_deny "BLOCKED: Cannot Write '${BASENAME}' under docs/problems/ without running /wr-itil:manage-problem Step 2 (duplicate-check) first. New problem tickets MUST be created via the skill so the duplicate-prevention grep fires before the file lands. Invoke the Skill tool with skill='wr-itil:manage-problem' and a description of the new problem; Step 2 will grep for related existing tickets and surface any matches via AskUserQuestion before creating the new ticket. (P119)"
+# P144 / ADR-048: gate-misfire recovery hint. When SOME marker exists (for
+# any SID) but the gate denies, the agent is likely hitting the P124 Phase 3
+# helper regression — `mark_step2_complete` succeeded but the marker landed
+# under the wrong UUID. Append a recovery pointer to the deny message so
+# the agent finds the documented two-tier procedure in SKILL.md Step 2
+# substep 7 instead of reaching for the brute-force-marker anti-pattern
+# (139-marker incident, 2026-04-28 P144 driver evidence).
+#
+# Routine first-creation deny (no marker for ANY SID in this session)
+# leaves the deny message unchanged — the helper-bug signal is conditional.
+RECOVERY_HINT=""
+if compgen -G '/tmp/manage-problem-grep-*' > /dev/null 2>&1; then
+  RECOVERY_HINT=" (Helper succeeded but SID mismatch detected — see manage-problem SKILL.md Step 2 substep 7.)"
+fi
+create_gate_deny "BLOCKED: Cannot Write '${BASENAME}' under docs/problems/ without running /wr-itil:manage-problem Step 2 (duplicate-check) first. New problem tickets MUST be created via the skill so the duplicate-prevention grep fires before the file lands. Invoke the Skill tool with skill='wr-itil:manage-problem' and a description of the new problem; Step 2 will grep for related existing tickets and surface any matches via AskUserQuestion before creating the new ticket. (P119)${RECOVERY_HINT}"
 exit 0

package/hooks/test/manage-problem-enforce-create.bats CHANGED Viewed

@@ -186,3 +186,64 @@ set_marker() {
   [ "$status" -eq 0 ]
   [[ "$output" != *"BLOCKED"* ]]
 }
+# --- P144 / ADR-048: gate-misfire recovery hint on deny message ---
+#
+# When the deny fires AND any /tmp/manage-problem-grep-* marker exists for
+# SOME SID, that's the helper-bug signal (P124 Phase 3 regression — helper
+# returned wrong SID, marker exists but doesn't match runtime hook stdin).
+# The deny message appends a recovery pointer to direct the agent at the
+# documented two-tier procedure in SKILL.md Step 2 substep 7.
+#
+# Routine first-creation deny (no marker exists for any SID at all) is
+# unchanged — recovery hint MUST NOT appear.
+setup_other_sid_marker() {
+  OTHER_SID="other-sid-$$-$RANDOM"
+  : > "/tmp/manage-problem-grep-${OTHER_SID}"
+}
+teardown_other_sid_marker() {
+  if [ -n "${OTHER_SID:-}" ]; then
+    rm -f "/tmp/manage-problem-grep-${OTHER_SID}"
+  fi
+}
+@test "deny without ANY /tmp/manage-problem-grep-* marker → deny message OMITS recovery hint" {
+  # Scrub any markers so the helper-bug signal cannot fire.
+  rm -f /tmp/manage-problem-grep-*
+  run run_write_hook "$PWD/docs/problems/999-foo.open.md" "$SID"
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"BLOCKED"* ]]
+  # No marker exists for any SID → routine first-creation deny → no recovery hint.
+  [[ "$output" != *"SID mismatch"* ]]
+  [[ "$output" != *"Step 2 substep 7"* ]]
+}
+@test "deny with /tmp/manage-problem-grep-* marker for OTHER SID → deny message INCLUDES recovery hint" {
+  # Scrub other markers first, then set a marker for a different SID.
+  rm -f /tmp/manage-problem-grep-*
+  setup_other_sid_marker
+  run run_write_hook "$PWD/docs/problems/999-foo.open.md" "$SID"
+  status=$?
+  teardown_other_sid_marker
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"BLOCKED"* ]]
+  # Marker exists for OTHER SID → helper-bug signal → recovery hint appended.
+  [[ "$output" == *"SID mismatch"* ]]
+  [[ "$output" == *"Step 2 substep 7"* ]]
+}
+@test "recovery hint avoids ADR-038 jargon (no internal P-number jargon in deny string)" {
+  # ADR-038 progressive disclosure — deny stays terse + actionable. Architect
+  # advisory rejected "P124-Phase-3-regression" wording in favour of plain
+  # "Helper succeeded but SID mismatch detected".
+  rm -f /tmp/manage-problem-grep-*
+  setup_other_sid_marker
+  run run_write_hook "$PWD/docs/problems/999-foo.open.md" "$SID"
+  status=$?
+  teardown_other_sid_marker
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"BLOCKED"* ]]
+  [[ "$output" != *"P124-Phase-3-regression"* ]]
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@windyroad/itil",
-  "version": "0.23.0",
+  "version": "0.23.1-preview.252",
   "description": "ITIL-aligned IT service management for Claude Code (problem, and future incident/change skills)",
   "bin": {
     "windyroad-itil": "./bin/install.mjs"

package/skills/manage-problem/SKILL.md CHANGED Viewed

@@ -269,6 +269,47 @@ Before creating, search existing problems for similar issues. The user may not k
    **Why a helper instead of inline `${CLAUDE_SESSION_ID:-default}`**: the agent's process does NOT export `CLAUDE_SESSION_ID` today; the hook side reads `session_id` from its stdin JSON payload (per the Claude Code PreToolUse contract). The prior fallback wrote the marker under `default` while the hook checked the real UUID — mismatch caused the Write deny on every first ticket of a session until the agent ad-hoc scraped a UUID-bearing marker. The helper canonicalises that scrape so every agent context discovers the SID the same way. P124.
+   <!-- supersedes-when: P142 ships -->
+   **Recovery if hook denial persists** (P144 / ADR-048 — auto-supersedes when P142 ships):
+   The P124 helper has a Phase 3 regression in orchestrator sessions that have dispatched subprocesses: it sometimes returns a subprocess SID instead of the orchestrator SID, while the runtime hook stdin still contains the orchestrator SID. The marker lands under the wrong UUID; the next `Write` is denied even though `mark_step2_complete` succeeded. The hook deny message includes a `(Helper succeeded but SID mismatch detected — see manage-problem SKILL.md Step 2 substep 7.)` pointer when this signal is observable.
+   **Gate-misfire signal** — recovery applies ONLY when ALL three conditions hold:
+   1. The agent is **already executing** `/wr-itil:manage-problem` Step 2 in this turn (i.e., the SKILL contract has just ordered the grep for THIS ticket creation — not a marker carried over from a prior unrelated invocation in the same session).
+   2. `mark_step2_complete` succeeded (the helper exited zero — no SID-discovery error).
+   3. The subsequent `Write` to the new `.<status>.md` file is denied by the P119 hook.
+   Routine creation flow does NOT match these conditions and MUST continue through the standard `Write` path. The recovery is mechanical (deterministic from the gate-misfire signal — no `AskUserQuestion` required, per ADR-044's framework-mediated surface catalog extension).
+   **First-tier recovery — announce-marker scrape**:
+   ```bash
+   # Discover the orchestrator session UUID via the most-reliable per-session announce marker.
+   # The orchestrator SID is what the runtime hook stdin contains in the common subprocess case.
+   sid=$(ls -t /tmp/itil-assistant-gate-announced-* 2>/dev/null | head -1 | sed 's|.*itil-assistant-gate-announced-||')
+   [ -n "$sid" ] && touch "/tmp/manage-problem-grep-${sid}"
+   # Retry the Write.
+   ```
+   **Second-tier recovery — python3-via-Bash file-write** (2026-04-29 evidence: runtime hook stdin SID may not be in any announce-marker class; first-tier returns the orchestrator SID, but the runtime SID is a different per-Write SID surfaced only by `architect-reviewed-*` mtime, not by any announce-marker):
+   ```bash
+   # The hook is PreToolUse:Write; python3-in-Bash is not a Write tool call,
+   # so the hook never fires. Use only when first-tier fails.
+   python3 -c "from pathlib import Path; Path('docs/problems/<NNN>-<title>.open.md').write_text('''<file body>''')"
+   ```
+   **Audit-trail-preservation test** — the second-tier procedure is sanctioned ONLY in the audit-trail-preserved branch:
+   - ✅ **Audit-trail-preserved**: the agent is currently executing `/wr-itil:manage-problem` Step 2 for THIS ticket creation (gate-misfire signal condition 1), AND any `/tmp/manage-problem-grep-*` marker exists. The skill flow itself is the just-ran-grep witness; the marker existence corroborates it.
+   - ❌ **Audit-trail-violated**: the agent is NOT in `/wr-itil:manage-problem` Step 2 for this ticket creation, OR no marker exists for any SID. Routine first-creation flow MUST hit the gate; the recovery procedure does NOT apply.
+   **Anti-pattern bound** — the loose reading "any marker from any earlier `manage-problem` invocation in this session" would let the recovery procedure apply to a fresh ticket creation that happens to reuse a stale marker from a prior unrelated invocation. That is the P131 anti-pattern surface (gate state as a workaround target instead of as a directive). The bound holds because the recovery is invoked from inside an active manage-problem flow where Step 2 has just been ordered for THIS ticket, AND the python3-via-Bash branch is named in this substep so its invocation is itself audit-trail-emitting.
+   **DO NOT brute-force-touch markers for every announced UUID.** That pattern (139 markers in one session, 2026-04-28 P144 evidence) satisfies the marker shape while gaming the audit trail the marker is supposed to record. The user has explicitly rejected this pattern: *"WTF? Why did you bypass instead of using the skill?"* (P144 driver correction). Brute-forcing markers for SIDs that did not run Step 2 is the canonical bypass — the recovery procedure above is the canonical use of the skill.
+   **Cross-references**: P124 (helper Phase 3 regression — driver of the misfire); P142 (P124 Phase 4 — structural fix that auto-supersedes this recovery when shipped); P131 (gate-exclusions-as-write-permission — adjacent anti-pattern family); ADR-048 (sanctioning + scoping ADR); ADR-009 (gate marker lifecycle); ADR-044 (mechanical-decision framework-mediated surface catalog).
 **Search strategy**: Search problem filenames AND file content. A match on the filename (kebab-case title) or the Description/Symptoms sections counts. Cast a wide net — false positives are cheap (user chooses), but false negatives mean duplicate problems.
 **Hook contract (P119)**: writing a `.open.md` (or any `.<status>.md`) file under `docs/problems/` without first running this Step 2 grep + marker-touch is blocked by the `manage-problem-enforce-create.sh` PreToolUse hook with a `permissionDecision: deny` directing the agent back to this skill. Agents that try to bypass the skill (e.g. mid-retrospective inline capture, post-mortem wrap-up, or any "I'll just write it directly" shortcut) will hit the deny and be redirected here. Do not work around the deny by setting the marker manually — the marker exists to record that this Step 2 ran, and a marker without a grep is the audit-trail gap P119 closes.

package/skills/manage-problem/test/manage-problem-p119-recovery-path.bats ADDED Viewed

@@ -0,0 +1,165 @@
+#!/usr/bin/env bats
+#
+# packages/itil/skills/manage-problem/test/manage-problem-p119-recovery-path.bats
+#
+# Behavioural tests for manage-problem Step 2 substep 7's P119 hook-misfire
+# recovery procedure (P144 / ADR-048).
+#
+# Step 2 substep 7 documents a two-tier recovery for the case where
+# `mark_step2_complete` succeeded but the P119 PreToolUse:Write hook still
+# denies the new ticket Write — typically because the P124 helper returned
+# a subprocess SID instead of the orchestrator SID (ADR-048 Phase 3
+# regression). Without documented recovery, the agent reaches for the
+# brute-force-touch-every-marker anti-pattern (139-marker incident,
+# 2026-04-28). User correction was emphatic: "WTF? Why did you bypass
+# instead of using the skill?"
+#
+# This bats fixes the contract:
+#   - Sub-block names the gate-misfire signal (active flow + helper-succeeded
+#     + Write-denied conjunction).
+#   - Two-tier procedure named (first-tier announce-marker scrape; second-tier
+#     python3-via-Bash file-write).
+#   - Audit-trail-preservation test as the gate-on-sanctioning rule.
+#   - Anti-pattern call-out ("DO NOT brute-force") in durable form.
+#   - ADR-048, P124, P142 cross-references.
+#   - <!-- supersedes-when: P142 ships --> HTML comment for cleanup
+#     discoverability.
+#
+# tdd-review: structural-permitted (justification: skill behavioural
+# harness pending P012 + P081 Phase 2; SKILL.md contract assertions
+# bridge until then; expected to migrate to behavioural form once
+# the harness exists).
+#
+# @problem P144
+# @adr ADR-048 (Documented recovery from gate misfire is the prescribed surface, not bypass)
+# @adr ADR-009 (gate marker lifecycle)
+# @adr ADR-013 Rule 5 (policy-authorised silent proceed)
+# @adr ADR-022 (problem lifecycle status suffixes)
+# @adr ADR-037 / P081 (testing strategy — bridge during harness build)
+# @adr ADR-038 (progressive disclosure — deny message terse)
+# @adr ADR-044 (decision-delegation — recovery is mechanical)
+# @jtbd JTBD-001 / JTBD-101 / JTBD-201
+SKILL_FILE="${BATS_TEST_DIRNAME}/../SKILL.md"
+setup() {
+  [ -f "$SKILL_FILE" ]
+}
+# Bound the search to Step 2 substep 7 region (between Step 2 heading and Step 3 heading).
+step2_text() {
+  awk '/^### 2\. /,/^### 3\. /' "$SKILL_FILE"
+}
+# ── Recovery sub-block presence ─────────────────────────────────────────────
+@test "Step 2 SKILL.md contains a Recovery sub-block for hook-denial misfire" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"Recovery"* ]]
+  [[ "$output" == *"hook denial"* ]] || [[ "$output" == *"hook still denies"* ]] || [[ "$output" == *"deny"* ]]
+}
+# ── Gate-misfire signal definition ──────────────────────────────────────────
+@test "Step 2 SKILL.md names the gate-misfire signal precondition (active manage-problem flow)" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  # The signal requires that the agent is already executing manage-problem
+  # Step 2 in the current turn — not just any prior session marker.
+  [[ "$output" == *"already executing"* ]] || [[ "$output" == *"active"* ]] || [[ "$output" == *"this turn"* ]]
+}
+@test "Step 2 SKILL.md names mark_step2_complete success as part of the misfire signal" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"mark_step2_complete"* ]]
+}
+# ── Two-tier procedure ──────────────────────────────────────────────────────
+@test "Step 2 SKILL.md names the first-tier recovery (announce-marker scrape)" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"first-tier"* ]] || [[ "$output" == *"First-tier"* ]]
+  [[ "$output" == *"itil-assistant-gate-announced"* ]]
+}
+@test "Step 2 SKILL.md names the second-tier recovery (python3-via-Bash file-write)" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"second-tier"* ]] || [[ "$output" == *"Second-tier"* ]]
+  [[ "$output" == *"python3"* ]]
+  [[ "$output" == *"Bash"* ]]
+}
+# ── Audit-trail-preservation test ───────────────────────────────────────────
+@test "Step 2 SKILL.md states the audit-trail-preservation test as the sanctioning rule" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"audit-trail"* ]] || [[ "$output" == *"audit trail"* ]]
+}
+@test "Step 2 SKILL.md names the anti-pattern bound (any-marker-anywhere is NOT the test)" {
+  # Architect advisory: the bound must rule out the loose "any marker from any
+  # earlier invocation in this session" reading — that's the P131 surface.
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"this ticket"* ]] || [[ "$output" == *"THIS ticket"* ]]
+}
+# ── Anti-pattern call-out (durable surface) ─────────────────────────────────
+@test "Step 2 SKILL.md contains the explicit DO-NOT-brute-force anti-pattern wording" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"DO NOT brute-force"* ]] || [[ "$output" == *"do not brute-force"* ]] || [[ "$output" == *"Do not brute-force"* ]]
+}
+@test "Step 2 SKILL.md cites the 2026-04-28 user correction context for the anti-pattern" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"P144"* ]]
+}
+# ── Cross-references ────────────────────────────────────────────────────────
+@test "Step 2 SKILL.md cites ADR-048 for the recovery procedure scope" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"ADR-048"* ]]
+}
+@test "Step 2 SKILL.md cites P124 as the helper-bug source" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"P124"* ]]
+}
+@test "Step 2 SKILL.md cites P142 as the structural fix (supersession trigger)" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"P142"* ]]
+}
+# ── Supersession comment (CI-enforced cleanup invariant) ────────────────────
+@test "Step 2 SKILL.md carries the supersedes-when HTML comment so cleanup is discoverable" {
+  # ADR-048 Reassessment Criteria: when P142's resolution ADR is accepted,
+  # this comment must be removed from SKILL.md source. Today the comment
+  # is present and this assertion passes; once P142 lands, the cleanup
+  # signal lives here.
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"supersedes-when"* ]]
+  [[ "$output" == *"P142"* ]]
+}
+# ── Mechanical (no-AskUserQuestion) per ADR-044 ─────────────────────────────
+@test "Step 2 SKILL.md states the recovery is mechanical (no AskUserQuestion required)" {
+  run step2_text
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"mechanical"* ]] || [[ "$output" == *"ADR-044"* ]]
+}

package/skills/work-problems/SKILL.md CHANGED Viewed

@@ -415,9 +415,36 @@ After the iteration's commit lands but before starting the next iteration, check
 2. If `.changeset/` is non-empty after push, run `npm run release:watch` (merge the release PR + wait for npm publish).
 3. Resume the loop only after the release lands on npm.
-**Failure handling**: If `release:watch` fails (CI failure, publish failure), stop the loop and report the failure in the AFK summary. Do not retry non-interactively — the user must intervene. **Step 2.5b cross-reference (P126)**: before emitting the final AFK summary for a Failure handling / CI failure / release:watch halt, run Step 2.5b's surfacing routine. The routine is gated on ≥1 accumulated user-answerable skip; this halt path empirically frequently has accumulated skips from prior iters (the original P126 surface), so the gate is normally satisfied and Step 2.5b's AskUserQuestion-default branch fires (`halt-paths-must-route-design-questions-through-Step-2.5b`). The CI-failure cause itself remains a halt with bug-signal — Step 2.5b surfaces *prior-iter accumulated user-answerable skips only*; it does NOT ask the user how to remediate the CI failure (that requires the user to inspect the failing CI run on return).
+**Failure handling (P140)**: When `push:watch` or `release:watch` reports a CI failure or publish failure, the orchestrator follows a diagnose-then-classify routing — fix-and-continue for the documented mechanically-fixable allow-list, halt for everything else. The previous uniform halt rule converted mechanically-fixable failures (1-line stale-grep-string updates, transient flakes) into ~45min queue stalls, regressing JTBD-006 "Progress the Backlog While I'm Away" without any governance benefit.
-`push:watch` and `release:watch` are policy-authorised actions when residual risk is within appetite per RISK-POLICY.md, so no `AskUserQuestion` is required for the drain itself (ADR-013 Rule 5).
+**Diagnostic preamble (ADR-026 grounding)**: orchestrator MUST first fetch the failed CI log via `gh run view <run-id> --log-failed` (or `gh run view --log-failed` against the most recent failure). Read the failure output and classify into ONE of the buckets below. Cite the failed test output verbatim in the fix-and-continue commit message or halt summary so future readers can audit the classification.
+**Fixable-in-iter allow-list (closed)**: the following classes are policy-authorised silent fix-and-continue per ADR-013 Rule 5. The list is **closed** — adding a new class is itself a deviation-candidate per ADR-044's framework-resolution boundary (surface to user via Step 2.5b's AskUserQuestion-default branch; do NOT auto-extend at agent discretion).
+- **P081-class stale-grep-string** — structural test runs `grep -F '<literal>'` (or `grep -nE '<pattern>'`) against a SKILL.md / ADR / source file; non-zero return because source was edited and the test's grep string was not. Fix: update the grep string to current source phrasing. Composes with P081 (structural-tests-are-wasteful root cause); fix-and-continue is the stop-gap, P081's full retrofit is the structural elimination.
+- **Hook stub mismatch** — test's mock-stdin field doesn't match current hook expectation (e.g. renamed JSON key, renamed event type). Fix: update the stub.
+- **Test ID drift** — assertion message grep doesn't match a recently-renamed function or symbol. Fix: sed in the test.
+- **Environmental flake** — CI runner intermittent issue (npm registry timeout, GitHub API rate limit, transient infra). Fix: re-trigger the workflow.
+**Ambiguous classification defaults to halt.** If the failure does not unambiguously match one of the above, the orchestrator halts. No diagnose-then-guess.
+**Fix-and-continue branch**: for a fixable class:
+1. Apply the fix (typically a single `Edit` change).
+2. Commit the fix through the **standard ADR-014 commit gate flow** — architect / JTBD / risk-scorer review per retry. A gate rejection routes to the halt branch (no retry budget restoration). Each fix-and-continue commit is its own discrete unit of work and rides its own commit through gates per ADR-014 + ADR-042 Rule 3 precedent (retries each ride their own commit).
+3. `git push` and re-run `npm run push:watch` (or `release:watch` if the failure was on the release-PR side) to wait for CI re-trigger.
+4. If CI passes, resume the loop (Step 6.75).
+5. If CI fails again, increment the per-iteration retry counter and return to step 1.
+**3-retry cap (per iteration, not per failure-class)**: after 3 fix-and-continue attempts in a single Step 6.5 invocation, the orchestrator routes to the halt branch regardless of failure class. Repeated failures of the "same" class are evidence the diagnosis was wrong; halt and surface for user judgment. The cap is per-iteration — a 4th distinct fixable failure in the same drain still halts.
+**Halt branch (genuinely unrecoverable)**: halt the loop and report the failure in the AFK summary. Do not retry non-interactively. Genuinely-unrecoverable classes include: auth failure (npm token, GitHub credentials), npm publish rejection (version conflict, package access denied), semantic test failure requiring user judgment (not literal-string drift), repeated transient failures (3+ retries, per the cap above), and any failure outside the fixable-in-iter allow-list.
+**Step 2.5b cross-reference (P126)**: before emitting the final AFK summary for a Failure handling / CI failure / release:watch halt, run Step 2.5b's surfacing routine. The routine is gated on ≥1 accumulated user-answerable skip; this halt path empirically frequently has accumulated skips from prior iters (the original P126 surface), so the gate is normally satisfied and Step 2.5b's AskUserQuestion-default branch fires (`halt-paths-must-route-design-questions-through-Step-2.5b`). The CI-failure cause itself remains a halt with bug-signal — Step 2.5b surfaces *prior-iter accumulated user-answerable skips only*; it does NOT ask the user how to remediate the CI failure (that requires the user to inspect the failing CI run on return).
+`push:watch` and `release:watch` are policy-authorised actions when residual risk is within appetite per RISK-POLICY.md, so no `AskUserQuestion` is required for the drain itself (ADR-013 Rule 5). The fix-and-continue branch is itself policy-authorised by the closed allow-list above, satisfying ADR-013 Rule 5 without an `AskUserQuestion` round-trip.
+**Composition notes**: fix-and-continue is the inverse of P132 (over-ask in interactive sessions) on the failure-handling surface — both arise from over-defensive uniform routing where a documented class-policy would empower silent action. Composes with P130 (orchestrator main-turn ask discipline — fix-and-continue does NOT introduce mid-iter asks; the closed allow-list resolves the decision per ADR-044). Cross-references: P081 (stop-gap composition — most fixables are P081-class), P135 (decision-delegation contract — the closed allow-list IS the framework-resolved policy).
 #### Above-appetite branch (per ADR-042)
@@ -497,6 +524,7 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
 | Commit when risk within appetite | Auto-commit (manage-problem step 9e fallback) |
 | Commit when risk above appetite | Skip commit, report uncommitted state |
 | Pipeline risk at appetite (push or release = 4/25) | Drain release queue (`push:watch` then `release:watch`) before next iteration — per ADR-018 (Step 6.5) |
+| CI failure during Step 6.5 drain (within-appetite branch) | Diagnose via `gh run view --log-failed`, classify against the closed fixable-in-iter allow-list (P081-class stale-grep-string, hook stub mismatch, test ID drift, environmental flake), fix-and-continue for fixable classes (each retry rides its own ADR-014 commit gate), 3-retry cap per iteration, halt for unrecoverable classes. Ambiguous classification defaults to halt. ADR-013 Rule 5 policy-authorised. Per ADR-026 grounding + ADR-044 framework-resolution boundary + P140 (Step 6.5 Failure handling). |
 | Pipeline risk above appetite (push or release >= 5/25) | Auto-apply scorer remediations incrementally (ADR-042 Rule 2). The agent reads suggestions and decides what to do. Re-score after each apply; drain when within appetite. **Never release above appetite** (ADR-042 Rule 1) — no AskUserQuestion shortcut. Halt the loop with `outcome: halted-above-appetite` if the loop exhausts without convergence (ADR-042 Rule 5). Verification Pending commits excluded from auto-revert (Rule 2b). Per ADR-042 (Step 6.5 Above-appetite branch). |
 | Origin diverged before start | Pull `--ff-only` if trivial; stop with report (`git log HEAD..origin/<base>` and reverse) if non-fast-forward — per ADR-019 (Step 0) |
 | Prior-session partial work detected at start (session-continuity dirty: untracked `docs/decisions/*.proposed.md` / `docs/problems/*.md`, `.afk-run-state/iter-*.json` with `is_error: true` or `api_error_status >= 400`, stale `.claude/worktrees/*`, uncommitted SKILL.md/source/ADR edits) | Halt the loop with a structured Prior-Session State report in the AFK summary. Do NOT attempt non-interactive resume. Interactive invocations prompt via `AskUserQuestion` with 4 options (resume / discard / leave-and-lower-priority / halt). Per P109 + ADR-013 Rule 6 (Step 0 session-continuity detection pass). |
@@ -517,7 +545,7 @@ The orchestrator MUST NOT call `AskUserQuestion` between iterations except at th
 - **Step 0 fetch-failure halt** — `git fetch origin` network failure; halt-with-report so the user retries on return.
 - **Step 2.5 / Step 2.5b loop-end emit** — accumulated `outstanding_questions` queue presented as batched `AskUserQuestion` (or fallback Outstanding Design Questions table per ADR-013 Rule 6). This is the framework's prescribed user-interaction point; do NOT dilute it by asking earlier.
 - **Step 6.5 above-appetite Rule 5 halt** — auto-apply loop exhausted without convergence; halt-with-batched-questions per the Step 2.5b cross-reference (Step 2.5b surfaces *prior-iter accumulated user-answerable skips only* — the halt-causing scorer-gap remains a halt-with-bug-signal per ADR-042 Rule 5).
-- **Step 6.5 CI-failure / `release:watch` failure halt** — push:watch or release:watch failed; halt-with-batched-questions per the Step 2.5b cross-reference.
+- **Step 6.5 CI-failure / `release:watch` failure halt** — push:watch or release:watch failed AND the failure is genuinely-unrecoverable (outside the fixable-in-iter allow-list, or 3-retry cap reached); halt-with-batched-questions per the Step 2.5b cross-reference. Failures inside the closed allow-list route to fix-and-continue per Step 6.5 Failure handling (P140), not this halt point.
 - **Step 6.75 dirty-for-unknown-reason halt** — `git status --porcelain` divergence; halt-with-batched-questions per the Step 2.5b cross-reference.
 **No mid-iter ask points.** Every other point in the orchestrator's main turn (between Step 5 dispatch completing and Step 6.5 release-cadence check; between Step 6.75 verification and Step 7 loop-back; between Step 7 and Step 1 next-iteration; between consecutive iters generally) is a mechanical-stage transition that the framework has already resolved. Do NOT introduce ad-hoc `AskUserQuestion` calls at those points to confirm "is it OK to proceed?" or "want me to start the next iter?" — proceeding IS the framework-resolved default. Continue iterating until quota or stop-condition #1/#2/#3 fires.

package/skills/work-problems/test/work-problems-step-6-5-fix-and-continue.bats ADDED Viewed

@@ -0,0 +1,254 @@
+#!/usr/bin/env bats
+# P140: /wr-itil:work-problems Step 6.5 Failure handling subsection must
+# document diagnose-then-classify routing — fix-and-continue for the
+# documented mechanically-fixable allow-list, halt for everything else.
+#
+# Prior behaviour was a uniform halt-on-CI-failure rule that converted
+# 1-line stale-grep-string updates and transient flakes into ~45min queue
+# stalls, regressing JTBD-006 "Progress the Backlog While I'm Away"
+# without any governance benefit. P140's Phase 1 amendment replaces that
+# uniform rule with a closed allow-list policy authorising silent
+# fix-and-continue per ADR-013 Rule 5, capped at 3 retries per iteration
+# before falling back to the halt branch.
+#
+# Doc-lint contract assertions per ADR-037 Permitted Exception
+# (contract-assertion class — same shape as the P130 / P126 / P135
+# sibling fixtures). The asserted prose IS the load-bearing policy
+# surface — re-reading the SKILL.md is the only way an AFK reader (and
+# the iteration subprocess) learns the fixable-class taxonomy and the
+# retry cap. Behavioural verification is impossible until Phase 2's
+# advisory classifier ships (deferred per the ticket Fix Strategy —
+# observe over 30 days).
+#
+# @problem P140
+# @adr ADR-013 (Rule 5 — policy-authorised silent action)
+# @adr ADR-014 (one-commit-per-iter; retries each ride their own commit)
+# @adr ADR-018 (inter-iteration release cadence; this refines its
+#       Failure handling clause)
+# @adr ADR-026 (agent output grounding — diagnostic preamble citation)
+# @adr ADR-037 (skill-testing strategy — contract-assertion class)
+# @adr ADR-042 (above-appetite branch — Rule 3 commit-gate-per-retry
+#       precedent composes with this fix-and-continue branch)
+# @adr ADR-044 (decision-delegation contract — framework-resolution
+#       boundary; closed allow-list extensions are deviation-candidates)
+# @jtbd JTBD-006 (Progress the Backlog While I'm Away — primary)
+# @jtbd JTBD-001 (Enforce Governance Without Slowing Down — composes;
+#       per-retry gates preserve governance)
+setup() {
+  REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../../.." && pwd)"
+  SKILL_MD="$REPO_ROOT/packages/itil/skills/work-problems/SKILL.md"
+}
+@test "work-problems P140: SKILL.md exists" {
+  [ -f "$SKILL_MD" ]
+}
+# ── Failure handling subsection identity ───────────────────────────────────
+@test "work-problems P140: Step 6.5 Failure handling subsection cites P140" {
+  # The amendment must self-identify so future readers tracing back from
+  # the ticket find the load-bearing prose without keyword-guessing.
+  run grep -nE 'Failure handling.*P140|P140.*Failure handling' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Diagnostic preamble (ADR-026 grounding) ────────────────────────────────
+@test "work-problems P140: Failure handling cites gh run view --log-failed as the diagnostic preamble" {
+  # ADR-026 grounding: the orchestrator MUST read the actual failure
+  # output before classifying. Without this, classification degrades to
+  # guess-from-context.
+  run grep -nE 'gh run view.*--log-failed' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling cites ADR-026 (grounding) on the diagnostic preamble" {
+  # The grounding requirement should cite ADR-026 explicitly so the
+  # connection is auditable.
+  run grep -nE 'ADR-026' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Fixable-in-iter allow-list (closed) ────────────────────────────────────
+@test "work-problems P140: Failure handling names P081-class stale-grep-string as a fixable class" {
+  run grep -nE 'P081-class stale-grep-string|stale-grep-string' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling names hook stub mismatch as a fixable class" {
+  run grep -niE 'hook stub mismatch' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling names test ID drift as a fixable class" {
+  run grep -niE 'test ID drift' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling names environmental flake as a fixable class" {
+  run grep -niE 'environmental flake' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: allow-list is framed as 'closed' (not extensible at agent discretion)" {
+  # JTBD review guard-rail: persona could misread "fix-and-continue" as
+  # "auto-fix anything" without the closed framing. Future agent edits
+  # must not drift the allow-list open without explicit user direction.
+  run grep -niE 'allow-list.*closed|closed.*allow-list' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: extending the allow-list is framed as a deviation-candidate per ADR-044" {
+  # ADR-044 framework-resolution boundary: the closed list IS the
+  # framework-resolved policy. Adding a class is a direction-setting
+  # decision, not a mechanical fix.
+  run grep -niE 'deviation-candidate.*ADR-044|ADR-044.*deviation' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: ambiguous classification defaults to halt (no diagnose-then-guess)" {
+  # JTBD review guard-rail (b): without this, the persona-misread risk
+  # of "auto-fix anything" re-enters via fuzzy classification.
+  run grep -niE 'Ambiguous classification defaults to halt|ambiguous.*halt' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Fix-and-continue branch ────────────────────────────────────────────────
+@test "work-problems P140: Failure handling documents a fix-and-continue branch" {
+  run grep -niE 'Fix-and-continue branch|fix-and-continue branch' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: each fix-and-continue retry rides standard ADR-014 commit gate flow (architect / JTBD / risk-scorer)" {
+  # Architect-flagged invariant: governance gates MUST run on every
+  # retry. The fix-and-continue branch does NOT bypass gates.
+  run grep -niE 'standard ADR-014 commit gate flow|ADR-014.*commit gate' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: ADR-042 Rule 3 commit-gate-per-retry precedent is cross-referenced" {
+  # ADR-042 already establishes that retries each ride their own
+  # commit through full gate flow. P140 composes with that precedent
+  # rather than inventing a new commit-cardinality rule.
+  run grep -niE 'ADR-042 Rule 3' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── 3-retry cap (per iteration) ────────────────────────────────────────────
+@test "work-problems P140: Failure handling caps fix-and-continue at 3 retries" {
+  run grep -niE '3-retry cap|3 retr|three retr' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: 3-retry cap is per-iteration, not per-failure-class" {
+  # Without this clarification, an agent could reset the counter on
+  # each new failure class and drain budget indefinitely.
+  run grep -niE 'per[- ]iteration, not per[- ]failure[- ]class|cap is per[- ]iteration' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Halt branch preserved ──────────────────────────────────────────────────
+@test "work-problems P140: Halt branch preserved for genuinely-unrecoverable failures" {
+  run grep -niE 'genuinely-unrecoverable|genuinely unrecoverable' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Halt branch enumerates auth failure / npm publish rejection / semantic test as unrecoverable" {
+  # The halt branch's allow-list mirror — naming the unrecoverable
+  # classes makes the boundary auditable.
+  run grep -niE 'auth failure|npm publish rejection|semantic test.*judgment' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Step 2.5b cross-reference preserved (P126) ─────────────────────────────
+@test "work-problems P140: Halt branch routes through Step 2.5b surfacing routine (P126 preserved)" {
+  # The halt branch's existing P126 cross-reference must survive the
+  # amendment — surfacing accumulated user-answerable skips before
+  # emitting the halt summary remains the contract.
+  run grep -nE 'Step 2\.5b cross-reference \(P126\)' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── ADR-013 Rule 5 policy-authorised silent action ─────────────────────────
+@test "work-problems P140: fix-and-continue branch is policy-authorised per ADR-013 Rule 5" {
+  # ADR-044's framework-mediated surface includes "policy-authorised
+  # silent proceed" — the closed allow-list IS the policy. Future
+  # readers must find the citation to confirm this is not an ad-hoc
+  # bypass of Rule 1.
+  run grep -nE 'ADR-013 Rule 5|Rule 5 policy-authorised|policy-authorised.*ADR-013' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Composition cross-references ───────────────────────────────────────────
+@test "work-problems P140: Failure handling cross-references P081 (stop-gap composition)" {
+  # P081 is the structural-tests-are-wasteful root cause. Most
+  # P081-class stale-grep-string failures are P081's territory.
+  # Fix-and-continue is the stop-gap; P081's full retrofit is the
+  # structural elimination.
+  run grep -nE 'P081' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling cross-references P135 (decision-delegation contract)" {
+  # P135 + ADR-044 frame the closed allow-list as the
+  # framework-resolved policy.
+  run grep -nE 'P135' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling cross-references P130 (orchestrator main-turn ask discipline)" {
+  # P130 ensures fix-and-continue does NOT introduce mid-iter asks —
+  # the closed allow-list resolves the decision per ADR-044's
+  # framework-resolution boundary.
+  run grep -nE 'P130' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Failure handling cross-references P132 (over-ask in interactive sessions)" {
+  # P140 is the inverse of P132 on the failure-handling surface — both
+  # arise from over-defensive uniform routing. Naming the symmetry
+  # protects against future drift.
+  run grep -nE 'P132' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Mid-loop ask discipline halt-point bullet narrowed ─────────────────────
+@test "work-problems P140: Step 6.5 CI-failure halt-point bullet narrows to outside-allow-list / cap-reached scope" {
+  # The Mid-loop ask discipline subsection enumerates Step 6.5 CI-
+  # failure as a halt point. After P140 the halt fires only on
+  # unrecoverable failures — the bullet must reflect that narrower
+  # scope, otherwise future readers conclude all CI failures still
+  # halt.
+  run grep -nE 'fixable-in-iter allow-list|3-retry cap reached|outside the.*allow-list' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+# ── Non-Interactive Decision Making table row ──────────────────────────────
+@test "work-problems P140: Decision Making table carries a CI-failure-during-Step-6.5-drain row" {
+  # The decision table is the AFK reader's quick-reference; without a
+  # row here the failure-handling refinement is buried 80 lines up in
+  # Step 6.5.
+  run grep -nE '\| CI failure during Step 6\.5 drain' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Decision Making table row cites the closed fixable-in-iter allow-list" {
+  run grep -nE 'closed fixable-in-iter allow-list' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}
+@test "work-problems P140: Decision Making table row cites the 3-retry cap" {
+  run grep -nE 'CI failure during Step 6\.5.*3-retry cap|3-retry cap.*CI failure' "$SKILL_MD"
+  [ "$status" -eq 0 ]
+}