@windyroad/itil 0.30.1 → 0.30.2-preview.317

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  {
2
2
  "name": "wr-itil",
3
- "version": "0.30.1",
3
+ "version": "0.30.2",
4
4
  "description": "ITIL-aligned IT service management for Claude Code"
5
5
  }
package/hooks/hooks.json CHANGED
@@ -28,6 +28,10 @@
28
28
  "matcher": "Bash",
29
29
  "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/p057-staging-trap-detect.sh" }]
30
30
  },
31
+ {
32
+ "matcher": "Bash",
33
+ "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-bash-polling-antipattern-detect.sh" }]
34
+ },
31
35
  {
32
36
  "matcher": "Bash",
33
37
  "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/pre-publish-intake-gate.sh" }]
@@ -0,0 +1,92 @@
1
+ #!/bin/bash
2
+ # P232: PreToolUse:Bash hook — denies bash polling loops that
3
+ # self-reference via `pgrep -f` (parent class) or `pkill -0`
4
+ # (sibling) and deadlock in AFK iters when the polling loop's
5
+ # own command line matches the search pattern.
6
+ #
7
+ # Detection shape: a loop construct (`until` / `while`, with or
8
+ # without leading `!`) immediately followed by `pgrep` OR
9
+ # `pkill -0`. One-shot `pgrep -f` (no surrounding loop) is allowed
10
+ # — the polling-loop shape is the antipattern, not pgrep itself.
11
+ #
12
+ # Recovery: agents should `wait $bg_pid` (shell-native) for
13
+ # backgrounded shell jobs OR use Bash-tool `run_in_background=true`
14
+ # plus `BashOutput` polling for harness-tracked processes.
15
+ #
16
+ # Allow paths (exit 0 without deny):
17
+ # - tool_name != "Bash" (only Bash invocations are gated)
18
+ # - empty command (parse-incomplete fail-open)
19
+ # - command does not contain the polling-loop shape
20
+ # - parse failure on stdin (mirrors create-gate.sh fail-open)
21
+ #
22
+ # References:
23
+ # ADR-005 — plugin testing strategy (hook bats live under hooks/test/).
24
+ # ADR-013 Rule 1 — deny redirects with mechanical recovery.
25
+ # ADR-038 — progressive disclosure / deny-message terseness budget.
26
+ # ADR-045 — hook injection budget; deny-path band 200-700 bytes.
27
+ # ADR-052 — behavioural tests default (positive + negative cases).
28
+ # P146 — parent class (bash until-loop polls bats output with
29
+ # bats-console-summary regex against TAP output).
30
+ # P232 — this hook; self-referential pgrep -f variant.
31
+ # p057-staging-trap-detect.sh — sibling PreToolUse:Bash detect
32
+ # hook; mirror the deny-message shape.
33
+
34
+ INPUT=$(cat)
35
+
36
+ TOOL_NAME=$(echo "$INPUT" | python3 -c "
37
+ import sys, json
38
+ try:
39
+ data = json.load(sys.stdin)
40
+ print(data.get('tool_name', ''))
41
+ except:
42
+ print('')
43
+ " 2>/dev/null || echo "")
44
+
45
+ # Only gate Bash. Non-Bash tools bypass entirely.
46
+ if [ "$TOOL_NAME" != "Bash" ]; then
47
+ exit 0
48
+ fi
49
+
50
+ COMMAND=$(echo "$INPUT" | python3 -c "
51
+ import sys, json
52
+ try:
53
+ data = json.load(sys.stdin)
54
+ print(data.get('tool_input', {}).get('command', ''))
55
+ except:
56
+ print('')
57
+ " 2>/dev/null || echo "")
58
+
59
+ # Empty / missing command — fail-open per create-gate.sh precedent.
60
+ if [ -z "$COMMAND" ]; then
61
+ exit 0
62
+ fi
63
+
64
+ # Polling-antipattern regex: a loop construct (`until` / `while`,
65
+ # with or without leading `!`) immediately followed by `pgrep` OR
66
+ # `pkill -0`. The `[[:space:]]+!?[[:space:]]*` middle covers
67
+ # `until pgrep`, `until ! pgrep`, `until !pgrep`, and the same
68
+ # shapes with `while`. The `pkill[[:space:]]+-0` half catches
69
+ # the signal-0 polling sibling without false-matching real-signal
70
+ # kills (`pkill -TERM`, `pkill -HUP`, etc.).
71
+ POLLING_RE='(until|while)[[:space:]]+!?[[:space:]]*(pgrep|pkill[[:space:]]+-0)'
72
+
73
+ if ! printf '%s' "$COMMAND" | grep -qE "$POLLING_RE"; then
74
+ exit 0
75
+ fi
76
+
77
+ # Antipattern detected — emit deny with terse recovery. Voice-tone
78
+ # target ~245 bytes (sibling p057-staging-trap-detect.sh precedent),
79
+ # inside ADR-045 deny-path band; cites P232 + BOTH recovery options.
80
+ # REASON lands verbatim in the JSON heredoc: keep it free of backslashes.
81
+ REASON="BLOCKED: P232 self-referential polling antipattern. \`pgrep -f\` / \`pkill -0\` inside until/while loop matches the loop's own command line and deadlocks in AFK iters. Use \`wait \$bg_pid\` (shell-native) OR Bash-tool BashOutput polling (run_in_background=true) instead."
82
+
83
+ cat <<EOF
84
+ {
85
+ "hookSpecificOutput": {
86
+ "hookEventName": "PreToolUse",
87
+ "permissionDecision": "deny",
88
+ "permissionDecisionReason": "${REASON}"
89
+ }
90
+ }
91
+ EOF
92
+ exit 0
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # P232: itil-bash-polling-antipattern-detect.sh PreToolUse:Bash hook
4
+ # must deny bash polling loops that self-reference via `pgrep -f`
5
+ # (parent class) or `pkill -0` (sibling), advising `wait $bg_pid` or
6
+ # Bash-tool `BashOutput` polling instead.
7
+ #
8
+ # Detection shape: a loop construct (`until` / `while`) combined with a
9
+ # polling mechanism (`pgrep -f` / `pkill -0`). One-shot `pgrep -f` (no
10
+ # surrounding loop) is allowed — the polling shape is the antipattern,
11
+ # not pgrep itself.
12
+ #
13
+ # Per ADR-005 / ADR-052 — bats live under packages/<plugin>/hooks/test/
14
+ # and assert behaviour on emitted JSON, not source-content. Per
15
+ # feedback_behavioural_tests.md (P081) — no source-grep on hook text.
16
+
17
+ setup() {
18
+ SCRIPT_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
19
+ HOOK="$SCRIPT_DIR/itil-bash-polling-antipattern-detect.sh"
20
+ }
21
+
22
+ # Helper: simulate the PreToolUse:Bash payload on stdin.
23
+ # Uses python to build the JSON so we don't escape-hell with bash.
24
+ run_bash_hook() {
25
+ local cmd="$1"
26
+ python3 -c "
27
+ import json, sys
28
+ print(json.dumps({'tool_name': 'Bash', 'tool_input': {'command': sys.argv[1]}}))
29
+ " "$cmd" | bash "$HOOK"
30
+ }
31
+
32
+ # --- Antipattern detection: positive cases (deny) ---
33
+
34
+ @test "deny: until ! pgrep -f loop" {
35
+ run run_bash_hook "until ! pgrep -f 'bats --recursive' > /dev/null 2>&1; do sleep 5; done"
36
+ [ "$status" -eq 0 ]
37
+ [[ "$output" == *"\"permissionDecision\": \"deny\""* ]]
38
+ [[ "$output" == *"P232"* ]]
39
+ }
40
+
41
+ @test "deny: while pgrep -f loop (positive form, no negation)" {
42
+ run run_bash_hook "while pgrep -f 'long-running-job'; do sleep 2; done"
43
+ [ "$status" -eq 0 ]
44
+ [[ "$output" == *"\"permissionDecision\": \"deny\""* ]]
45
+ [[ "$output" == *"P232"* ]]
46
+ }
47
+
48
+ @test "deny: until ! pkill -0 signal-0 poll" {
49
+ run run_bash_hook "until ! pkill -0 -f 'worker'; do sleep 3; done"
50
+ [ "$status" -eq 0 ]
51
+ [[ "$output" == *"\"permissionDecision\": \"deny\""* ]]
52
+ }
53
+
54
+ @test "deny: while pkill -0 signal-0 poll" {
55
+ run run_bash_hook "while pkill -0 12345; do sleep 1; done"
56
+ [ "$status" -eq 0 ]
57
+ [[ "$output" == *"\"permissionDecision\": \"deny\""* ]]
58
+ }
59
+
60
+ @test "deny: pgrep poll embedded in heredoc body" {
61
+ # Heredoc body lands in the same tool_input.command string.
62
+ run run_bash_hook "bash <<'EOF'
63
+ until ! pgrep -f 'bats'; do sleep 5; done
64
+ EOF"
65
+ [ "$status" -eq 0 ]
66
+ [[ "$output" == *"\"permissionDecision\": \"deny\""* ]]
67
+ }
68
+
69
+ @test "deny: P232 deadlock witness — multi-line shape with trailing tail" {
70
+ run run_bash_hook "until ! pgrep -f 'bats --recursive' > /dev/null 2>&1; do sleep 5; done; echo done; tail -30 /tmp/bats-out.log"
71
+ [ "$status" -eq 0 ]
72
+ [[ "$output" == *"\"permissionDecision\": \"deny\""* ]]
73
+ }
74
+
75
+ # --- Allow paths: legitimate non-polling uses ---
76
+
77
+ @test "allow: one-shot pgrep -f without surrounding loop" {
78
+ run run_bash_hook "pgrep -f 'nginx' && echo running"
79
+ [ "$status" -eq 0 ]
80
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
81
+ }
82
+
83
+ @test "allow: one-shot pkill (no -0, real signal — not a poll)" {
84
+ run run_bash_hook "pkill -TERM -f 'stale-worker'"
85
+ [ "$status" -eq 0 ]
86
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
87
+ }
88
+
89
+ @test "allow: wait \$bg_pid (the canonical recovery shape)" {
90
+ run run_bash_hook "bats --recursive packages/itil/hooks/test/ & wait \$!"
91
+ [ "$status" -eq 0 ]
92
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
93
+ }
94
+
95
+ @test "allow: while loop without pgrep/pkill (unrelated)" {
96
+ run run_bash_hook "while read line; do echo \$line; done < input.txt"
97
+ [ "$status" -eq 0 ]
98
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
99
+ }
100
+
101
+ @test "allow: until loop without pgrep/pkill (unrelated)" {
102
+ run run_bash_hook "until [ -f /tmp/sentinel ]; do sleep 1; done"
103
+ [ "$status" -eq 0 ]
104
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
105
+ }
106
+
107
+ @test "allow: commit message text mentioning pgrep does not deny" {
108
+ # The literal pair is in the commit message body, not a poll shape.
109
+ # The hook should not over-match on commit prose.
110
+ run run_bash_hook "git commit -m 'document pgrep antipattern in P232'"
111
+ [ "$status" -eq 0 ]
112
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
113
+ }
114
+
115
+ # --- Tool-name filters ---
116
+
117
+ @test "allow: non-Bash tool exits 0 without deny" {
118
+ run bash -c "echo '{\"tool_name\":\"Edit\",\"tool_input\":{\"file_path\":\"foo.md\"}}' | bash $HOOK"
119
+ [ "$status" -eq 0 ]
120
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
121
+ }
122
+
123
+ # --- Parse / fail-open ---
124
+
125
+ @test "allow: empty JSON fails open" {
126
+ run bash -c "echo '{}' | bash $HOOK"
127
+ [ "$status" -eq 0 ]
128
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
129
+ }
130
+
131
+ @test "allow: empty command field fails open" {
132
+ run bash -c "echo '{\"tool_name\":\"Bash\",\"tool_input\":{\"command\":\"\"}}' | bash $HOOK"
133
+ [ "$status" -eq 0 ]
134
+ [[ "$output" != *"\"permissionDecision\": \"deny\""* ]]
135
+ }
136
+
137
+ # --- Deny message contract (ADR-038 progressive disclosure / ADR-045 budget) ---
138
+
139
+ @test "deny message cites P232 + names BOTH recovery alternatives" {
140
+ run run_bash_hook "until ! pgrep -f 'bats'; do sleep 5; done"
141
+ [ "$status" -eq 0 ]
142
+ [[ "$output" == *"P232"* ]]
143
+ [[ "$output" == *"wait"* ]]
144
+ [[ "$output" == *"BashOutput"* ]]
145
+ }
146
+
147
+ @test "deny message stays under ADR-045 deny-path budget (<700 bytes)" {
148
+ # Voice-tone target ~245 bytes (sibling p057-staging-trap-detect.sh
149
+ # precedent). ADR-045 deny-path band hard cap at 700 bytes keeps the
150
+ # message terse — fail loudly if it bloats.
151
+ run run_bash_hook "until ! pgrep -f 'bats'; do sleep 5; done"
152
+ [ "$status" -eq 0 ]
153
+ [ "${#output}" -lt 700 ]
154
+ }
@@ -0,0 +1,309 @@
1
+ #!/usr/bin/env bash
2
+ # Shared derive-first dispatch helper — canonical source-of-truth.
3
+ #
4
+ # P132 Phase 2a-iii-A extracted this helper from three declaration-skill
5
+ # surfaces. Phase 2a-iii-B (2026-05-16) added wr-architect:create-adr as
6
+ # the 4th adopter, which required moving the canonical source from
7
+ # packages/itil/lib/ to packages/shared/ per ADR-017 (Shared code
8
+ # duplicated into per-package lib/ kept in sync by script + CI drift
9
+ # check). The per-package lib/ copies are byte-identical to this file:
10
+ #
11
+ # - packages/itil/lib/derive-first-dispatch.sh (sync target)
12
+ # - packages/architect/lib/derive-first-dispatch.sh (sync target)
13
+ #
14
+ # Sync mechanism: scripts/sync-derive-first-dispatch.sh (mirrors the
15
+ # sync-install-utils.sh pattern). CI guard: npm run check:derive-first-dispatch.
16
+ # Drift test: packages/shared/test/sync-derive-first-dispatch.bats.
17
+ #
18
+ # Maintainer-side SKILL.md surfaces that source the helper:
19
+ # - packages/itil/skills/capture-problem/SKILL.md Step 1.5
20
+ # - packages/itil/skills/manage-incident/SKILL.md Step 4
21
+ # - packages/itil/skills/manage-problem/SKILL.md Step 4
22
+ # - packages/architect/skills/create-adr/SKILL.md Step 2 (P132 Phase 2a-iii-B)
23
+ #
24
+ # Each caller passes surface-specific signal definitions; this helper
25
+ # centralises the dispatch mechanism: slug derivation, two-sided lexical
26
+ # classifier, RISK-POLICY matrix lookup, and the I2-isomorphic stderr
27
+ # advisory format.
28
+ #
29
+ # <!-- DERIVE-FIRST-DISPATCH-CONTRACT-SOURCE: P132 Phase 2a-iii-A + Phase 2a-iii-B -->
30
+ # Drift in the stderr advisory format here re-opens P132 — any change MUST
31
+ # update all four caller SKILL.md surfaces in the same commit.
32
+ #
33
+ # Usage (sourced):
34
+ # . packages/<pkg>/lib/derive-first-dispatch.sh # callers source their own package's copy
35
+ #
36
+ # Exported functions:
37
+ # emit_stderr_advisory <skill> <field> <value> <source> [reversibility]
38
+ # derive_kebab_slug <description> [max_tokens=8]
39
+ # lexical_classify_two_sided <text> <side_a_patterns_var> <side_b_patterns_var>
40
+ # risk_policy_matrix_lookup <text> <impact_high> <impact_mod> <impact_low>
41
+ # <likelihood_high> <likelihood_med> <likelihood_low>
42
+ #
43
+ # @adr ADR-002 (Monorepo per-plugin packages — architecture context for ADR-017)
44
+ # @adr ADR-017 (Shared code duplicated into per-package lib/ kept in sync)
45
+ # @adr ADR-044 (Decision-Delegation Contract — derive-first framework boundary)
46
+ # @adr ADR-026 (cost-source grounding — stderr advisory)
47
+ # @adr ADR-013 Rule 5 (policy-authorised silent proceed)
48
+ # @adr ADR-052 (behavioural-by-default — tested via scripts/test/derive-first-dispatch.bats
49
+ # and packages/shared/test/sync-derive-first-dispatch.bats)
50
+ # @problem P132 (agents over-ask in interactive sessions — Phase 2a-iii-A shared helper +
51
+ # Phase 2a-iii-B 4th-adopter migration to packages/shared/)
52
+ # @problem P185 (capture-problem Step 1.5 worked-example precedent)
53
+ # @jtbd JTBD-001 (enforce governance without slowing down — primary)
54
+ # @jtbd JTBD-101 (extend the suite with consistent patterns)
55
+ #
56
+ # Deliberately NOT enabling `set -e` at file scope — callers source this
57
+ # helper and expect functions to return AMBIGUOUS sentinels, not errexit aborts.
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # emit_stderr_advisory — canonical I2-isomorphic stderr advisory format.
61
+ #
62
+ # Format: <skill>: derived <field>=<value> from <source>; <reversibility>
63
+ #
64
+ # This is the single source-of-truth for the advisory sentence shape
65
+ # across all derive-first declaration-skill surfaces. The format is
66
+ # load-bearing for cross-skill consistency — drift here re-opens P132.
67
+ # ---------------------------------------------------------------------------
68
+ emit_stderr_advisory() {
69
+ local skill="$1"
70
+ local field="$2"
71
+ local value="$3"
72
+ local source_desc="$4"
73
+ local reversibility="${5:-re-invoke or update if mis-rated}"
74
+ printf '%s: derived %s=%s from %s; %s\n' \
75
+ "$skill" "$field" "$value" "$source_desc" "$reversibility" >&2
76
+ }
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # derive_kebab_slug — kebab-case slug from prose.
80
+ #
81
+ # Lowercases, replaces every character other than a-z, 0-9, space and
82
+ # hyphen with a space, splits on spaces (hyphens stay inside a token),
83
+ # drops stopwords, caps tokens (default 8 per the SKILL.md surface contract), joins with `-`.
84
+ #
85
+ # Used at:
86
+ # - capture-problem Step 1.4 Title derivation
87
+ # - manage-incident Step 4 Title derivation
88
+ # - manage-problem Step 4 Title derivation
89
+ # - create-adr Step 2 Title derivation (P132 Phase 2a-iii-B)
90
+ # ---------------------------------------------------------------------------
91
+ derive_kebab_slug() {
92
+ local description="$1"
93
+ local max_tokens="${2:-8}"
94
+ # Stopword list — common English function words plus "I/you/we" pronouns.
95
+ local stopwords='^(the|a|an|and|or|but|if|then|else|when|while|for|to|of|in|on|at|by|from|with|as|is|are|was|were|be|been|being|have|has|had|do|does|did|will|would|should|could|may|might|must|can|i|you|we|they|it|its|this|that|these|those|so|because|since|just|only|than|like|some|any|all|each|every|no|not)$'
96
+
97
+ printf '%s' "$description" \
98
+ | tr '[:upper:]' '[:lower:]' \
99
+ | tr -c 'a-z0-9 -' ' ' \
100
+ | tr -s ' ' \
101
+ | tr ' ' '\n' \
102
+ | grep -vE "$stopwords" \
103
+ | grep -v '^$' \
104
+ | head -n "$max_tokens" \
105
+ | paste -sd '-' -
106
+ }
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # lexical_classify_two_sided — two-sided binary lexical classifier.
110
+ #
111
+ # Used by capture-problem Step 1.5 Type classification (technical vs
112
+ # user-business). Callers pass description text plus two regex pattern
113
+ # arrays (by name); helper counts hits per side and echoes one of:
114
+ #
115
+ # SIDE_A_UNAMBIGUOUS|<matched signals (comma-separated)>
116
+ # ≥1 side-A signal hit AND 0 side-B signals hit.
117
+ # SIDE_B_UNAMBIGUOUS|<matched signals (comma-separated)>
118
+ # 0 side-A signals hit AND ≥1 side-B signal hit.
119
+ # AMBIGUOUS|<a=N b=N>
120
+ # Mixed (both sides matched) OR zero (neither side matched).
121
+ #
122
+ # Caller is responsible for:
123
+ # - Mapping SIDE_A/SIDE_B to its domain values (e.g. technical / user-business).
124
+ # - Calling emit_stderr_advisory on the unambiguous path.
125
+ # - Firing AskUserQuestion on the AMBIGUOUS path (ADR-044 category-5 taste fallback).
126
+ # ---------------------------------------------------------------------------
127
+ lexical_classify_two_sided() {
128
+ local description="$1"
129
+ local -n _side_a_patterns_ref="$2"
130
+ local -n _side_b_patterns_ref="$3"
131
+ local a_hits=()
132
+ local b_hits=()
133
+ local pattern
134
+
135
+ for pattern in "${_side_a_patterns_ref[@]}"; do
136
+ if printf '%s' "$description" | grep -qiE "$pattern" 2>/dev/null; then
137
+ a_hits+=("$pattern")
138
+ fi
139
+ done
140
+ for pattern in "${_side_b_patterns_ref[@]}"; do
141
+ if printf '%s' "$description" | grep -qiE "$pattern" 2>/dev/null; then
142
+ b_hits+=("$pattern")
143
+ fi
144
+ done
145
+
146
+ local a_count="${#a_hits[@]}"
147
+ local b_count="${#b_hits[@]}"
148
+
149
+ if (( a_count >= 1 && b_count == 0 )); then
150
+ local joined
151
+ joined=$(IFS=,; echo "${a_hits[*]}")
152
+ printf 'SIDE_A_UNAMBIGUOUS|%s\n' "$joined"
153
+ elif (( a_count == 0 && b_count >= 1 )); then
154
+ local joined
155
+ joined=$(IFS=,; echo "${b_hits[*]}")
156
+ printf 'SIDE_B_UNAMBIGUOUS|%s\n' "$joined"
157
+ else
158
+ printf 'AMBIGUOUS|a=%d b=%d\n' "$a_count" "$b_count"
159
+ fi
160
+ }
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # risk_policy_matrix_lookup — RISK-POLICY.md Impact × Likelihood lookup.
164
+ #
165
+ # Used by:
166
+ # - manage-incident Step 4 Severity derivation
167
+ # - manage-problem Step 4 Priority derivation
168
+ #
169
+ # Caller passes description text plus six regex pattern arrays (by
170
+ # name) keyed by impact band (high/mod/low) and likelihood band
171
+ # (high/med/low). Helper echoes one of:
172
+ #
173
+ # <score>|<label>|impact=<L>+likelihood=<L>
174
+ # Single dominant impact band AND single dominant likelihood band
175
+ # matched. Score = impact_val * likelihood_val; label per
176
+ # RISK-POLICY.md § Label Bands (Very Low / Low / Medium / High /
177
+ # Very High).
178
+ # AMBIGUOUS|<reason>
179
+ # Multi-band hit (signals point to conflicting cells) OR zero hit
180
+ # (no mappable signal). Caller fires AskUserQuestion as the
181
+ # genuine ADR-044 category-5 (taste) fallback surface.
182
+ #
183
+ # Band-to-numeric mapping (preserves RISK-POLICY.md Impact / Likelihood
184
+ # Levels table):
185
+ # impact: high = 5 (Severe), mod = 3 (Moderate), low = 1 (Negligible)
186
+ # likelihood: high = 5 (Almost certain), med = 3 (Possible), low = 1 (Rare)
187
+ #
188
+ # Label bands (RISK-POLICY.md):
189
+ # 1-2 Very Low
190
+ # 3-4 Low
191
+ # 5-9 Medium
192
+ # 10-16 High
193
+ # 17-25 Very High
194
+ #
195
+ # This helper preserves the band-to-score mapping; callers that need a
196
+ # wider granularity (e.g. Significant=4 / Minor=2) must extend the
197
+ # pattern arrays' band-buckets in a follow-on contract change.
198
+ # ---------------------------------------------------------------------------
199
+ risk_policy_matrix_lookup() {
200
+ local description="$1"
201
+ local -n _impact_high_ref="$2"
202
+ local -n _impact_mod_ref="$3"
203
+ local -n _impact_low_ref="$4"
204
+ local -n _likelihood_high_ref="$5"
205
+ local -n _likelihood_med_ref="$6"
206
+ local -n _likelihood_low_ref="$7"
207
+
208
+ local pat
209
+ local impact_high_hits=0
210
+ local impact_mod_hits=0
211
+ local impact_low_hits=0
212
+ local likelihood_high_hits=0
213
+ local likelihood_med_hits=0
214
+ local likelihood_low_hits=0
215
+
216
+ for pat in "${_impact_high_ref[@]}"; do
217
+ if printf '%s' "$description" | grep -qiE "$pat" 2>/dev/null; then
218
+ impact_high_hits=$((impact_high_hits + 1))
219
+ fi
220
+ done
221
+ for pat in "${_impact_mod_ref[@]}"; do
222
+ if printf '%s' "$description" | grep -qiE "$pat" 2>/dev/null; then
223
+ impact_mod_hits=$((impact_mod_hits + 1))
224
+ fi
225
+ done
226
+ for pat in "${_impact_low_ref[@]}"; do
227
+ if printf '%s' "$description" | grep -qiE "$pat" 2>/dev/null; then
228
+ impact_low_hits=$((impact_low_hits + 1))
229
+ fi
230
+ done
231
+ for pat in "${_likelihood_high_ref[@]}"; do
232
+ if printf '%s' "$description" | grep -qiE "$pat" 2>/dev/null; then
233
+ likelihood_high_hits=$((likelihood_high_hits + 1))
234
+ fi
235
+ done
236
+ for pat in "${_likelihood_med_ref[@]}"; do
237
+ if printf '%s' "$description" | grep -qiE "$pat" 2>/dev/null; then
238
+ likelihood_med_hits=$((likelihood_med_hits + 1))
239
+ fi
240
+ done
241
+ for pat in "${_likelihood_low_ref[@]}"; do
242
+ if printf '%s' "$description" | grep -qiE "$pat" 2>/dev/null; then
243
+ likelihood_low_hits=$((likelihood_low_hits + 1))
244
+ fi
245
+ done
246
+
247
+ local nonzero_impact=0
248
+ (( impact_high_hits > 0 )) && nonzero_impact=$((nonzero_impact + 1))
249
+ (( impact_mod_hits > 0 )) && nonzero_impact=$((nonzero_impact + 1))
250
+ (( impact_low_hits > 0 )) && nonzero_impact=$((nonzero_impact + 1))
251
+
252
+ if (( nonzero_impact != 1 )); then
253
+ printf 'AMBIGUOUS|impact-bands-hit=%d\n' "$nonzero_impact"
254
+ return 0
255
+ fi
256
+
257
+ local impact_band=0
258
+ local impact_label=""
259
+ if (( impact_high_hits > 0 )); then
260
+ impact_band=5
261
+ impact_label="Severe"
262
+ elif (( impact_mod_hits > 0 )); then
263
+ impact_band=3
264
+ impact_label="Moderate"
265
+ elif (( impact_low_hits > 0 )); then
266
+ impact_band=1
267
+ impact_label="Negligible"
268
+ fi
269
+
270
+ local nonzero_likelihood=0
271
+ (( likelihood_high_hits > 0 )) && nonzero_likelihood=$((nonzero_likelihood + 1))
272
+ (( likelihood_med_hits > 0 )) && nonzero_likelihood=$((nonzero_likelihood + 1))
273
+ (( likelihood_low_hits > 0 )) && nonzero_likelihood=$((nonzero_likelihood + 1))
274
+
275
+ if (( nonzero_likelihood != 1 )); then
276
+ printf 'AMBIGUOUS|likelihood-bands-hit=%d\n' "$nonzero_likelihood"
277
+ return 0
278
+ fi
279
+
280
+ local likelihood_band=0
281
+ local likelihood_label=""
282
+ if (( likelihood_high_hits > 0 )); then
283
+ likelihood_band=5
284
+ likelihood_label="Almost-certain"
285
+ elif (( likelihood_med_hits > 0 )); then
286
+ likelihood_band=3
287
+ likelihood_label="Possible"
288
+ elif (( likelihood_low_hits > 0 )); then
289
+ likelihood_band=1
290
+ likelihood_label="Rare"
291
+ fi
292
+
293
+ local score=$((impact_band * likelihood_band))
294
+ local label
295
+ if (( score >= 17 )); then
296
+ label="Very High"
297
+ elif (( score >= 10 )); then
298
+ label="High"
299
+ elif (( score >= 5 )); then
300
+ label="Medium"
301
+ elif (( score >= 3 )); then
302
+ label="Low"
303
+ else
304
+ label="Very Low"
305
+ fi
306
+
307
+ printf '%d|%s|impact=%s+likelihood=%s\n' \
308
+ "$score" "$label" "$impact_label" "$likelihood_label"
309
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@windyroad/itil",
3
- "version": "0.30.1",
3
+ "version": "0.30.2-preview.317",
4
4
  "description": "ITIL-aligned IT service management for Claude Code (problem, and future incident/change skills)",
5
5
  "bin": {
6
6
  "windyroad-itil": "./bin/install.mjs"
@@ -0,0 +1,304 @@
1
+ #!/usr/bin/env bats
2
+
3
+ bats_require_minimum_version 1.5.0
4
+
5
+ # Behavioural assertions for packages/itil/lib/derive-first-dispatch.sh —
6
+ # the shared derive-first dispatch helper extracted in P132 Phase 2a-iii-A.
7
+ #
8
+ # The helper centralises the dispatch mechanism shipped across three
9
+ # declaration-skill surfaces (capture-problem Step 1.5, manage-incident
10
+ # Step 4, manage-problem Step 4). Each caller passes surface-specific
11
+ # signal definitions; the helper owns:
12
+ #
13
+ # - Slug derivation (Title) from prose
14
+ # - Two-sided lexical classifier (Type for capture-problem)
15
+ # - RISK-POLICY matrix lookup (Severity / Priority)
16
+ # - I2-isomorphic stderr advisory format
17
+ #
18
+ # @problem P132 (agents over-ask in interactive sessions — Phase 2a-iii-A
19
+ # shared helper extraction)
20
+ # @problem P185 (capture-problem Step 1.5 worked-example precedent)
21
+ # @adr ADR-044 (Decision-Delegation Contract — derive-first framework
22
+ # resolution boundary)
23
+ # @adr ADR-026 (cost-source grounding — stderr advisory shape)
24
+ # @adr ADR-052 (behavioural-by-default — these are runtime behaviour
25
+ # assertions on the helper functions, NOT structural greps)
26
+ # @jtbd JTBD-001 (enforce governance without slowing down — primary)
27
+ # @jtbd JTBD-101 (extend the suite with consistent patterns)
28
+
29
+ setup() {
30
+ LIB_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../lib" && pwd)"
31
+ HELPER="${LIB_DIR}/derive-first-dispatch.sh"
32
+ [ -f "$HELPER" ]
33
+ # shellcheck disable=SC1090
34
+ source "$HELPER"
35
+ PKG_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../.." && pwd)"
36
+ REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../.." && pwd)"
37
+ ARCHITECT_PKG_ROOT="$REPO_ROOT/packages/architect"
38
+ }
39
+
40
+ # ----------------------------------------------------------------------
41
+ # Stderr advisory contract (I2-isomorphic format across all 3 surfaces).
42
+ # Format: <skill>: derived <field>=<value> from <source>; <reversibility>
43
+ # ----------------------------------------------------------------------
44
+
45
+ @test "emit_stderr_advisory writes single canonical line to stderr" {
46
+ run -0 bash -c '
47
+ source "'"$HELPER"'"
48
+ emit_stderr_advisory manage-problem title my-slug "description tokens" \
49
+ "re-invoke with the desired title or rename the file if the slug is wrong"
50
+ '
51
+ # stderr captured in $output via run; assert structure
52
+ [[ "$output" == *"manage-problem: derived title=my-slug from description tokens; re-invoke with the desired title"* ]]
53
+ }
54
+
55
+ @test "emit_stderr_advisory uses default reversibility clause when omitted" {
56
+ run -0 bash -c '
57
+ source "'"$HELPER"'"
58
+ emit_stderr_advisory manage-incident severity "9 (Medium)" "RISK-POLICY matrix"
59
+ '
60
+ [[ "$output" == *"manage-incident: derived severity=9 (Medium) from RISK-POLICY matrix;"* ]]
61
+ [[ "$output" == *"re-invoke"* ]] || [[ "$output" == *"update"* ]]
62
+ }
63
+
64
+ @test "emit_stderr_advisory shape is I2-isomorphic across surfaces (same sentence structure)" {
65
+ run -0 bash -c '
66
+ source "'"$HELPER"'"
67
+ emit_stderr_advisory capture-problem type technical "description signals" "re-invoke with --type=user-business to override"
68
+ emit_stderr_advisory manage-incident title incident-slug "description" "re-invoke or rename"
69
+ emit_stderr_advisory manage-problem priority "9 (Medium)" "RISK-POLICY matrix" "re-invoke or update if mis-rated"
70
+ '
71
+ # Each surface emits the same sentence shape: <skill>: derived <field>=<value> from <source>; <clause>
72
+ line_count=$(printf '%s\n' "$output" | grep -c "^[a-z-]*: derived ")
73
+ [ "$line_count" -eq 3 ]
74
+ }
75
+
76
+ # ----------------------------------------------------------------------
77
+ # Kebab-case slug derivation from prose.
78
+ # ----------------------------------------------------------------------
79
+
80
+ @test "derive_kebab_slug produces kebab-case from prose" {
81
+ run -0 bash -c '
82
+ source "'"$HELPER"'"
83
+ derive_kebab_slug "Agent over-asks during interactive sessions"
84
+ '
85
+ [[ "$output" == *"agent"* ]]
86
+ [[ "$output" == *"over"* ]] || [[ "$output" == *"asks"* ]]
87
+ [[ "$output" != *" "* ]]
88
+ [[ "$output" != *"_"* ]]
89
+ }
90
+
91
+ @test "derive_kebab_slug drops stopwords" {
92
+ run -0 bash -c '
93
+ source "'"$HELPER"'"
94
+ derive_kebab_slug "The agent is asking the user a question"
95
+ '
96
+ # stopwords like "the", "a", "is" must NOT appear as standalone tokens
97
+ [[ "$output" != *"-the-"* ]]
98
+ [[ "$output" != "the-"* ]]
99
+ [[ "$output" == *"agent"* ]]
100
+ }
101
+
102
+ @test "derive_kebab_slug caps token count (default 8)" {
103
+ run -0 bash -c '
104
+ source "'"$HELPER"'"
105
+ derive_kebab_slug "one two three four five six seven eight nine ten eleven twelve"
106
+ '
107
+ token_count=$(printf '%s\n' "$output" | tr '-' '\n' | wc -l | tr -d ' ')
108
+ [ "$token_count" -le 8 ]
109
+ }
110
+
111
+ @test "derive_kebab_slug accepts custom token count" {
112
+ run -0 bash -c '
113
+ source "'"$HELPER"'"
114
+ derive_kebab_slug "alpha beta gamma delta epsilon zeta" 3
115
+ '
116
+ token_count=$(printf '%s\n' "$output" | tr '-' '\n' | wc -l | tr -d ' ')
117
+ [ "$token_count" -le 3 ]
118
+ }
119
+
120
+ # ----------------------------------------------------------------------
121
+ # Two-sided lexical classifier (capture-problem Step 1.5 mechanism).
122
+ # Returns:
123
+ # SIDE_A_UNAMBIGUOUS|<matched signals> — ≥1 A hit AND 0 B hits
124
+ # SIDE_B_UNAMBIGUOUS|<matched signals> — 0 A hits AND ≥1 B hit
125
+ # AMBIGUOUS|<reason> — mixed (both sides) OR zero
126
+ # ----------------------------------------------------------------------
127
+
128
+ @test "lexical_classify_two_sided returns SIDE_A_UNAMBIGUOUS on technical-only signals" {
129
+ run -0 bash -c '
130
+ source "'"$HELPER"'"
131
+ side_a=("\\b(hook|gate|regex|stderr|stdout|drift|TTL|cache)\\b")
132
+ side_b=("\\b(adopter|UX|friction|JTBD-[0-9]+)\\b")
133
+ lexical_classify_two_sided "the hook fires on stderr and the cache invalidates" side_a side_b
134
+ '
135
+ [[ "$output" == "SIDE_A_UNAMBIGUOUS|"* ]]
136
+ }
137
+
138
+ @test "lexical_classify_two_sided returns SIDE_B_UNAMBIGUOUS on user-business-only signals" {
139
+ run -0 bash -c '
140
+ source "'"$HELPER"'"
141
+ side_a=("\\b(hook|gate|regex|stderr|stdout|drift|TTL|cache)\\b")
142
+ side_b=("\\b(adopter|UX|friction|JTBD-[0-9]+)\\b")
143
+ lexical_classify_two_sided "the adopter friction makes JTBD-101 hard to complete" side_a side_b
144
+ '
145
+ [[ "$output" == "SIDE_B_UNAMBIGUOUS|"* ]]
146
+ }
147
+
148
+ @test "lexical_classify_two_sided returns AMBIGUOUS on mixed signals" {
149
+ run -0 bash -c '
150
+ source "'"$HELPER"'"
151
+ side_a=("\\b(hook|gate|regex|stderr)\\b")
152
+ side_b=("\\b(adopter|UX|friction)\\b")
153
+ lexical_classify_two_sided "the hook causes adopter friction" side_a side_b
154
+ '
155
+ [[ "$output" == "AMBIGUOUS|"* ]]
156
+ }
157
+
158
+ @test "lexical_classify_two_sided returns AMBIGUOUS on zero signals" {
159
+ run -0 bash -c '
160
+ source "'"$HELPER"'"
161
+ side_a=("\\b(hook|gate)\\b")
162
+ side_b=("\\b(adopter|UX)\\b")
163
+ lexical_classify_two_sided "totally bland text with no signals at all" side_a side_b
164
+ '
165
+ [[ "$output" == "AMBIGUOUS|"* ]]
166
+ }
167
+
168
+ # ----------------------------------------------------------------------
169
+ # RISK-POLICY matrix lookup (manage-incident / manage-problem mechanism).
170
+ # Returns:
171
+ # <score>|<label>|impact=<L>+likelihood=<L> — clear single-cell match
172
+ # AMBIGUOUS|<reason> — multi-band or zero match
173
+ # ----------------------------------------------------------------------
174
+
175
+ @test "risk_policy_matrix_lookup returns clear cell on unambiguous impact + likelihood signals" {
176
+ run -0 bash -c '
177
+ source "'"$HELPER"'"
178
+ impact_high=("\\b(down|outage|data loss|unavailable)\\b")
179
+ impact_mod=("\\b(slow|latency|degraded)\\b")
180
+ impact_low=("\\b(typo|cosmetic)\\b")
181
+ likelihood_high=("\\b(every request|reproducible|always)\\b")
182
+ likelihood_med=("\\b(intermittent|flaky)\\b")
183
+ likelihood_low=("\\b(one-off|single)\\b")
184
+ risk_policy_matrix_lookup "service is down on every request" impact_high impact_mod impact_low likelihood_high likelihood_med likelihood_low
185
+ '
186
+ # Nominal expectation: impact=high (5) * likelihood=high (5) = 25 -> "Very High". The assertions below are looser than that: any score of 25, 20 or 15 whose output contains "High" passes.
187
+ [[ "$output" == "25|"* ]] || [[ "$output" == "20|"* ]] || [[ "$output" == "15|"* ]]
188
+ [[ "$output" == *"High"* ]] || [[ "$output" == *"Very High"* ]]
189
+ }
190
+
191
+ @test "risk_policy_matrix_lookup returns AMBIGUOUS on multi-band impact" {
192
+ run -0 bash -c '
193
+ source "'"$HELPER"'"
194
+ impact_high=("\\b(down)\\b")
195
+ impact_mod=("\\b(slow)\\b")
196
+ impact_low=("\\b(typo)\\b")
197
+ likelihood_high=("\\b(every request)\\b")
198
+ likelihood_med=("\\b(intermittent)\\b")
199
+ likelihood_low=("\\b(one-off)\\b")
200
+ risk_policy_matrix_lookup "service is down and slow with typo" impact_high impact_mod impact_low likelihood_high likelihood_med likelihood_low
201
+ '
202
+ [[ "$output" == "AMBIGUOUS|"* ]]
203
+ }
204
+
205
+ @test "risk_policy_matrix_lookup returns AMBIGUOUS when no signals match" {
206
+ run -0 bash -c '
207
+ source "'"$HELPER"'"
208
+ impact_high=("\\b(down)\\b")
209
+ impact_mod=("\\b(slow)\\b")
210
+ impact_low=("\\b(typo)\\b")
211
+ likelihood_high=("\\b(every request)\\b")
212
+ likelihood_med=("\\b(intermittent)\\b")
213
+ likelihood_low=("\\b(one-off)\\b")
214
+ risk_policy_matrix_lookup "totally bland text" impact_high impact_mod impact_low likelihood_high likelihood_med likelihood_low
215
+ '
216
+ [[ "$output" == "AMBIGUOUS|"* ]]
217
+ }
218
+
219
+ @test "risk_policy_matrix_lookup label band aligns with RISK-POLICY.md (Medium = 5-9)" {
220
+ # Verify a specific clear-cell mapping produces the RISK-POLICY-canonical label.
221
+ # impact=mod (3) * likelihood=high (5) = 15 -> "High" band (10-16). NOTE(review): the test title cites "Medium = 5-9", but the cell asserted here is 15|High — confirm which band the title should name.
222
+ run -0 bash -c '
223
+ source "'"$HELPER"'"
224
+ impact_high=("\\b(down)\\b")
225
+ impact_mod=("\\b(slow)\\b")
226
+ impact_low=("\\b(typo)\\b")
227
+ likelihood_high=("\\b(every request)\\b")
228
+ likelihood_med=("\\b(intermittent)\\b")
229
+ likelihood_low=("\\b(one-off)\\b")
230
+ risk_policy_matrix_lookup "the service is slow on every request" impact_high impact_mod impact_low likelihood_high likelihood_med likelihood_low
231
+ '
232
+ [[ "$output" == "15|High|"* ]]
233
+ }
234
+
235
+ # ----------------------------------------------------------------------
236
+ # Cross-skill consistency: all 4 SKILL.md surfaces reference the helper
237
+ # as the shared dispatch mechanism. The I2-isomorphic stderr advisory
238
+ # format is locked-in by reference to derive-first-dispatch.sh.
239
+ #
240
+ # Phase 2a-iii-B (2026-05-16): 4th adopter wr-architect:create-adr added.
241
+ # Helper canonical source moved to packages/shared/ per ADR-017 sync
242
+ # pattern; per-package lib/ copies in packages/itil/lib/ and
243
+ # packages/architect/lib/ stay byte-identical via scripts/sync-derive-first-dispatch.sh.
244
+ # ----------------------------------------------------------------------
245
+
246
+ @test "capture-problem Step 1.5 cross-references derive-first-dispatch.sh helper" {
247
+ run grep -c "derive-first-dispatch\\.sh\\|packages/itil/lib/derive-first-dispatch" \
248
+ "${PKG_ROOT}/skills/capture-problem/SKILL.md"
249
+ [ "$status" -eq 0 ]
250
+ [ "$output" -ge 1 ]
251
+ }
252
+
253
+ @test "manage-incident Step 4 cross-references derive-first-dispatch.sh helper" {
254
+ run grep -c "derive-first-dispatch\\.sh\\|packages/itil/lib/derive-first-dispatch" \
255
+ "${PKG_ROOT}/skills/manage-incident/SKILL.md"
256
+ [ "$status" -eq 0 ]
257
+ [ "$output" -ge 1 ]
258
+ }
259
+
260
+ @test "manage-problem Step 4 cross-references derive-first-dispatch.sh helper" {
261
+ run grep -c "derive-first-dispatch\\.sh\\|packages/itil/lib/derive-first-dispatch" \
262
+ "${PKG_ROOT}/skills/manage-problem/SKILL.md"
263
+ [ "$status" -eq 0 ]
264
+ [ "$output" -ge 1 ]
265
+ }
266
+
267
+ @test "create-adr Step 2 cross-references derive-first-dispatch.sh helper (Phase 2a-iii-B 4th adopter)" {
268
+ # The 4th adopter (architect package) sources from its own per-package
269
+ # lib/ copy (NOT cross-package from itil) per ADR-017.
270
+ run grep -c "derive-first-dispatch\\.sh\\|packages/architect/lib/derive-first-dispatch" \
271
+ "${ARCHITECT_PKG_ROOT}/skills/create-adr/SKILL.md"
272
+ [ "$status" -eq 0 ]
273
+ [ "$output" -ge 1 ]
274
+ }
275
+
276
+ @test "helper file documents its four caller surfaces (audit trail)" {
277
+ # The helper must name the four SKILL.md surfaces it serves so the audit
278
+ # trail is recoverable from the helper itself. Each grep below scans the whole
279
+ # helper file (not only its header comment). Phase 2a-iii-B adds create-adr as the 4th adopter.
280
+ run grep -E "capture-problem" "$HELPER"
281
+ [ "$status" -eq 0 ]
282
+ run grep -E "manage-incident" "$HELPER"
283
+ [ "$status" -eq 0 ]
284
+ run grep -E "manage-problem" "$HELPER"
285
+ [ "$status" -eq 0 ]
286
+ run grep -E "create-adr" "$HELPER"
287
+ [ "$status" -eq 0 ]
288
+ }
289
+
290
+ @test "per-package lib/ copies are byte-identical to canonical packages/shared/ source (ADR-017)" {
291
+ # Phase 2a-iii-B + ADR-017: canonical at packages/shared/, synced copies
292
+ # in per-package lib/. The sync script (scripts/sync-derive-first-dispatch.sh)
293
+ # in --check mode is the CI guard; this test asserts the post-condition.
294
+ local shared_src="${REPO_ROOT}/packages/shared/derive-first-dispatch.sh"
295
+ local itil_copy="${REPO_ROOT}/packages/itil/lib/derive-first-dispatch.sh"
296
+ local architect_copy="${REPO_ROOT}/packages/architect/lib/derive-first-dispatch.sh"
297
+ [ -f "$shared_src" ]
298
+ [ -f "$itil_copy" ]
299
+ [ -f "$architect_copy" ]
300
+ run diff -q "$shared_src" "$itil_copy"
301
+ [ "$status" -eq 0 ]
302
+ run diff -q "$shared_src" "$architect_copy"
303
+ [ "$status" -eq 0 ]
304
+ }
@@ -75,6 +75,8 @@ Derive a kebab-case title slug from the first 8-10 non-stopword tokens of the de
75
75
 
76
76
  ### 1.5 Type classification (derive-first; silent-framework per ADR-044 category 4; taste fallback per category 5 on ambiguity)
77
77
 
78
+ **Shared dispatch helper**: this surface invokes `packages/itil/lib/derive-first-dispatch.sh` for the canonical lexical-classifier mechanism + I2-isomorphic stderr advisory format. The helper is sourced by `/wr-itil:capture-problem`, `/wr-itil:manage-incident`, and `/wr-itil:manage-problem`; drift in the advisory shape re-opens P132. Surface-specific signal definitions (technical-vs-user-business regex lists) stay inline below — the helper owns the mechanism, not the per-surface signals (architect verdict 2026-05-15 P132 Phase 2a-iii-A: "Helper must preserve per-surface signal definitions; only the dispatch mechanism is shared").
79
+
78
80
  Resolve `type_value` ∈ {`technical`, `user-business`} per the following framework-mediated dispatch. **The dispatch order is load-bearing** — pre-resolution flags short-circuit BEFORE the classifier runs, and the AskUserQuestion fires ONLY on genuinely-ambiguous descriptions.
79
81
 
80
82
  1. **If `--type=<value>` was set in Step 1**: use that value; do NOT run the classifier; do NOT fire AskUserQuestion (silent-proceed per ADR-013 Rule 5).
@@ -101,13 +103,13 @@ Resolve `type_value` ∈ {`technical`, `user-business`} per the following framew
101
103
  - `technical` — *"Bug, defect, broken behaviour, framework drift — root cause sits in code or process."*
102
104
  - `user-business` — *"Missing capability, UX gap, adopter friction, JTBD-shaped need — root cause sits in unmet user need."*
103
105
 
104
- **Stderr advisory contract** (silent-classification path only): emit a SINGLE line to stderr (NOT stdout, NOT in the ticket body) of the form:
106
+ **Stderr advisory contract** (silent-classification path only): emit a SINGLE line to stderr (NOT stdout, NOT in the ticket body) via the shared helper's `emit_stderr_advisory` function in `packages/itil/lib/derive-first-dispatch.sh`. The canonical format produced by the helper:
105
107
 
106
108
  ```
107
- capture-problem: classified type=<value> from description signals: <signal1>, <signal2>[, ...]; re-invoke with --type=<other-value> to override
109
+ capture-problem: derived type=<value> from description signals: <signal1>, <signal2>[, ...]; re-invoke with --type=<other-value> to override
108
110
  ```
109
111
 
110
- The advisory text shape is I2-isomorphic — the sentence structure (`classified type=<value> from description signals: ...; re-invoke with --type=<other-value> to override`) is identical regardless of which type was classified; only the substituted `<value>` / `<other-value>` / `<signal*>` tokens differ. Embedding the advisory in stdout would risk machine-readers parsing it as a ticket-body line; embedding it in the ticket body would violate ADR-060's frontmatter / body-bullet schema. Stderr is the correct channel — visible to interactive maintainers in the terminal; invisible to ticket consumers; loggable by AFK orchestrators that capture subprocess stderr.
112
+ The advisory text shape is I2-isomorphic — same sentence structure (`<skill>: derived <field>=<value> from <source>; <reversibility>`) across all three derive-first declaration-skill surfaces. The helper is the single source-of-truth for this format; drift here re-opens P132. Embedding the advisory in stdout would risk machine-readers parsing it as a ticket-body line; embedding it in the ticket body would violate ADR-060's frontmatter / body-bullet schema. Stderr is the correct channel — visible to interactive maintainers in the terminal; invisible to ticket consumers; loggable by AFK orchestrators that capture subprocess stderr.
111
113
 
112
114
  **I2 invariant guard (ADR-060 line 98)**: the resolved `type_value` is used at Step 4 ONLY as a substituted string in the skeleton template's `**Type**:` body field. Steps 2, 3, 4 (other than the `**Type**:` substitution), 5, 6, 7 execute identically regardless of `type_value`. The skill carries NO control-flow branch keyed on `type` — that would convert classification into a workflow split and violate I2. The lexical-signal classifier is UPSTREAM of the value's substitution (it resolves WHICH value to substitute, not WHICH workflow to execute); the substitution and all downstream steps remain uniform. Pure-bash supporting-script enforcement of this invariant lives in `packages/itil/scripts/test/i2-no-type-branching.bats`; the SKILL.md surface coverage gap is named at P176 (descendant of P012 master harness).
113
115
 
@@ -459,12 +459,17 @@ classify_description() {
459
459
  # I2 leak through the back door.
460
460
  # ---------------------------------------------------------------------------
461
461
 
462
- # Mirror of the SKILL.md advisory template.
462
+ # Mirror of the SKILL.md advisory template. P132 Phase 2a-iii-A renamed
463
+ # the verb from `classified` to `derived` to align with the shared helper
464
+ # `packages/itil/lib/derive-first-dispatch.sh`'s emit_stderr_advisory
465
+ # function — I2-isomorphic format `<skill>: derived <field>=<value> from
466
+ # <source>; <reversibility>` across all three derive-first declaration-skill
467
+ # surfaces.
463
468
  format_stderr_advisory() {
464
469
  local resolved_type="$1"
465
470
  local other_type="$2"
466
471
  local signals="$3"
467
- printf 'capture-problem: classified type=%s from description signals: %s; re-invoke with --type=%s to override\n' \
472
+ printf 'capture-problem: derived type=%s from description signals: %s; re-invoke with --type=%s to override\n' \
468
473
  "$resolved_type" "$signals" "$other_type"
469
474
  }
470
475
 
@@ -492,14 +497,17 @@ strip_substituted_tokens() {
492
497
  }
493
498
 
494
499
  @test "P185: stderr advisory does NOT prefix with type-value when describing the contract" {
495
- # The shape `classified type=<value> from description signals: <list>;
500
+ # The shape `derived type=<value> from description signals: <list>;
496
501
  # re-invoke with --type=<other> to override` — the leading prose
497
- # "capture-problem: classified type=" must be identical regardless of
498
- # type value (substitution happens AFTER the equals sign).
502
+ # "capture-problem: derived type=" must be identical regardless of
503
+ # type value (substitution happens AFTER the equals sign). P132 Phase
504
+ # 2a-iii-A renamed `classified` -> `derived` to align with the shared
505
+ # helper `packages/itil/lib/derive-first-dispatch.sh`'s I2-isomorphic
506
+ # format across all three declaration-skill surfaces.
499
507
  tech_msg=$(format_stderr_advisory technical user-business "sig")
500
508
  ub_msg=$(format_stderr_advisory user-business technical "sig")
501
- echo "$tech_msg" | grep -q '^capture-problem: classified type='
502
- echo "$ub_msg" | grep -q '^capture-problem: classified type='
509
+ echo "$tech_msg" | grep -q '^capture-problem: derived type='
510
+ echo "$ub_msg" | grep -q '^capture-problem: derived type='
503
511
  }
504
512
 
505
513
  # ---------------------------------------------------------------------------
@@ -154,6 +154,8 @@ echo "$next"
154
154
 
155
155
  ### 4. For new incidents: Gather information (P132 derive-first; ADR-044 category-4 silent-framework on derivable fields; category-1 direction-setting fallback only on Scope)
156
156
 
157
+ **Shared dispatch helper**: this surface invokes `packages/itil/lib/derive-first-dispatch.sh` for the canonical slug derivation (Title), RISK-POLICY matrix lookup (Severity), and I2-isomorphic stderr advisory format. The helper is sourced by `/wr-itil:capture-problem`, `/wr-itil:manage-incident`, and `/wr-itil:manage-problem`; drift in the advisory shape re-opens P132. Surface-specific signal definitions (severity impact / likelihood regex lists, start-time evidence sources) stay inline below — the helper owns the mechanism, not the per-surface signals (architect verdict 2026-05-15 P132 Phase 2a-iii-A: "Helper must preserve per-surface signal definitions; only the dispatch mechanism is shared").
158
+
157
159
  **Derive-first dispatch.** Incident declarations carry observable evidence in the user's prose, the working tree, `RISK-POLICY.md`, and the wall-clock — the framework can resolve most fields without firing `AskUserQuestion`. Only **Scope** is genuinely user-judgment (semantic blast-radius the framework cannot infer); only **Scope** retains the AskUserQuestion gate.
158
160
 
159
161
  The P132 inverse-P078 trap (`docs/problems/known-error/132-...md`) is the load-bearing motivation: the I001 declaration regression fired a 4-question AskUserQuestion with 3 of 4 sub-questions being lazy classifications (Title kebab-derivable, Severity matrix-derivable, Start time git-log-derivable). This dispatch closes that regression on the manage-incident surface and mirrors `/wr-itil:capture-problem` Step 1.5's worked-example pattern (P185 derive-first refactor).
@@ -376,6 +376,8 @@ If the local choice would have collided with an origin ticket created since the
376
376
 
377
377
  ### 4. For new problems: Gather information (P132 derive-first; ADR-044 category-4 silent-framework on derivable fields; category-1 direction-setting fallback only on Description)
378
378
 
379
+ **Shared dispatch helper**: this surface invokes `packages/itil/lib/derive-first-dispatch.sh` for the canonical slug derivation (Title), RISK-POLICY matrix lookup (Priority), and I2-isomorphic stderr advisory format. The helper is sourced by `/wr-itil:capture-problem`, `/wr-itil:manage-incident`, and `/wr-itil:manage-problem`; drift in the advisory shape re-opens P132. Surface-specific signal definitions (priority impact / likelihood regex lists) stay inline below — the helper owns the mechanism, not the per-surface signals (architect verdict 2026-05-15 P132 Phase 2a-iii-A: "Helper must preserve per-surface signal definitions; only the dispatch mechanism is shared").
380
+
379
381
  **Derive-first dispatch.** Problem-declaration inputs carry observable evidence in the user's prose, the working tree, `RISK-POLICY.md`, and the wall-clock — the framework can resolve most fields without firing `AskUserQuestion`. Only **Description** is genuinely user-knowledge (without prose there is literally nothing to capture); only **Description** retains the AskUserQuestion gate.
380
382
 
381
383
  The P132 inverse-P078 trap (`docs/problems/known-error/132-...md`) is the load-bearing motivation. The 2026-05-06 I001 declaration regression cited in P132 fired a 4-question AskUserQuestion with 3 of 4 sub-questions being lazy classifications (Title kebab-derivable, Severity matrix-derivable, Start time git-log-derivable). manage-problem Step 4 is the second declaration-skill surface under Phase 2a (after manage-incident Step 4 in commit b7cc645) to ship the derive-first dispatch. The pattern is isomorphic across `/wr-itil:capture-problem` Step 1.5 (P185 worked example), `/wr-itil:manage-incident` Step 4, and this skill.
@@ -371,7 +371,7 @@ rm -f "$ITER_JSON"
371
371
 
372
372
  1. **Context**: this is one iteration of the AFK work-problems loop. The user is AFK. The orchestrator selected `P<NNN> (<title>)` as the highest-WSJF actionable ticket.
373
373
  2. **Task**: apply the `/wr-itil:manage-problem` workflow for `work highest WSJF problem that can be progressed non-interactively as the user is AFK`. Follow manage-problem SKILL.md verbatim, including architect / jtbd / style-guide / voice-tone gate reviews and the commit gate (manage-problem Step 11). Because this subprocess has the Agent tool in its own surface, the normal review-via-subagent paths work — no inline-verdict fallback needed.
374
- 3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills (AFK carve-out — ADR-032). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary. **Treat the user as transient** (P130): even when observably present at orchestrator dispatch time, the user may answer one question and disappear for hours; presence is not a reliable signal and is not the goal. The iter's job is to progress the ticket and accumulate questions for batched surfacing — not to ask "is it OK to proceed?" at a mechanical-stage boundary. **Do NOT poll `bats` output with a bats-console-summary regex against TAP-format output** (P146 — bash until-loop-deadlock antipattern). The bats-console-summary line `<N> tests, <M> failures` is emitted ONLY by bats's *default* (non-TAP) formatter; `bats --tap` does not emit a console summary, so a polling loop of shape `until [ -f $OUT ] && grep -qE '^[0-9]+ tests?,' $OUT; do sleep 5; done` spins forever after bats completes (silent deadlock — no error, no exit; recovery requires manual SIGTERM with metadata loss per the P146/P147 stuck-before-emit subclass). 
When you need to wait on a backgrounded bats run, prefer `wait $bg_pid` (Unix idiom — completion signaled by process exit, no regex required) or, for the Bash tool, `run_in_background=true` + `BashOutput` polling on the tool's exit-state field rather than regex-poll on stdout. If you genuinely must regex-poll TAP output, anchor on the TAP plan line `^[0-9]+\.\.[0-9]+` (e.g. `1..1455`) — TAP's plan line is emitted on completion and is format-stable across bats versions; the bats-console-summary line is not. The console-summary vs TAP-format divergence is the load-bearing detail: `bats` and `bats --tap` produce structurally different stdout, and the antipattern assumes the former when iter dispatch typically uses the latter.
374
+ 3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills (AFK carve-out — ADR-032). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary. **Treat the user as transient** (P130): even when observably present at orchestrator dispatch time, the user may answer one question and disappear for hours; presence is not a reliable signal and is not the goal. The iter's job is to progress the ticket and accumulate questions for batched surfacing — not to ask "is it OK to proceed?" at a mechanical-stage boundary. **Do NOT poll `bats` output with a bats-console-summary regex against TAP-format output** (P146 — bash until-loop-deadlock antipattern). The bats-console-summary line `<N> tests, <M> failures` is emitted ONLY by bats's *default* (non-TAP) formatter; `bats --tap` does not emit a console summary, so a polling loop of shape `until [ -f $OUT ] && grep -qE '^[0-9]+ tests?,' $OUT; do sleep 5; done` spins forever after bats completes (silent deadlock — no error, no exit; recovery requires manual SIGTERM with metadata loss per the P146/P147 stuck-before-emit subclass). 
When you need to wait on a backgrounded bats run, prefer `wait $bg_pid` (Unix idiom — completion signaled by process exit, no regex required) or, for the Bash tool, `run_in_background=true` + `BashOutput` polling on the tool's exit-state field rather than regex-poll on stdout. If you genuinely must regex-poll TAP output, anchor on the TAP plan line `^[0-9]+\.\.[0-9]+` (e.g. `1..1455`) — TAP's plan line is emitted on completion and is format-stable across bats versions; the bats-console-summary line is not. The console-summary vs TAP-format divergence is the load-bearing detail: `bats` and `bats --tap` produce structurally different stdout, and the antipattern assumes the former when iter dispatch typically uses the latter. **Do NOT poll subprocess completion with `pgrep -f '<pattern>'` inside an `until` / `while` loop** (P232 — self-referential pgrep deadlock; sibling variant of P146). `pgrep -f` matches against the FULL command line of every running process, so the polling loop's own `zsh -c` argument (which contains the literal `pgrep -f '<pattern>'` text) matches itself; with multiple concurrent polling loops, each loop matches the others and spins forever. Worked example of the antipattern: `until ! pgrep -f 'bats --recursive' > /dev/null 2>&1; do sleep 5; done` — the 2026-05-16 P232 deadlock witness; 4 concurrent polling loops each matched the others' command lines while no actual bats process ran; 45 min wall-clock + $20-30 wasted before manual SIGTERM. The same self-reference shape applies to `while pgrep -f ...; do sleep; done` and to `until ! pkill -0 -f '<pattern>'` / `while pkill -0 -f '<pattern>'` (signal-0 polling). The structural fix is the same as P146: prefer `wait $bg_pid` (Unix idiom — shell-native completion signal, no regex / no pgrep) or Bash-tool `run_in_background=true` + `BashOutput` polling (harness-tracked completion state). 
The hook `packages/itil/hooks/itil-bash-polling-antipattern-detect.sh` denies these shapes at PreToolUse:Bash, but the prompt rule belongs here too — structural enforcement + prompt discipline together close the class.
375
375
  4. **Retro-on-exit (P086)**: before emitting `ITERATION_SUMMARY`, invoke `/wr-retrospective:run-retro`. Retro runs INSIDE this subprocess so its Step 2b pipeline-instability scan has access to the iteration's rich tool-call history (hook misbehaviour, repeat-workaround patterns, subagent-delegation friction, release-path instability). Retro may create tickets or update `docs/BRIEFING.md` — run-retro commits its own work per ADR-014; any tickets it creates ride into either the iteration's own commit (if retro runs before the main commit) or a retro-owned follow-up commit, and the orchestrator picks them up on the next Step 1 scan. Proceed to `ITERATION_SUMMARY` emission regardless of retro findings — retro is non-blocking (do not block on retro): if retro fails or surfaces findings, the iteration still returns a summary so the AFK loop does not silently halt on a flaky retro run.
376
376
  5. **Output**: end the final message with the `ITERATION_SUMMARY` block defined below — this is how the orchestrator consumes the iteration's result.
377
377
 
@@ -775,6 +775,7 @@ When every skipped ticket is in the `upstream-blocked` category (stop-condition
775
775
 
776
776
  - **P121** (`docs/problems/121-afk-orchestrator-should-sigterm-stuck-subprocesses-after-idle-timeout.verifying.md`) — driver for Step 5's backgrounded-poll-loop dispatch shape (replacing the prior foreground-synchronous form) and the idle-timeout SIGTERM branch. The 2026-04-25 P118 iter 5 evidence: an iteration subprocess sat idle ~70 min after its final commit, then SIGTERM produced a clean JSON exit-flush. Fix: orchestrator backgrounds the subprocess, polls every 60s, computes `LAST_ACTIVITY_MARK = max(DISPATCH_START_EPOCH, git log -1 --format=%at HEAD)`, and sends SIGTERM when `now - LAST_ACTIVITY_MARK > WORK_PROBLEMS_IDLE_TIMEOUT_S` (default 3600s = 60 min). Behavioural second-source: `test/work-problems-step-5-idle-timeout-sigterm.bats` exercises a fake `claude -p` shim that sleeps past the threshold and asserts SIGTERM, JSON exit-flush, env-var override, and within-threshold no-fire. Step 6's per-iter progress line SHOULD annotate `(SIGTERM_SENT)` when the branch fires so users can distinguish recovered iters from natural completions. ADR-032's subprocess-boundary variant amended 2026-04-26 with the backgrounded-poll-loop refinement.
777
777
  - **P146** (`docs/problems/146-afk-iteration-subprocess-bash-until-loop-polls-bats-output-with-bats-console-regex-against-tap-format.verifying.md`) — driver for Step 5 iteration prompt body's bats-output-polling-discipline clause. The 2026-04-29 incident (iter 1, PID 23580 child PID 16408) saw a `bash until`-loop poll a backgrounded bats output file with regex `^[0-9]+ tests?,` (bats's *default* console-summary format) against `bats --tap` output that never emits that line — silent infinite spin after bats completed; manual SIGTERM at 68m34s wall-clock; metadata loss per the P147 stuck-before-emit subclass. The polling idiom is NOT taught by any SKILL.md (audit confirmed via repo grep) — it is agent-learned from training data. Fix: prompt-discipline rule in the iteration prompt body's Constraints list explicitly forbidding the antipattern, naming `wait $bg_pid` (or Bash-tool `run_in_background=true` + `BashOutput`) as the safe substitute, and citing the TAP-vs-console-summary divergence so future contributors don't "fix" the rule incorrectly. Behavioural second-source: `test/work-problems-step-5-bats-polling-discipline.bats` asserts the prohibition phrase, the safe-substitute pointer, the P146 cite, the divergence explanation, and the Related-section cite.
778
+ - **P232** (`docs/problems/verifying/232-bash-until-loop-pgrep-self-referential-deadlock-new-variant-of-p146.md`) — sibling variant of P146; driver for the second clause in Step 5 iter prompt's polling-discipline rule plus the structural PreToolUse:Bash hook at `packages/itil/hooks/itil-bash-polling-antipattern-detect.sh`. The 2026-05-16 incident (iter 4, P132 Phase 2a-iii-B) saw 4 concurrent `until ! pgrep -f 'bats --recursive'` polling loops each match the OTHER loops' command lines and spin forever after the main commit landed; 45 min wall-clock + $20-30 wasted before manual SIGTERM. Two-layer fix: prompt-discipline clause naming the self-reference failure mode with worked-example syntax (`until ! pgrep -f ...`), PLUS PreToolUse:Bash hook denying `(until|while)[[:space:]]+!?[[:space:]]*(pgrep|pkill[[:space:]]+-0)` shapes with a deny message citing P232 and naming both recovery alternatives (`wait $bg_pid` shell-native, Bash-tool `BashOutput` harness-native). Behavioural second-source: `packages/itil/hooks/test/itil-bash-polling-antipattern-detect.bats` (positive cases — until/while pgrep, until/while pkill -0, heredoc; negative cases — one-shot pgrep, non-`-0` pkill, unrelated until/while, `wait $!`; advisory-message content cite). P146 prompt-only enforcement failed empirically in iter 4 of the very loop that ships it; P232 closes the class with structural enforcement.
778
779
  - **P147** (`docs/problems/147-p121-sigterm-clean-flush-guarantee-conditional-needs-skill-md-caveat-for-stuck-before-emit-subclass.verifying.md`) — refinement to P121's "clean exit-flush" claim. P118's evidence held only for subprocesses that had already emitted `ITERATION_SUMMARY` before going idle; the 2026-04-29 P146 incident produced exit 143 + 0-byte JSON when SIGTERM fired before `ITERATION_SUMMARY` emission. Fix: SKILL.md prose now carries the conditional caveat (Step 5 "SIGTERM exit-flush is conditional, not universal" subsection) and adopters reading the prose are directed to treat exit 143 + 0-byte JSON as a metadata-loss event — verify work integrity from `git log` + `git status --porcelain`, halt the AFK loop, and reconstruct cost from the Anthropic billing dashboard. Behavioural second-source extends `test/work-problems-step-5-idle-timeout-sigterm.bats` with a stuck-before-emit fake-shim asserting `JSON_BYTES=0` after SIGTERM. Mechanism unchanged (SIGTERM remains the right recovery primitive); the refinement is documentation accuracy + the metadata-loss-event handling shape.
779
780
  - **P089** (`docs/problems/089-work-problems-step-5-dispatch-robustness-stdin-warning-and-cost-metadata-edge-case.verifying.md`) — driver for Step 5's `< /dev/null` dispatch redirect and the Per-iteration cost metadata "Authority hierarchy" paragraph. Gap 1: stdin warning contaminated stderr-merged JSON captures; closed by adding `< /dev/null` to the canonical dispatch command. Gap 2: `.usage.*` undercounts when subprocess exits via a background-task completion ack while `.total_cost_usd` stays cumulative-authoritative; closed by documenting the authority hierarchy in Step 5 and the Session Cost output section so adopters trust cost and label token totals best-effort.
780
781
  - **P086** (`docs/problems/086-afk-iteration-subprocess-does-not-run-retro-before-returning.verifying.md`) — driver for Step 5's retro-on-exit clause. Iteration subprocesses exit without running retro, so per-iteration friction (hook misbehaviour, repeat-workaround patterns, pipeline instability) evaporates on exit. Fix: iteration prompt body names `/wr-retrospective:run-retro` as a closing step before `ITERATION_SUMMARY` emission; retro runs inside the subprocess so Step 2b pipeline-instability scan has the full tool-call history; run-retro commits its own work per ADR-014; orchestrator picks up retro-created tickets on the next Step 1 scan.