@windyroad/itil 0.18.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/hooks/hooks.json +6 -0
- package/hooks/itil-assistant-output-gate.sh +69 -0
- package/hooks/itil-assistant-output-review.sh +72 -0
- package/hooks/lib/detectors.sh +90 -0
- package/hooks/lib/session-marker.sh +46 -0
- package/hooks/test/itil-assistant-output-gate.bats +114 -0
- package/hooks/test/itil-assistant-output-review.bats +136 -0
- package/package.json +1 -1
- package/skills/work-problems/SKILL.md +25 -0
- package/skills/work-problems/test/work-problems-preflight-session-continuity.bats +139 -0
- package/skills/work-problems/test/work-problems-step-5-delegation.bats +32 -0
package/hooks/hooks.json
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
"hooks": {
|
|
3
3
|
"SessionStart": [
|
|
4
4
|
{ "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/bin/check-deps.sh wr-itil wr-risk-scorer" }] }
|
|
5
|
+
],
|
|
6
|
+
"UserPromptSubmit": [
|
|
7
|
+
{ "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-assistant-output-gate.sh" }] }
|
|
8
|
+
],
|
|
9
|
+
"Stop": [
|
|
10
|
+
{ "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-assistant-output-review.sh" }] }
|
|
5
11
|
]
|
|
6
12
|
}
|
|
7
13
|
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/bin/bash
# P085 / ADR-013 / ADR-038: itil UserPromptSubmit gate.
#
# Reads the hook payload (JSON) on stdin and looks at `.prompt`. When the
# prompt carries a direction-pinning signal (yes / go ahead / just do it /
# act now / proceed / ...), prints a MANDATORY reminder on stdout so the
# assistant acts instead of surfacing a prose consent gate.
#
# Output shape:
#   - first direction-pin prompt of a session  -> full instruction block,
#     then the session marker is written;
#   - later direction-pin prompts in the session -> one-line terse reminder;
#   - any other prompt (or an empty one)         -> no output at all.
#
# Every exit path uses status 0.
#
# Companion: itil-assistant-output-review.sh (Stop hook) catches any
# prose-ask that slipped through on the way out.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=lib/session-marker.sh
source "$SCRIPT_DIR/lib/session-marker.sh"
# shellcheck source=lib/detectors.sh
source "$SCRIPT_DIR/lib/detectors.sh"

# One-line reminder used once the full block has already been shown.
emit_terse_reminder() {
  cat <<'HOOK_OUTPUT'
MANDATORY: direction pinned. Act on obvious next step; use AskUserQuestion tool for genuine ambiguity; NEVER prose-ask. See ADR-013.
HOOK_OUTPUT
}

# Full instruction block, shown on the first direction-pin prompt.
emit_full_block() {
  cat <<'HOOK_OUTPUT'
INSTRUCTION: MANDATORY — act on obvious decisions; NEVER prose-ask.
DETECTED: incoming user prompt contains a direction-pinning signal
(yes / go / proceed / act / just do it / ...).

NON-OPTIONAL RULES:

1. If the next step is obvious from the user's direction, session
context, or RISK-POLICY.md appetite, ACT. Do NOT surface a consent
gate. Do NOT ask "Want me to...?" or "Should I...?".

2. If the decision is genuinely ambiguous (multiple-valid-paths, none
clearly better per direction/policy), use the AskUserQuestion tool.
Never prose-ask. Prose questions are unanswerable under AFK
notifications and violate ADR-013 Rule 1.

3. Canonical prose-ask phrasings to AVOID in your next response:
"Want me to", "Should I", "Would you like me to", "Shall we",
"Let me know if", "Do you want to", "Option A or Option B?",
"(a) / (b) / (c)?". If one of these is about to appear at the end
of your turn, stop and re-route via AskUserQuestion or just act.

4. The combined rule: obvious default => act; genuine ambiguity =>
AskUserQuestion tool; NEVER prose-ask.

See:
- ~/.claude/projects/.../memory/feedback_act_on_obvious_decisions.md
- docs/decisions/013-structured-user-interaction-for-governance-decisions.*.md
- Companion Stop hook (itil-assistant-output-review.sh) scans your
emitted turn for the patterns above and nudges if any slip through.
HOOK_OUTPUT
}

payload=$(cat)
# A missing field or unparsable payload degrades to an empty string.
session_id=$(jq -r '.session_id // empty' <<<"$payload" 2>/dev/null || echo "")
prompt=$(jq -r '.prompt // empty' <<<"$payload" 2>/dev/null || echo "")

# Guard: stay silent unless the prompt pins a direction. Bailing out here
# also keeps the once-per-session marker unspent on no-op prompts.
if [ -z "$prompt" ]; then
  exit 0
fi
if ! detect_direction_pin <<<"$prompt" >/dev/null; then
  exit 0
fi

if ! has_announced "itil-assistant-gate" "$session_id"; then
  emit_full_block
  mark_announced "itil-assistant-gate" "$session_id"
else
  emit_terse_reminder
fi

exit 0
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/bin/bash
# P085 / ADR-013: itil Stop hook — post-hoc prose-ask review.
#
# Input (stdin): hook payload JSON; only `.transcript_path` is read.
# Output (stdout): nothing for a clean turn; otherwise a single JSON
#   object `{ "stopReason": "..." }` naming the pattern that matched.
# Exit status: every exit path uses 0 — the hook is advisory, never
#   blocking.
#
# A nudge is emitted only when the last assistant turn's text matches a
# canonical prose-ask phrasing AND that turn holds no AskUserQuestion
# tool_use block.
#
# Stop hooks cannot rewrite the emitted turn; the nudge is meant to bias
# the next one. Pairs with the UserPromptSubmit gate for defence in depth.
#
# NOTE(review): Claude Code's hook reference describes `stopReason` as the
# message shown to the user when `continue` is false — confirm that a bare
# `stopReason` really reaches the next assistant turn as intended here.

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=lib/detectors.sh
source "$SCRIPT_DIR/lib/detectors.sh"

payload=$(cat)
transcript=$(jq -r '.transcript_path // empty' <<<"$payload" 2>/dev/null || echo "")

# Nothing to review without a readable transcript file.
if [ -z "$transcript" ]; then
  exit 0
fi
if [ ! -f "$transcript" ]; then
  exit 0
fi

# The transcript is JSONL: keep the last line whose `type` is "assistant".
last_turn=$(grep -E '"type"[[:space:]]*:[[:space:]]*"assistant"' "$transcript" 2>/dev/null | tail -n 1 || true)
[ -n "$last_turn" ] || exit 0

# The structured path was taken: an AskUserQuestion tool_use block in the
# turn means there is nothing to nudge about.
if jq -e '.message.content | map(select(.type == "tool_use" and .name == "AskUserQuestion")) | length > 0' <<<"$last_turn" >/dev/null 2>&1; then
  exit 0
fi

# Join every `text` block of the turn (or take the content verbatim when
# it is a plain string); any other shape yields the empty string.
turn_text=$(jq -r '
.message.content
| if type == "array" then map(select(.type == "text") | .text) | join("\n")
elif type == "string" then .
else "" end
' <<<"$last_turn" 2>/dev/null || echo "")
[ -n "$turn_text" ] || exit 0

# detect_prose_ask prints the pattern that hit; empty output means clean.
hit=$(detect_prose_ask <<<"$turn_text" 2>/dev/null) || true
[ -n "$hit" ] || exit 0

# Build the nudge with jq so the pattern is JSON-escaped correctly.
jq -n --arg match "$hit" '{
stopReason: (
"PROSE-ASK DETECTED in your last turn (pattern: \"" + $match + "\"). " +
"If the decision is obvious from direction / policy / session context, ACT — do not ask. " +
"If genuinely ambiguous, re-emit via the AskUserQuestion tool. " +
"Never prose-ask. See ADR-013 Rule 1 + feedback_act_on_obvious_decisions.md."
)
}'

exit 0
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# P085 detector registry: assistant-output-gate + assistant-output-review.
|
|
3
|
+
#
|
|
4
|
+
# Two detection functions sharing a single library so the UserPromptSubmit
|
|
5
|
+
# pre-generation reminder (gate) and the Stop post-hoc review use the
|
|
6
|
+
# same canonical phrasing list — one place to update when a new prose-ask
|
|
7
|
+
# pattern lands.
|
|
8
|
+
#
|
|
9
|
+
# Composition: this registry is the shape P078 (correction->ticket) and
|
|
10
|
+
# future itil assistant-output validators extend. Each detector is a
|
|
11
|
+
# pure function — takes text on stdin or as $1, exits 0 on match,
|
|
12
|
+
# non-zero on no-match.
|
|
13
|
+
|
|
14
|
+
# Canonical prose-ask phrasings — the patterns that should be emitted
|
|
15
|
+
# via AskUserQuestion instead. Extracted from P085 ticket + memory
|
|
16
|
+
# feedback_act_on_obvious_decisions.md.
|
|
17
|
+
#
|
|
18
|
+
# Case-insensitive, anchored to word boundaries where meaningful.
|
|
19
|
+
# Grep -E extended regex. Each entry is a separate alternation group
|
|
20
|
+
# in case future detectors want to report which phrase matched.
|
|
21
|
+
# Ordered list of grep -E patterns (matched case-insensitively by
# detect_prose_ask). Order matters: the scan stops at the first pattern
# that matches and reports that pattern.
PROSE_ASK_PATTERNS=(
  'Want me to'
  'Should I\b'
  'Would you like me to'
  'Shall we\b'
  'Shall I\b'
  'Let me know if'
  # The optional group covers both "Do you want to" and "Do you want me
  # to", so a separate 'Do you want to' entry could never be reached.
  'Do you want (me )?to'
  # Two lettered options on one line, e.g. "Option A: ... Option B".
  'Option [A-Z][:.]? .*Option [A-Z]'
  # Three parenthesised choices on one line: "(a) ... (b) ... (c)".
  '\([a-c]\).*\([a-c]\).*\([a-c]\)'
  # Slash-separated pair: "(a) / (b)".
  '\([a-c]\) ?/ ?\([a-c]\)'
  # Numbered pair on one line: "(1) ... (2)".
  '\(1\) .*\(2\)'
  'Which (do you|option|one|path) .*\?'
)
|
|
36
|
+
|
|
37
|
+
# Direction-pinning patterns — signals in the user's incoming prompt
|
|
38
|
+
# that the next step is obvious and the assistant should act, not ask.
|
|
39
|
+
# Extracted from feedback_act_on_obvious_decisions.md.
|
|
40
|
+
# Ordered list of grep -E patterns (matched case-insensitively by
# detect_direction_pin). Every entry is wrapped in \b so it only matches
# whole words / phrases. The scan reports the first entry that matches,
# so more specific phrases are listed ahead of the shorter ones they
# contain (e.g. 'just do it' before 'do it').
DIRECTION_PIN_PATTERNS=(
  '\byes\b'
  '\bgo ahead\b'
  '\bjust do it\b'
  '\bjust go\b'
  '\bproceed\b'
  '\bact now\b'
  '\bact on\b'
  '\bdo it\b'
  '\bmake it so\b'
  '\bdrain\b'
  '\bship it\b'
)
|
|
53
|
+
|
|
54
|
+
# _first_matching_pattern: shared scan loop behind both detectors.
#
# Arguments: $1  - text to scan
#            $2… - grep -E patterns, tried in the order given
# Outputs:   the first pattern that matches (the regex source itself,
#            not the substring it matched), followed by a newline
# Returns:   0 on the first match, 1 when no pattern matches
_first_matching_pattern() {
  local text="$1"
  shift
  local pattern
  for pattern in "$@"; do
    # Here-string instead of `echo |` so text that looks like an echo
    # option is passed through untouched; `--` protects patterns that
    # start with a dash.
    if grep -Eqi -- "$pattern" <<<"$text"; then
      printf '%s\n' "$pattern"
      return 0
    fi
  done
  return 1
}

# detect_prose_ask: scans text for canonical prose-ask phrasings
# (PROSE_ASK_PATTERNS). The text is taken from $1 when an argument is
# given, otherwise from stdin. Exits 0 if any pattern matches, 1
# otherwise. Writes the first matching pattern to stdout (for
# observability in the Stop hook stopReason).
#
# Usage:
#   if echo "$text" | detect_prose_ask > /dev/null; then ... fi
#   if detect_prose_ask "$text" > /dev/null; then ... fi
detect_prose_ask() {
  local text
  if [ "$#" -gt 0 ]; then
    text="$1"
  else
    text=$(cat)
  fi
  _first_matching_pattern "$text" "${PROSE_ASK_PATTERNS[@]}"
}

# detect_direction_pin: scans text for direction-pinning signals
# (DIRECTION_PIN_PATTERNS). The text is taken from $1 when an argument
# is given, otherwise from stdin. Exits 0 if any pattern matches, 1
# otherwise. Writes the first matching pattern to stdout.
#
# Usage:
#   if echo "$prompt" | detect_direction_pin > /dev/null; then ... fi
#   if detect_direction_pin "$prompt" > /dev/null; then ... fi
detect_direction_pin() {
  local text
  if [ "$#" -gt 0 ]; then
    text="$1"
  else
    text=$(cat)
  fi
  _first_matching_pattern "$text" "${DIRECTION_PIN_PATTERNS[@]}"
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Shared session-announcement marker helpers (P095 / ADR-038).
|
|
3
|
+
#
|
|
4
|
+
# Used by UserPromptSubmit hooks to gate verbose MANDATORY instruction
|
|
5
|
+
# prose behind a once-per-session check. First prompt of a session emits
|
|
6
|
+
# the full block AND calls mark_announced; subsequent prompts see the
|
|
7
|
+
# marker via has_announced and emit only a terse reminder.
|
|
8
|
+
#
|
|
9
|
+
# Why no TTL or drift check (unlike review-gate.sh): announcement is
|
|
10
|
+
# bookkeeping for prose verbosity, not enforcement. PreToolUse gates
|
|
11
|
+
# still block unauthorised edits regardless of announcement state; the
|
|
12
|
+
# delegated agent re-reads policy when it runs. Extending the marker's
|
|
13
|
+
# lifetime across policy changes mid-session is safe — the gate, not
|
|
14
|
+
# the announcement, is load-bearing.
|
|
15
|
+
#
|
|
16
|
+
# Marker path convention: /tmp/${SYSTEM}-announced-${SESSION_ID}
|
|
17
|
+
# (mirrors the /tmp/${SYSTEM}-reviewed-${SESSION_ID} convention from
|
|
18
|
+
# style-guide/voice-tone/risk-scorer review-gate.sh; the -announced-
|
|
19
|
+
# suffix distinguishes announcement markers from clearance markers).
|
|
20
|
+
#
|
|
21
|
+
# Empty SESSION_ID fallback: has_announced returns 1 (not announced,
|
|
22
|
+
# full block emits) and mark_announced is a no-op (no file written).
|
|
23
|
+
# This covers manual hook invocation, test harnesses, and any rare
|
|
24
|
+
# case where Claude Code does not pass a session_id on stdin.
|
|
25
|
+
|
|
26
|
+
# has_announced SYSTEM SESSION_ID
#
# Succeeds (status 0) when the marker file
# /tmp/${SYSTEM}-announced-${SESSION_ID} exists as a regular file, and
# fails (status 1) otherwise. An empty SESSION_ID always fails, so the
# caller treats the session as never announced.
#
# Usage: has_announced "architect" "$SESSION_ID"
has_announced() {
  local system="$1"
  local session_id="$2"
  if [ -z "$session_id" ]; then
    return 1
  fi
  test -f "/tmp/${system}-announced-${session_id}"
}
|
|
36
|
+
|
|
37
|
+
# Writes the announcement marker for SYSTEM in SESSION_ID. Empty
# SESSION_ID => no-op. Safe to call more than once per session.
#
# SESSION_ID arrives from the hook payload, so it is treated as
# untrusted before it is spliced into a path under world-writable /tmp:
#   - an id containing "/" is refused (no-op), because it could resolve
#     to a file outside the marker namespace;
#   - the marker is created with noclobber, so an existing file — or the
#     target of a pre-planted symlink — is never truncated.
#
# Returns 0 when the marker exists afterwards or the call was a no-op,
# non-zero when the marker could not be created.
#
# Usage: mark_announced "architect" "$SESSION_ID"
mark_announced() {
  local SYSTEM="$1"
  local SESSION_ID="$2"
  [ -n "$SESSION_ID" ] || return 0
  case "$SESSION_ID" in
    */*) return 0 ;;
  esac

  local marker="/tmp/${SYSTEM}-announced-${SESSION_ID}"
  # Subshell keeps noclobber from leaking into the caller's shell options.
  if ( set -o noclobber; : > "$marker" ) 2>/dev/null; then
    return 0
  fi
  # Creation refused: fine if something is already there (repeat call),
  # a real failure otherwise.
  [ -e "$marker" ] || [ -L "$marker" ]
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# P085: itil-assistant-output-gate.sh UserPromptSubmit hook must detect
|
|
4
|
+
# when the user's incoming prompt pins a direction / confirms a prior
|
|
5
|
+
# ask / issues an act-verb, and inject a once-per-session MANDATORY
|
|
6
|
+
# reminder instructing the assistant to act without asking — or use
|
|
7
|
+
# AskUserQuestion for genuine ambiguity — and NEVER prose-ask.
|
|
8
|
+
#
|
|
9
|
+
# Per ADR-038: full block emits once per session; subsequent prompts
|
|
10
|
+
# emit a terse reminder (<250 bytes) that keeps the MANDATORY signal,
|
|
11
|
+
# the gate name, and the AskUserQuestion affordance.
|
|
12
|
+
#
|
|
13
|
+
# Per feedback_behavioural_tests.md (P081): these are behavioural
|
|
14
|
+
# assertions — they simulate the hook's payload on stdin and assert
|
|
15
|
+
# on what the hook emits, not on the source text of the hook file.
|
|
16
|
+
|
|
17
|
+
# Per-test fixture: locates the hook under test and picks a session id
# that is unique to this test process.
#
# The hook is resolved relative to this test file (hooks/test/ -> hooks/)
# so the suite runs both from the monorepo checkout and from the
# published package, where there is no packages/itil/ prefix.
# REPO_ROOT is still exported with its original meaning (four levels up).
setup() {
  local test_dir
  test_dir="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)"
  REPO_ROOT="$(cd "$test_dir/../../../.." && pwd)"
  HOOK="$(cd "$test_dir/.." && pwd)/itil-assistant-output-gate.sh"
  SID="itil-gate-test-$$-$RANDOM"
}
|
|
22
|
+
|
|
23
|
+
teardown() {
|
|
24
|
+
rm -f "/tmp/itil-assistant-gate-announced-${SID}"
|
|
25
|
+
rm -f "/tmp/itil-assistant-gate-announced-${SID}-alt"
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
run_hook() {
|
|
29
|
+
local sid="$1"
|
|
30
|
+
local prompt="$2"
|
|
31
|
+
echo "{\"session_id\":\"$sid\",\"prompt\":$(printf '%s' "$prompt" | jq -Rs .)}" | bash "$HOOK"
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
@test "gate: emits full MANDATORY block on first direction-pin prompt" {
|
|
35
|
+
run run_hook "$SID" "yes, update P084 and verify the subagent tools thing"
|
|
36
|
+
[ "$status" -eq 0 ]
|
|
37
|
+
[ "${#output}" -gt 400 ]
|
|
38
|
+
[[ "$output" == *"MANDATORY"* ]]
|
|
39
|
+
[[ "$output" == *"AskUserQuestion"* ]]
|
|
40
|
+
[[ "$output" == *"obvious"* ]] || [[ "$output" == *"act"* ]]
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
@test "gate: writes the announcement marker on first emission" {
|
|
44
|
+
run run_hook "$SID" "go ahead and do it"
|
|
45
|
+
[ "$status" -eq 0 ]
|
|
46
|
+
[ -f "/tmp/itil-assistant-gate-announced-${SID}" ]
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
@test "gate: second direction-pin prompt in same session emits terse reminder only" {
|
|
50
|
+
run_hook "$SID" "yes, please proceed" >/dev/null
|
|
51
|
+
# Second prompt also pins direction (required for the gate to fire).
|
|
52
|
+
run run_hook "$SID" "yes, go ahead"
|
|
53
|
+
[ "$status" -eq 0 ]
|
|
54
|
+
[ "${#output}" -lt 250 ]
|
|
55
|
+
[[ "$output" == *"AskUserQuestion"* ]]
|
|
56
|
+
# Full block is NOT re-emitted
|
|
57
|
+
[[ "$output" != *"Canonical prose-ask phrasings"* ]]
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
@test "gate: terse reminder preserves MANDATORY / REQUIRED signal word" {
|
|
61
|
+
run_hook "$SID" "yes" >/dev/null
|
|
62
|
+
run run_hook "$SID" "act on this"
|
|
63
|
+
[ "$status" -eq 0 ]
|
|
64
|
+
[[ "$output" == *"MANDATORY"* ]] || [[ "$output" == *"REQUIRED"* ]] || [[ "$output" == *"NON-OPTIONAL"* ]]
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
@test "gate: different session_id re-emits the full block" {
|
|
68
|
+
run_hook "$SID" "yes" >/dev/null
|
|
69
|
+
local SID2="${SID}-alt"
|
|
70
|
+
run run_hook "$SID2" "yes"
|
|
71
|
+
[ "$status" -eq 0 ]
|
|
72
|
+
[ "${#output}" -gt 400 ]
|
|
73
|
+
rm -f "/tmp/itil-assistant-gate-announced-${SID2}"
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
@test "gate: empty session_id emits the full block and writes no marker" {
|
|
77
|
+
run run_hook "" "yes, go"
|
|
78
|
+
[ "$status" -eq 0 ]
|
|
79
|
+
[ "${#output}" -gt 400 ]
|
|
80
|
+
[ ! -f "/tmp/itil-assistant-gate-announced-" ]
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
@test "gate: non-direction-pinning prompt does not emit a block" {
|
|
84
|
+
# A conversational prompt with no direction/act-verb/yes signal
|
|
85
|
+
# should not burn the session-marker budget.
|
|
86
|
+
run run_hook "$SID" "what does ADR-013 say about ambiguous decisions?"
|
|
87
|
+
[ "$status" -eq 0 ]
|
|
88
|
+
# May emit a short neutral note OR nothing, but must not emit the
|
|
89
|
+
# full MANDATORY block on a non-direction prompt.
|
|
90
|
+
[[ "$output" != *"MANDATORY: act"* ]] || [ "${#output}" -lt 250 ]
|
|
91
|
+
# Must not have written the announcement marker either.
|
|
92
|
+
[ ! -f "/tmp/itil-assistant-gate-announced-${SID}" ]
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
@test "gate: direction-pin via 'act now' verb triggers the block" {
|
|
96
|
+
run run_hook "$SID" "act now and close the ticket"
|
|
97
|
+
[ "$status" -eq 0 ]
|
|
98
|
+
[ "${#output}" -gt 400 ]
|
|
99
|
+
[[ "$output" == *"MANDATORY"* ]]
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
@test "gate: direction-pin via 'just do it' triggers the block" {
|
|
103
|
+
run run_hook "$SID" "just do it"
|
|
104
|
+
[ "$status" -eq 0 ]
|
|
105
|
+
[ "${#output}" -gt 400 ]
|
|
106
|
+
[[ "$output" == *"MANDATORY"* ]]
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
@test "gate: terse reminder references AskUserQuestion" {
|
|
110
|
+
run_hook "$SID" "yes" >/dev/null
|
|
111
|
+
run run_hook "$SID" "proceed"
|
|
112
|
+
[ "$status" -eq 0 ]
|
|
113
|
+
[[ "$output" == *"AskUserQuestion"* ]]
|
|
114
|
+
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# P085: itil-assistant-output-review.sh Stop hook reads the last
|
|
4
|
+
# assistant turn from the transcript at `transcript_path` on stdin and
|
|
5
|
+
# scans for canonical prose-ask phrasings ("Want me to", "Should I",
|
|
6
|
+
# "Option A or Option B", etc.). When a prose-ask is detected, the
|
|
7
|
+
# hook emits a stopReason JSON object with a nudge instructing the
|
|
8
|
+
# assistant to re-emit via AskUserQuestion (or to act, if the decision
|
|
9
|
+
# was obvious). Clean turns pass silently (no stopReason field).
|
|
10
|
+
#
|
|
11
|
+
# The Stop hook cannot rewrite the emitted turn — only nudge the next
|
|
12
|
+
# turn — so its job is post-hoc detection + durable signal, per the
|
|
13
|
+
# architect verdict for P085.
|
|
14
|
+
|
|
15
|
+
# Per-test fixture: locates the hook under test and creates a scratch
# directory holding the transcript file the tests write.
#
# The hook is resolved relative to this test file (hooks/test/ -> hooks/)
# so the suite runs both from the monorepo checkout and from the
# published package, where there is no packages/itil/ prefix.
# REPO_ROOT is still exported with its original meaning (four levels up).
setup() {
  local test_dir
  test_dir="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)"
  REPO_ROOT="$(cd "$test_dir/../../../.." && pwd)"
  HOOK="$(cd "$test_dir/.." && pwd)/itil-assistant-output-review.sh"
  TMPDIR_="$(mktemp -d)"
  TRANSCRIPT="$TMPDIR_/transcript.jsonl"
}
|
|
21
|
+
|
|
22
|
+
teardown() {
|
|
23
|
+
rm -rf "$TMPDIR_"
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
# Writes a JSONL transcript with a user message followed by an
|
|
27
|
+
# assistant message. Claude Code transcript format: each line is a
|
|
28
|
+
# JSON object with `type: user|assistant` and `message.content` being
|
|
29
|
+
# either a string or an array of content blocks (text/tool_use).
|
|
30
|
+
write_transcript() {
|
|
31
|
+
local user_text="$1"
|
|
32
|
+
local assistant_text="$2"
|
|
33
|
+
{
|
|
34
|
+
printf '{"type":"user","message":{"role":"user","content":%s}}\n' "$(printf '%s' "$user_text" | jq -Rs .)"
|
|
35
|
+
printf '{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":%s}]}}\n' "$(printf '%s' "$assistant_text" | jq -Rs .)"
|
|
36
|
+
} > "$TRANSCRIPT"
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
run_hook() {
|
|
40
|
+
echo "{\"session_id\":\"stop-test\",\"transcript_path\":$(printf '%s' "$TRANSCRIPT" | jq -Rs .)}" | bash "$HOOK"
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
@test "review: 'Want me to' in assistant text triggers stopReason nudge" {
|
|
44
|
+
write_transcript "update the ticket" "Done. Want me to also commit now or wait?"
|
|
45
|
+
run run_hook
|
|
46
|
+
[ "$status" -eq 0 ]
|
|
47
|
+
[[ "$output" == *"stopReason"* ]]
|
|
48
|
+
[[ "$output" == *"AskUserQuestion"* ]]
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
@test "review: 'Should I' in assistant text triggers stopReason nudge" {
|
|
52
|
+
write_transcript "look at this" "I see the issue. Should I fix it now or wait for review?"
|
|
53
|
+
run run_hook
|
|
54
|
+
[ "$status" -eq 0 ]
|
|
55
|
+
[[ "$output" == *"stopReason"* ]]
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
@test "review: 'Would you like me to' triggers stopReason nudge" {
|
|
59
|
+
write_transcript "ok" "Ticket updated. Would you like me to open a PR next?"
|
|
60
|
+
run run_hook
|
|
61
|
+
[ "$status" -eq 0 ]
|
|
62
|
+
[[ "$output" == *"stopReason"* ]]
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
@test "review: 'Option A or Option B' triggers stopReason nudge" {
|
|
66
|
+
write_transcript "plan this" "Two paths: Option A: do it inline. Or Option B: split into two commits. Which do you prefer?"
|
|
67
|
+
run run_hook
|
|
68
|
+
[ "$status" -eq 0 ]
|
|
69
|
+
[[ "$output" == *"stopReason"* ]]
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
@test "review: '(a) / (b) / (c)?' triggers stopReason nudge" {
|
|
73
|
+
write_transcript "go" "Three choices: (a) ship now, (b) wait for review, or (c) add tests first?"
|
|
74
|
+
run run_hook
|
|
75
|
+
[ "$status" -eq 0 ]
|
|
76
|
+
[[ "$output" == *"stopReason"* ]]
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
@test "review: clean informative turn does not trigger stopReason" {
|
|
80
|
+
write_transcript "what changed" "The last commit added three files: detectors.sh, gate.sh, and review.sh. No new decisions were introduced."
|
|
81
|
+
run run_hook
|
|
82
|
+
[ "$status" -eq 0 ]
|
|
83
|
+
# Clean output: either empty or a JSON object WITHOUT stopReason.
|
|
84
|
+
[[ "$output" != *"stopReason"* ]]
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
@test "review: assistant turn containing AskUserQuestion tool_use does NOT flag" {
|
|
88
|
+
# If the assistant used the AskUserQuestion tool, that's the compliant
|
|
89
|
+
# path — we must not nudge them for using prose inside the tool's
|
|
90
|
+
# rendered question text (the tool surface is structured).
|
|
91
|
+
{
|
|
92
|
+
printf '{"type":"user","message":{"role":"user","content":"plan this"}}\n'
|
|
93
|
+
cat <<'JSON'
|
|
94
|
+
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"Here is the choice:"},{"type":"tool_use","name":"AskUserQuestion","input":{"question":"Which option?","options":[{"label":"A"},{"label":"B"}]}}]}}
|
|
95
|
+
JSON
|
|
96
|
+
} > "$TRANSCRIPT"
|
|
97
|
+
run run_hook
|
|
98
|
+
[ "$status" -eq 0 ]
|
|
99
|
+
[[ "$output" != *"stopReason"* ]]
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
@test "review: missing transcript_path exits cleanly without error" {
|
|
103
|
+
run bash -c 'echo "{\"session_id\":\"sid\"}" | bash "$1"' -- "$HOOK"
|
|
104
|
+
[ "$status" -eq 0 ]
|
|
105
|
+
[[ "$output" != *"stopReason"* ]]
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
@test "review: non-existent transcript file exits cleanly without error" {
|
|
109
|
+
run bash -c "echo '{\"session_id\":\"sid\",\"transcript_path\":\"/tmp/does-not-exist-$RANDOM\"}' | bash '$HOOK'"
|
|
110
|
+
[ "$status" -eq 0 ]
|
|
111
|
+
[[ "$output" != *"stopReason"* ]]
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
@test "review: ask-when-obvious pattern triggers stopReason" {
|
|
115
|
+
# Prior user message pins direction ("yes, update the ticket"), next
|
|
116
|
+
# assistant turn ends with a question mark on an obvious-next-step.
|
|
117
|
+
# This is Facet A of P085 — asking when the answer is obvious.
|
|
118
|
+
write_transcript "yes, update the ticket with the findings" "Updated. Want me to commit and close the ticket now, or leave it open for review?"
|
|
119
|
+
run run_hook
|
|
120
|
+
[ "$status" -eq 0 ]
|
|
121
|
+
[[ "$output" == *"stopReason"* ]]
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
@test "review: 'Shall we' triggers stopReason nudge" {
|
|
125
|
+
write_transcript "look at options" "That's one route. Shall we go with it?"
|
|
126
|
+
run run_hook
|
|
127
|
+
[ "$status" -eq 0 ]
|
|
128
|
+
[[ "$output" == *"stopReason"* ]]
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
@test "review: 'Do you want to' triggers stopReason nudge" {
|
|
132
|
+
write_transcript "plan" "I can split into two PRs. Do you want to go that route?"
|
|
133
|
+
run run_hook
|
|
134
|
+
[ "$status" -eq 0 ]
|
|
135
|
+
[[ "$output" == *"stopReason"* ]]
|
|
136
|
+
}
|
package/package.json
CHANGED
|
@@ -36,6 +36,29 @@ Before opening the work loop, reconcile local state with origin so the orchestra
|
|
|
36
36
|
|
|
37
37
|
**Cross-cutting**: this rule applies to every AFK orchestrator skill. The next-ID collision guard (ADR-019 confirmation criterion 2) belongs in the ticket-creator skills (`manage-problem` and `wr-architect:create-adr`), not here — see the related problem ticket for that work.
|
|
38
38
|
|
|
39
|
+
#### Session-continuity detection pass (per P109)
|
|
40
|
+
|
|
41
|
+
After the fetch/divergence check, Step 0 MUST run a session-continuity detection pass. The divergence check handles "did origin move under us"; this pass handles the distinct failure mode "did the prior session leave partial work that changes what iter 1 should do". A prior AFK subprocess can exit mid-ticket (quota 429, user-cancel, subprocess crash) and leave observable state in the working tree that the orchestrator must classify before opening the work loop.
|
|
42
|
+
|
|
43
|
+
**Signals to enumerate** (each maps to one `git status --porcelain` / filesystem / `git worktree` probe):
|
|
44
|
+
|
|
45
|
+
| Signal | Detection |
|
|
46
|
+
|---|---|
|
|
47
|
+
| Untracked `docs/decisions/*.proposed.md` | `git status --porcelain docs/decisions/` filtered for `??` entries ending `.proposed.md` — drafted but unlanded ADRs from a prior iter. |
|
|
48
|
+
| Untracked `docs/problems/*.md` | `git status --porcelain docs/problems/` filtered for `??` entries ending `.md` — drafted but unlanded problem tickets. |
|
|
49
|
+
| `.afk-run-state/iter-*.json` error markers | Files under `.afk-run-state/` containing `"is_error": true` OR `"api_error_status" >= 400` — prior iteration hit quota or API error; its work is likely partial. Success files (`"is_error": false`) are ignored. Contract source: ADR-032 subprocess artefact. |
|
|
50
|
+
| Stale `.claude/worktrees/*` dirs + matching `claude/*` branches | `git worktree list` filtered on `claude/*` branches adjacent to `.claude/worktrees/*` directories — prior subagent worktrees that were not cleaned up. Detection only — mutation (cleanup) is out of scope and requires a separate ADR. |
|
|
51
|
+
| Uncommitted modifications to SKILL.md / source / ADR files | `git status --porcelain` filtered for `M ` / ` M` entries on `packages/*/skills/*/SKILL.md`, `packages/*/hooks/*`, `docs/decisions/*.proposed.md`, or other source paths the prior session was mid-authoring. |
|
|
52
|
+
|
|
53
|
+
**Classification**: when any signal is present, build a structured Prior-Session State report listing each hit (signal category, path, one-line summary). An empty signal set means clean pass-through to Step 1.
|
|
54
|
+
|
|
55
|
+
**Routing on interactive-vs-AFK (per ADR-013 Rule 1 / Rule 6):**
|
|
56
|
+
|
|
57
|
+
- **Interactive** (`AskUserQuestion` is available AND the loop was not started in AFK mode): prompt the user with the Prior-Session State report and four options — **Resume the prior work** (land the drafted files as iter 1), **Discard the draft** and restart from scratch, **Leave-and-lower-priority** (skip the dirty paths and work the next backlog item that doesn't touch them), **Halt the loop** (too much dirty state to proceed non-interactively). Route the chosen branch before opening Step 1.
|
|
58
|
+
- **Non-interactive / AFK** (default for this skill per JTBD-006): do NOT call `AskUserQuestion`. Halt the loop with the structured Prior-Session State report in the AFK summary. Per ADR-013 Rule 6 fail-safe: ambiguous session-continuity state requires user input; non-interactive recovery would mask the bug this check is meant to surface. This matches Step 6.75's "dirty for unknown reason → halt" stance at the Step 0 layer — the orchestrator does not silently proceed past partial work.
|
|
59
|
+
|
|
60
|
+
Step 6.75 treats a Step-0-resolved-with-user-confirmation state as `dirty-for-known-reason`: if the interactive branch's Resume option landed the drafted ADR as iter 1, the iter's commit clears the dirty state and the rest of the loop proceeds normally.
|
|
61
|
+
|
|
39
62
|
### Step 1: Scan the backlog
|
|
40
63
|
|
|
41
64
|
Read `docs/problems/README.md` if it exists and is fresh (check via git history — see manage-problem step 9 for the cache freshness check). If stale or missing, scan all `.open.md` and `.known-error.md` files in `docs/problems/`, extract their WSJF scores, and rank them.
|
|
@@ -339,6 +362,7 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
|
|
|
339
362
|
| Pipeline risk at appetite (push or release = 4/25) | Drain release queue (`push:watch` then `release:watch`) before next iteration — per ADR-018 (Step 6.5) |
|
|
340
363
|
| Pipeline risk above appetite (push or release >= 5/25) | Auto-apply scorer remediations incrementally (ADR-042 Rule 2). The agent reads suggestions and decides what to do. Re-score after each apply; drain when within appetite. **Never release above appetite** (ADR-042 Rule 1) — no AskUserQuestion shortcut. Halt the loop with `outcome: halted-above-appetite` if the loop exhausts without convergence (ADR-042 Rule 5). Verification Pending commits excluded from auto-revert (Rule 2b). Per ADR-042 (Step 6.5 Above-appetite branch). |
|
|
341
364
|
| Origin diverged before start | Pull `--ff-only` if trivial; stop with report (`git log HEAD..origin/<base>` and reverse) if non-fast-forward — per ADR-019 (Step 0) |
|
|
365
|
+
| Prior-session partial work detected at start (session-continuity dirty: untracked `docs/decisions/*.proposed.md` / `docs/problems/*.md`, `.afk-run-state/iter-*.json` with `is_error: true` or `api_error_status >= 400`, stale `.claude/worktrees/*`, uncommitted SKILL.md/source/ADR edits) | Halt the loop with a structured Prior-Session State report in the AFK summary. Do NOT attempt non-interactive resume. Interactive invocations prompt via `AskUserQuestion` with 4 options (resume / discard / leave-and-lower-priority / halt). Per P109 + ADR-013 Rule 6 (Step 0 session-continuity detection pass). |
|
|
342
366
|
| Fix verification needed | Skip problem, add to "needs verification" list |
|
|
343
367
|
| Stop-condition #2 with user-answerable skip-reasons | Emit Outstanding Design Questions table in summary (do NOT call AskUserQuestion). The persona is AFK by definition — per JTBD-006 and ADR-013 Rule 6 — so the table is the default. Interactive invocations may batch up to 4 questions through AskUserQuestion instead — per ADR-013 Rule 1 (Step 2.5). |
|
|
344
368
|
| Unexpected dirty state between iterations | Halt the loop. Report the `git status --porcelain` output, the last iteration's reported outcome, and the divergence — per P036 (Step 6.75). Do NOT attempt non-interactive recovery. |
|
|
@@ -416,6 +440,7 @@ When every skipped ticket is in the `upstream-blocked` category (stop-condition
|
|
|
416
440
|
- **P083** (`docs/problems/083-work-problems-iteration-worker-prompt-does-not-forbid-schedulewakeup.open.md`) — iteration prompt body forbids `ScheduleWakeup`. Applies equally to subprocess-dispatched iterations.
|
|
417
441
|
- **P036** — inter-iteration verification (Step 6.75); remains in the orchestrator's main turn.
|
|
418
442
|
- **P040** — origin-fetch preflight (Step 0); unchanged.
|
|
443
|
+
- **P109** — session-continuity detection pass added to Step 0 after the fetch/divergence check. Enumerates five signals (untracked `docs/decisions/*.proposed.md`, untracked `docs/problems/*.md`, `.afk-run-state/iter-*.json` error markers, stale `.claude/worktrees/*` dirs, uncommitted SKILL.md/source/ADR edits). Routes interactive via `AskUserQuestion` with 4 options, AFK via halt-with-report per ADR-013 Rule 6.
|
|
419
444
|
- **P041** — release-cadence drain (Step 6.5); remains in the orchestrator's main turn.
|
|
420
445
|
- **P053** — Outstanding Design Questions surfacing at stop-condition #2 (Step 2.5); fed by the iteration subagent's `outstanding_questions` field.
|
|
421
446
|
- **ADR-013** (`docs/decisions/013-structured-user-interaction-for-governance-decisions.proposed.md`) — Rule 6 non-interactive fail-safe applies to every iteration-subagent decision surface.
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
# Contract-assertion bats for work-problems Step 0 session-continuity
|
|
3
|
+
# detection (the extension per P109).
|
|
4
|
+
#
|
|
5
|
+
# Per ADR-037 SKILL.md is a contract document; these assertions check the
|
|
6
|
+
# contract strings the skill prose authoritatively pins for the Step 0
|
|
7
|
+
# session-continuity detection pass. Follows the split pattern established
|
|
8
|
+
# by work-problems-preflight.bats (fetch/divergence assertions) — this file
|
|
9
|
+
# covers the second invariant family: prior-session partial-work signals +
|
|
10
|
+
# interactive/AFK routing.
|
|
11
|
+
#
|
|
12
|
+
# Cross-reference:
|
|
13
|
+
# @problem P109 (work-problems preflight does not detect prior-session partial-work state)
|
|
14
|
+
# ADR-019 (AFK orchestrator preflight — extension scope)
|
|
15
|
+
# ADR-013 (structured user interaction — Rule 1 interactive, Rule 6 non-interactive fail-safe)
|
|
16
|
+
# ADR-032 (governance skill invocation patterns — .afk-run-state/iter-*.json contract)
|
|
17
|
+
# ADR-037 (skill testing strategy — contract-assertion framing)
|
|
18
|
+
# @jtbd JTBD-006 (Progress the Backlog While I'm Away — session-continuity detection belongs in Step 0)
|
|
19
|
+
|
|
20
|
+
setup() {
|
|
21
|
+
SKILL_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)"
|
|
22
|
+
SKILL_FILE="${SKILL_DIR}/SKILL.md"
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
@test "SKILL.md exists" {
|
|
26
|
+
[ -f "$SKILL_FILE" ]
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
@test "SKILL.md Step 0 cites P109 (session-continuity driver)" {
|
|
30
|
+
# Contract criterion: the extension is traceable to its driver ticket.
|
|
31
|
+
run grep -n "P109" "$SKILL_FILE"
|
|
32
|
+
[ "$status" -eq 0 ]
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
@test "SKILL.md Step 0 names the session-continuity detection pass" {
|
|
36
|
+
# Contract criterion: the new detection pass is named as a discrete
|
|
37
|
+
# concept in the Step 0 prose (not buried under the divergence check).
|
|
38
|
+
run grep -niE "session.continuity" "$SKILL_FILE"
|
|
39
|
+
[ "$status" -eq 0 ]
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
@test "SKILL.md Step 0 enumerates untracked docs/decisions/*.proposed.md signal" {
|
|
43
|
+
# Contract criterion: drafted-but-unlanded ADRs are one of the signals
|
|
44
|
+
# the session-continuity detection pass MUST enumerate.
|
|
45
|
+
run grep -nE "docs/decisions/\*\.proposed\.md|docs/decisions/.*\.proposed\.md" "$SKILL_FILE"
|
|
46
|
+
[ "$status" -eq 0 ]
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
@test "SKILL.md Step 0 enumerates untracked docs/problems/*.md signal" {
|
|
50
|
+
# Contract criterion: drafted-but-unlanded problem tickets are enumerated
|
|
51
|
+
# as one of the session-continuity signals. The preflight section already
|
|
52
|
+
# references docs/problems/ for the scan surface; the test checks that
|
|
53
|
+
# the Preflight section names untracked problem files as a detection
|
|
54
|
+
# signal (not merely the backlog-scan surface).
|
|
55
|
+
run grep -niE "untracked.*docs/problems|docs/problems/.*untracked" "$SKILL_FILE"
|
|
56
|
+
[ "$status" -eq 0 ]
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
@test "SKILL.md Step 0 enumerates .afk-run-state/iter-*.json error signal" {
|
|
60
|
+
# Contract criterion: the .afk-run-state/iter-*.json subprocess artefacts
|
|
61
|
+
# (per ADR-032) with is_error: true or api_error_status >= 400 are named
|
|
62
|
+
# as a signal.
|
|
63
|
+
run grep -nE "\.afk-run-state/iter-\*\.json|\.afk-run-state/iter-.*\.json" "$SKILL_FILE"
|
|
64
|
+
[ "$status" -eq 0 ]
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
@test "SKILL.md Step 0 names the is_error / api_error_status fields" {
|
|
68
|
+
# Contract criterion: the specific JSON fields the detection pass reads
|
|
69
|
+
# are named verbatim so the contract is unambiguous.
|
|
70
|
+
run grep -niE "is_error.*true|api_error_status" "$SKILL_FILE"
|
|
71
|
+
[ "$status" -eq 0 ]
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
@test "SKILL.md Step 0 enumerates stale .claude/worktrees signal" {
|
|
75
|
+
# Contract criterion: stale subagent worktrees are a detection signal.
|
|
76
|
+
# Detection only (not cleanup/mutation — per P109 scope boundary).
|
|
77
|
+
run grep -nE "\.claude/worktrees" "$SKILL_FILE"
|
|
78
|
+
[ "$status" -eq 0 ]
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
@test "SKILL.md Step 0 enumerates git worktree list signal for claude/* branches" {
|
|
82
|
+
# Contract criterion: git worktree list is the detection mechanism for
|
|
83
|
+
# claude/* branches adjacent to the .claude/worktrees/ dir check.
|
|
84
|
+
run grep -niE "git worktree list" "$SKILL_FILE"
|
|
85
|
+
[ "$status" -eq 0 ]
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
@test "SKILL.md Step 0 enumerates uncommitted SKILL.md / source / ADR edits signal" {
|
|
89
|
+
# Contract criterion: mid-authoring source edits are a detection signal.
|
|
90
|
+
run grep -niE "uncommitted.*(SKILL\.md|source|ADR)|(SKILL\.md|source|ADR).*uncommitted" "$SKILL_FILE"
|
|
91
|
+
[ "$status" -eq 0 ]
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
@test "SKILL.md Step 0 routes interactive via AskUserQuestion" {
|
|
95
|
+
# Contract criterion per ADR-013 Rule 1: the interactive branch uses
|
|
96
|
+
# AskUserQuestion.
|
|
97
|
+
run grep -nE "AskUserQuestion" "$SKILL_FILE"
|
|
98
|
+
[ "$status" -eq 0 ]
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
@test "SKILL.md Step 0 interactive branch names the 4 option categories" {
|
|
102
|
+
# Contract criterion: the AskUserQuestion 4-option shape is pinned so
|
|
103
|
+
# adopters know the branch set. Resume / discard / leave-and-lower-priority / halt.
|
|
104
|
+
run grep -niE "resume" "$SKILL_FILE"
|
|
105
|
+
[ "$status" -eq 0 ]
|
|
106
|
+
run grep -niE "discard" "$SKILL_FILE"
|
|
107
|
+
[ "$status" -eq 0 ]
|
|
108
|
+
run grep -niE "leave.*lower.priority|leave.and.lower.priority" "$SKILL_FILE"
|
|
109
|
+
[ "$status" -eq 0 ]
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
@test "SKILL.md Step 0 routes AFK via halt-with-report per ADR-013 Rule 6" {
|
|
113
|
+
# Contract criterion per ADR-013 Rule 6: the non-interactive / AFK branch
|
|
114
|
+
# halts with a report rather than silently choosing.
|
|
115
|
+
run grep -niE "halt.with.report|halt with report|Rule 6 fail.safe" "$SKILL_FILE"
|
|
116
|
+
[ "$status" -eq 0 ]
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
@test "SKILL.md Step 0 cites ADR-013 Rule 6 for AFK fail-safe" {
|
|
120
|
+
# Contract criterion: ADR-013 Rule 6 is named as the authority for the
|
|
121
|
+
# non-interactive halt branch.
|
|
122
|
+
run grep -nE "ADR-013.*Rule 6|Rule 6.*ADR-013" "$SKILL_FILE"
|
|
123
|
+
[ "$status" -eq 0 ]
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
@test "SKILL.md Step 0 emits a structured Prior-Session State report" {
|
|
127
|
+
# Contract criterion: the AFK halt branch surfaces a structured report,
|
|
128
|
+
# not a free-text prose blurb — so the user can act on it on return.
|
|
129
|
+
run grep -niE "Prior.Session State" "$SKILL_FILE"
|
|
130
|
+
[ "$status" -eq 0 ]
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
@test "SKILL.md Non-Interactive Decision Making table covers session-continuity" {
|
|
134
|
+
# Contract criterion: the decision-matrix section surfaces the new branch
|
|
135
|
+
# so adopters of the skill can find the AFK default without reading
|
|
136
|
+
# Step 0 prose in full.
|
|
137
|
+
run grep -niE "Prior.session partial.work|session.continuity.*dirty|Prior-Session State.*AFK" "$SKILL_FILE"
|
|
138
|
+
[ "$status" -eq 0 ]
|
|
139
|
+
}
|
|
@@ -213,3 +213,35 @@ setup() {
|
|
|
213
213
|
run grep -nE "ADR-014" "$SKILL_FILE"
|
|
214
214
|
[ "$status" -eq 0 ]
|
|
215
215
|
}
|
|
216
|
+
|
|
217
|
+
# @problem P083
|
|
218
|
+
# @jtbd JTBD-006
|
|
219
|
+
# @jtbd JTBD-001
|
|
220
|
+
# @jtbd JTBD-101
|
|
221
|
+
# @jtbd JTBD-201
|
|
222
|
+
#
|
|
223
|
+
# STRUCTURAL: tests the SKILL.md content contract per ADR-037's Permitted
|
|
224
|
+
# Exception (doc-lint contract assertion against the contract document itself)
|
|
225
|
+
# — same rationale already covered by the file-header block above. Behavioural
|
|
226
|
+
# alternative would require simulating a `claude -p` iteration subprocess with
|
|
227
|
+
# a large task and observing tool-call traces for absence of ScheduleWakeup;
|
|
228
|
+
# that harness is not yet available at the skill layer (see P081 retrofit path).
|
|
229
|
+
@test "SKILL.md Step 5 iteration prompt forbids ScheduleWakeup (P083 — synchronous-handoff contract)" {
|
|
230
|
+
# P083 (2026-04-21): AFK iter 5 observed an iteration worker call ScheduleWakeup
|
|
231
|
+
# mid-task, abandoning the synchronous-handoff contract (no ITERATION_SUMMARY
|
|
232
|
+
# returned; uncommitted work left in tree; Step 6.75 halted the loop on
|
|
233
|
+
# dirty-for-unknown-reason). The Step 5 iteration prompt body MUST explicitly
|
|
234
|
+
# forbid ScheduleWakeup so LLM-driven workers do not reach for time-deferring
|
|
235
|
+
# primitives — iteration workers are synchronous by contract (ADR-032 AFK
|
|
236
|
+
# iteration-isolation wrapper). Regression guard for the 260768f clause.
|
|
237
|
+
run grep -niE "Do NOT use .?ScheduleWakeup|ScheduleWakeup.{0,80}(must not|not.{0,10}self-reschedule)|not.{0,10}self-reschedule" "$SKILL_FILE"
|
|
238
|
+
[ "$status" -eq 0 ]
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
@test "SKILL.md Step 5 ScheduleWakeup forbidding clause cites P083" {
|
|
242
|
+
# The forbidding clause must cite P083 inline so the contract document is
|
|
243
|
+
# self-documenting — a future contributor removing the clause reads the
|
|
244
|
+
# P083 reference and understands why it exists before deleting it.
|
|
245
|
+
run grep -nE "ScheduleWakeup.{0,120}P083|P083.{0,120}ScheduleWakeup" "$SKILL_FILE"
|
|
246
|
+
[ "$status" -eq 0 ]
|
|
247
|
+
}
|