@windyroad/itil 0.22.0-preview.243 → 0.22.1-preview.246
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/hooks/lib/detectors.sh +4 -0
- package/hooks/lib/session-id.sh +31 -18
- package/hooks/test/itil-assistant-output-review.bats +40 -0
- package/hooks/test/session-id.bats +41 -0
- package/package.json +1 -1
- package/skills/work-problems/SKILL.md +31 -1
- package/skills/work-problems/test/work-problems-no-mid-loop-asking.bats +192 -0
package/hooks/lib/detectors.sh
CHANGED
|
@@ -32,6 +32,10 @@ PROSE_ASK_PATTERNS=(
|
|
|
32
32
|
'\([a-c]\) ?/ ?\([a-c]\)'
|
|
33
33
|
'\(1\) .*\(2\)'
|
|
34
34
|
'Which (do you|option|one|path) .*\?'
|
|
35
|
+
'Awaiting your (direction|input|decision|response|confirmation|answer|reply)'
|
|
36
|
+
'Pending your (direction|input|decision|response|confirmation|answer|reply)'
|
|
37
|
+
'Once you confirm'
|
|
38
|
+
'Awaiting your direction on whether'
|
|
35
39
|
)
|
|
36
40
|
|
|
37
41
|
# Direction-pinning patterns — signals in the user's incoming prompt
|
package/hooks/lib/session-id.sh
CHANGED
|
@@ -88,25 +88,38 @@ get_current_session_id() {
|
|
|
88
88
|
voice-tone
|
|
89
89
|
)
|
|
90
90
|
|
|
91
|
-
local system marker
|
|
91
|
+
local system marker
|
|
92
92
|
for system in "${systems[@]}"; do
|
|
93
|
-
#
|
|
94
|
-
#
|
|
95
|
-
#
|
|
96
|
-
#
|
|
97
|
-
#
|
|
98
|
-
#
|
|
99
|
-
#
|
|
100
|
-
#
|
|
101
|
-
#
|
|
102
|
-
#
|
|
103
|
-
#
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
93
|
+
# Two-axis selection:
|
|
94
|
+
# ACROSS systems — fixed priority order (architect first, then
|
|
95
|
+
# jtbd, ...). The outer for-loop encodes this. The first system
|
|
96
|
+
# with any present marker wins; later systems are not consulted.
|
|
97
|
+
# WITHIN a system — most-recent-mtime wins (`ls -t | head -1`).
|
|
98
|
+
# Multi-session developer machines accumulate one
|
|
99
|
+
# `${system}-announced-${SID}` marker per past session in /tmp;
|
|
100
|
+
# the live session's marker is by construction the most-recently-
|
|
101
|
+
# created one. P124 Phase 2 used first-glob-match (alphabetical),
|
|
102
|
+
# which returned the lexically-first stale UUID when /tmp had
|
|
103
|
+
# accumulated markers from prior sessions — observed regression
|
|
104
|
+
# 2026-04-28 with 103 stale architect markers selecting the
|
|
105
|
+
# wrong UUID, denying the create-gate (P119).
|
|
106
|
+
#
|
|
107
|
+
# Why mtime is safe here even though Phase 1 architect rejected it:
|
|
108
|
+
# The Phase 1 rejection applied to `-reviewed-` markers, which
|
|
109
|
+
# `touch`-refresh on every gate check (ADR-009 sliding TTL +
|
|
110
|
+
# P111 subprocess refresh). Mtime on a `-reviewed-` marker is
|
|
111
|
+
# "last seen", not "first written" — selecting newest-mtime can
|
|
112
|
+
# surface a stale session whose marker was just touch-refreshed.
|
|
113
|
+
# `-announced-` markers are write-once-per-session per ADR-038
|
|
114
|
+
# (no `touch`-refresh, no TTL); their mtime IS the announcing
|
|
115
|
+
# session's first-prompt timestamp. Newest mtime within a single
|
|
116
|
+
# `-announced-` glob unambiguously identifies the live session.
|
|
117
|
+
#
|
|
118
|
+
# Portability: `ls -t` is POSIX (sort by modification time, newest
|
|
119
|
+
# first). 2>/dev/null suppresses "no such file" when the glob
|
|
120
|
+
# expands to nothing under both bash and zsh; head -1 gracefully
|
|
121
|
+
# returns empty in that case.
|
|
122
|
+
marker=$(ls -t "${marker_dir}/${system}-announced-"* 2>/dev/null | head -1)
|
|
110
123
|
if [ -n "$marker" ]; then
|
|
111
124
|
# Strip the prefix to recover the trailing UUID.
|
|
112
125
|
basename "$marker" | sed "s/^${system}-announced-//"
|
|
package/hooks/test/itil-assistant-output-review.bats
CHANGED
|
@@ -134,3 +134,43 @@ JSON
|
|
|
134
134
|
[ "$status" -eq 0 ]
|
|
135
135
|
[[ "$output" == *"stopReason"* ]]
|
|
136
136
|
}
|
|
137
|
+
|
|
138
|
+
# 2026-04-28 regression evidence (P085 reopen, Citation 1).
|
|
139
|
+
# Orchestrator main turn emitted a halt-summary ending with "Awaiting your
|
|
140
|
+
# direction on whether to add it + resume on P123, or end the session."
|
|
141
|
+
# The Stop hook should have caught this binary-choice prose-ask but the
|
|
142
|
+
# pattern list did not match. Detector extension closes the gap.
|
|
143
|
+
@test "review: 'Awaiting your direction on whether ... or ...' (Citation 1 shape) triggers stopReason" {
|
|
144
|
+
write_transcript "ok" "Loop is still halted. Remaining open item: missing changeset for b9da37e. Awaiting your direction on whether to add it + resume on P123, or end the session."
|
|
145
|
+
run run_hook
|
|
146
|
+
[ "$status" -eq 0 ]
|
|
147
|
+
[[ "$output" == *"stopReason"* ]]
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
@test "review: 'Awaiting your input' triggers stopReason nudge" {
|
|
151
|
+
write_transcript "ok" "Plan staged. Awaiting your input on the next step."
|
|
152
|
+
run run_hook
|
|
153
|
+
[ "$status" -eq 0 ]
|
|
154
|
+
[[ "$output" == *"stopReason"* ]]
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
@test "review: 'Pending your decision' triggers stopReason nudge" {
|
|
158
|
+
write_transcript "review" "Refactor scoped to three files. Pending your decision before I continue."
|
|
159
|
+
run run_hook
|
|
160
|
+
[ "$status" -eq 0 ]
|
|
161
|
+
[[ "$output" == *"stopReason"* ]]
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
@test "review: 'Once you confirm' triggers stopReason nudge" {
|
|
165
|
+
write_transcript "look" "Rename ready. Once you confirm, I will proceed with the rename."
|
|
166
|
+
run run_hook
|
|
167
|
+
[ "$status" -eq 0 ]
|
|
168
|
+
[[ "$output" == *"stopReason"* ]]
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
@test "review: 'Awaiting your response' triggers stopReason nudge" {
|
|
172
|
+
write_transcript "go" "Two paths identified. Awaiting your response so I know which to take."
|
|
173
|
+
run run_hook
|
|
174
|
+
[ "$status" -eq 0 ]
|
|
175
|
+
[[ "$output" == *"stopReason"* ]]
|
|
176
|
+
}
|
|
package/hooks/test/session-id.bats
CHANGED
|
@@ -135,3 +135,44 @@ mark_announced() {
|
|
|
135
135
|
[[ "$zsh_run" != *"shopt: command not found"* ]]
|
|
136
136
|
[[ "$zsh_run" != *"command not found: shopt"* ]]
|
|
137
137
|
}
|
|
138
|
+
|
|
139
|
+
# --- Behavioural contract: within-system mtime selection (P124 Phase 3) ---
|
|
140
|
+
#
|
|
141
|
+
# Phase 2 portability fix (shopt-portable for-loop existence check) shipped
|
|
142
|
+
# 2026-04-28 morning, restoring zsh compatibility but introducing a
|
|
143
|
+
# regression on multi-session developer machines: glob expansion enumerates
|
|
144
|
+
# matches in ASCII-alphabetical order, and the inner "first match wins"
|
|
145
|
+
# heuristic returned the lexically-first stale UUID when /tmp had
|
|
146
|
+
# accumulated `${system}-announced-*` markers from prior sessions. Ticket
|
|
147
|
+
# regression block cites 103 stale architect markers selecting the wrong
|
|
148
|
+
# UUID, denying the create-gate (P119) and forcing a brute-force recovery.
|
|
149
|
+
#
|
|
150
|
+
# Phase 3 selection contract: WITHIN a single system's marker namespace,
|
|
151
|
+
# the most-recent-mtime marker wins. ACROSS systems, the fixed priority
|
|
152
|
+
# order is preserved (asserted by the existing "deterministic priority"
|
|
153
|
+
# test above). `-announced-` markers are write-once-per-session per ADR-038
|
|
154
|
+
# (no `touch`-refresh sliding TTL), so mtime IS the announcing session's
|
|
155
|
+
# first-prompt timestamp — newest mtime unambiguously identifies the live
|
|
156
|
+
# session within that system's marker glob.
|
|
157
|
+
|
|
158
|
+
@test "within-system mtime: newest architect-announced marker wins over older same-system markers" {
|
|
159
|
+
oldest_uuid="00000000-0000-0000-0000-000000000001" # ASCII-first
|
|
160
|
+
middle_uuid="11111111-1111-1111-1111-111111111111"
|
|
161
|
+
newest_uuid="22222222-2222-2222-2222-222222222222" # ASCII-last of the three
|
|
162
|
+
# Fixture intent: alphabetical-first UUID has the OLDEST mtime; the
|
|
163
|
+
# newest-mtime UUID is alphabetically last. Phase 2 first-glob-match
|
|
164
|
+
# would pick `oldest_uuid` (alphabetical first); Phase 3 mtime
|
|
165
|
+
# selection picks `newest_uuid` (most-recent mtime). The sleep
|
|
166
|
+
# boundaries guarantee distinct mtimes across filesystems with
|
|
167
|
+
# 1-second mtime resolution (e.g. older HFS+).
|
|
168
|
+
mark_announced "architect" "$oldest_uuid"
|
|
169
|
+
sleep 1
|
|
170
|
+
mark_announced "architect" "$middle_uuid"
|
|
171
|
+
sleep 1
|
|
172
|
+
mark_announced "architect" "$newest_uuid"
|
|
173
|
+
output=$(discover)
|
|
174
|
+
[[ "$output" == *"$newest_uuid"* ]]
|
|
175
|
+
[[ "$output" != *"$oldest_uuid"* ]]
|
|
176
|
+
[[ "$output" != *"$middle_uuid"* ]]
|
|
177
|
+
[[ "$output" == *"EXIT:0"* ]]
|
|
178
|
+
}
|
package/package.json
CHANGED
package/skills/work-problems/SKILL.md
CHANGED
|
@@ -270,7 +270,7 @@ rm -f "$ITER_JSON"
|
|
|
270
270
|
|
|
271
271
|
1. **Context**: this is one iteration of the AFK work-problems loop. The user is AFK. The orchestrator selected `P<NNN> (<title>)` as the highest-WSJF actionable ticket.
|
|
272
272
|
2. **Task**: apply the `/wr-itil:manage-problem` workflow for `work highest WSJF problem that can be progressed non-interactively as the user is AFK`. Follow manage-problem SKILL.md verbatim, including architect / jtbd / style-guide / voice-tone gate reviews and the commit gate (manage-problem Step 11). Because this subprocess has the Agent tool in its own surface, the normal review-via-subagent paths work — no inline-verdict fallback needed.
|
|
273
|
-
3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills (AFK carve-out — ADR-032). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary.
|
|
273
|
+
3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills (AFK carve-out — ADR-032). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary. **Treat the user as transient** (P130): even when observably present at orchestrator dispatch time, the user may answer one question and disappear for hours; presence is not a reliable signal and is not the goal. The iter's job is to progress the ticket and accumulate questions for batched surfacing — not to ask "is it OK to proceed?" at a mechanical-stage boundary.
|
|
274
274
|
4. **Retro-on-exit (P086)**: before emitting `ITERATION_SUMMARY`, invoke `/wr-retrospective:run-retro`. Retro runs INSIDE this subprocess so its Step 2b pipeline-instability scan has access to the iteration's rich tool-call history (hook misbehaviour, repeat-workaround patterns, subagent-delegation friction, release-path instability). Retro may create tickets or update `docs/BRIEFING.md` — run-retro commits its own work per ADR-014; any tickets it creates ride into either the iteration's own commit (if retro runs before the main commit) or a retro-owned follow-up commit, and the orchestrator picks them up on the next Step 1 scan. Proceed to `ITERATION_SUMMARY` emission regardless of retro findings — retro is non-blocking (do not block on retro): if retro fails or surfaces findings, the iteration still returns a summary so the AFK loop does not silently halt on a flaky retro run.
|
|
275
275
|
5. **Output**: end the final message with the `ITERATION_SUMMARY` block defined below — this is how the orchestrator consumes the iteration's result.
|
|
276
276
|
|
|
@@ -505,6 +505,36 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
|
|
|
505
505
|
| Halt-path final summary with accumulated user-answerable skips (CI failure / Rule 5 above-appetite / dirty-unknown / session-continuity / fetch failure) | Run Step 2.5b's surfacing routine before emitting the halt path's final AFK summary. Step 2.5b is gated on ≥1 accumulated user-answerable skip — empty-skip halts skip the routine. Step 2.5b surfaces *prior-iter accumulated user-answerable skips only*; it does NOT ask the user how to remediate the halt cause itself (CI failure / above-appetite state / dirty-unknown state remain halt-with-bug-signal). Per ADR-013 Rule 1 + ADR-032 + P126 (`halt-paths-must-route-design-questions-through-Step-2.5b`). |
|
|
506
506
|
| Unexpected dirty state between iterations | Halt the loop. Report the `git status --porcelain` output, the last iteration's reported outcome, and the divergence — per P036 (Step 6.75). Run Step 2.5b before emitting the halt summary if ≥1 accumulated user-answerable skip from prior iters (P126). Do NOT attempt non-interactive recovery of the dirty state itself. |
|
|
507
507
|
| External root cause detected at Open → Known Error, or at park with `upstream-blocked` reason | Append the stable `- **Upstream report pending** — external dependency identified; invoke /wr-itil:report-upstream when ready` marker to the ticket's `## Related` section; do NOT auto-invoke `/wr-itil:report-upstream` (Step 6 security-path branch is interactive — per ADR-024 Consequences). Use the already-noted grep check to avoid duplicate lines. Per P063 + ADR-013 Rule 6. |
|
|
508
|
+
| Mid-loop ask between iters in the orchestrator's main turn | Forbidden except at framework-prescribed halt points (Step 0 session-continuity / fetch-failure halt; Step 2.5 / 2.5b loop-end emit; Step 6.5 above-appetite Rule 5 halt; Step 6.5 CI-failure / release:watch halt; Step 6.75 dirty-for-unknown-reason halt). The loop's purpose is **progress + accumulation**; mechanical-stage transitions between iters are framework-resolved and MUST NOT prompt the user. Per ADR-044 framework-resolution boundary + ADR-013 Rule 1 (as amended by ADR-044) + P130. |
|
|
509
|
+
|
|
510
|
+
### Mid-loop ask discipline (orchestrator main turn) — P130
|
|
511
|
+
|
|
512
|
+
The orchestrator MUST NOT call `AskUserQuestion` between iterations except at the framework-prescribed user-interaction halt points listed below. The loop's purpose is **progress + accumulation** — progress every ticket the agent can advance autonomously, accumulate user-answerable questions as a side-effect, and surface the accumulated batch only at a halt point. This rule applies whether the user is observably present or not, because **presence-detection is unreliable** and is not the goal — the user may answer one question and disappear for hours; the orchestrator's job is to keep advancing the backlog and stage the user-interaction surface for whenever the user actually returns. Treat the user as transient.
|
|
513
|
+
|
|
514
|
+
**Framework-prescribed halt points (the only orchestrator-main-turn surfaces where `AskUserQuestion` is permitted):**
|
|
515
|
+
|
|
516
|
+
- **Step 0 session-continuity halt** — Prior-Session State report; user routes resume / discard / leave-and-lower / halt (interactive branch only; AFK branch halts with the structured report per ADR-013 Rule 6).
|
|
517
|
+
- **Step 0 fetch-failure halt** — `git fetch origin` network failure; halt-with-report so the user retries on return.
|
|
518
|
+
- **Step 2.5 / Step 2.5b loop-end emit** — accumulated `outstanding_questions` queue presented as batched `AskUserQuestion` (or fallback Outstanding Design Questions table per ADR-013 Rule 6). This is the framework's prescribed user-interaction point; do NOT dilute it by asking earlier.
|
|
519
|
+
- **Step 6.5 above-appetite Rule 5 halt** — auto-apply loop exhausted without convergence; halt-with-batched-questions per the Step 2.5b cross-reference (Step 2.5b surfaces *prior-iter accumulated user-answerable skips only* — the halt-causing scorer-gap remains a halt-with-bug-signal per ADR-042 Rule 5).
|
|
520
|
+
- **Step 6.5 CI-failure / `release:watch` failure halt** — push:watch or release:watch failed; halt-with-batched-questions per the Step 2.5b cross-reference.
|
|
521
|
+
- **Step 6.75 dirty-for-unknown-reason halt** — `git status --porcelain` divergence; halt-with-batched-questions per the Step 2.5b cross-reference.
|
|
522
|
+
|
|
523
|
+
**No mid-iter ask points.** Every other point in the orchestrator's main turn (between Step 5 dispatch completing and Step 6.5 release-cadence check; between Step 6.75 verification and Step 7 loop-back; between Step 7 and Step 1 next-iteration; between consecutive iters generally) is a mechanical-stage transition that the framework has already resolved. Do NOT introduce ad-hoc `AskUserQuestion` calls at those points to confirm "is it OK to proceed?" or "want me to start the next iter?" — proceeding IS the framework-resolved default. Continue iterating until quota or stop-condition #1/#2/#3 fires.
|
|
524
|
+
|
|
525
|
+
**Accumulated-question discipline at surface time** (per ADR-044's six-class authority taxonomy — questions that reach the user must be load-bearing):
|
|
526
|
+
|
|
527
|
+
- **Direction-setting only** — questions that ONLY the user can answer because they reflect goals, intent, or trade-offs the framework has not yet captured. Other accumulated observations (deviation-approval, one-time-override, silent-framework, taste, correction-followup) follow the same shape as the deviation-candidate schema in Step 5's `outstanding_questions` contract.
|
|
528
|
+
- **No BUFD** — don't pre-judge architectural decisions before evidence accumulates. Small, actionable questions; not galaxy-brain ones. The deviation-candidate surface (per ADR-044's anti-BUFD-for-framework-evolution clause) is the place where iter-discovered misfits accumulate; the user resolves with full context at loop end.
|
|
529
|
+
- **No questions answerable by research / exploration / experimentation** — the agent should prototype, read code, run experiments to answer those itself rather than sub-contracting routine investigative work back to the user. The user is the source for genuine direction-setting decisions, not for "what does this hook do" or "which file holds X" — those are research questions the agent owns.
|
|
530
|
+
|
|
531
|
+
**Cross-references:**
|
|
532
|
+
|
|
533
|
+
- **Step 5's iteration-prompt body** carries the per-subprocess "Do not call `AskUserQuestion`" constraint; this subsection carries the orchestrator-main-turn equivalent. Together they enforce the same discipline at both the subprocess layer and the main-turn layer end-to-end.
|
|
534
|
+
- **ADR-044** is the parent decision narrowing ADR-013 Rule 1 to framework-unresolved decisions; this subsection is one of its load-bearing implementation surfaces.
|
|
535
|
+
- **ADR-013 Rule 1** (as amended by ADR-044) restricts `AskUserQuestion` to framework-unresolved decisions; the framework-prescribed halt enumeration above is the orchestrator-layer interpretation of that narrowing.
|
|
536
|
+
- **ADR-013 Rule 6** is the non-interactive fail-safe — when `AskUserQuestion` is unavailable (restricted permission mode, hook-disabled tool surface), the framework-prescribed halts fall back to structured-summary table emission rather than skipping the user-interaction.
|
|
537
|
+
- **ADR-032** subprocess-boundary contract is unchanged — this subsection is orchestrator-main-turn discipline; the iteration-subprocess dispatch shape (P084 + P121 + P086 + P089) is untouched.
|
|
508
538
|
|
|
509
539
|
## Edge Cases
|
|
510
540
|
|
|
package/skills/work-problems/test/work-problems-no-mid-loop-asking.bats
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# P130: /wr-itil:work-problems orchestrator must NOT call AskUserQuestion
|
|
4
|
+
# mid-loop. The loop's purpose is progress + accumulation; user-interaction
|
|
5
|
+
# is reserved for framework-prescribed halt points (Step 0, Step 2.5/2.5b,
|
|
6
|
+
# Step 6.5 Rule 5 + CI-failure, Step 6.75 dirty-unknown). The user is
|
|
7
|
+
# transient — presence-detection is unreliable and is not the goal; the
|
|
8
|
+
# orchestrator must accumulate questions and surface them at halt points.
|
|
9
|
+
#
|
|
10
|
+
# Per the user's reframe (ticket P130 lines 95-123), the original dual-mode
|
|
11
|
+
# dispatch fix-shape was rejected. The fix is SKILL.md prose discipline:
|
|
12
|
+
# enumerate the framework-prescribed halt points + assert no mid-iter asks
|
|
13
|
+
# elsewhere + state the accumulated-question discipline (direction-setting
|
|
14
|
+
# only; no BUFD; no questions answerable by research/exploration/
|
|
15
|
+
# experimentation).
|
|
16
|
+
#
|
|
17
|
+
# Doc-lint contract assertions per ADR-037 Permitted Exception (structural
|
|
18
|
+
# checks on prose contract, sibling shape with P126 / P135 fixtures).
|
|
19
|
+
#
|
|
20
|
+
# @problem P130
|
|
21
|
+
# @adr ADR-044 (Decision-Delegation Contract — framework-resolution boundary)
|
|
22
|
+
# @adr ADR-013 Rule 1 (as amended by ADR-044) + Rule 6 (non-interactive fail-safe)
|
|
23
|
+
# @adr ADR-032 (subprocess-boundary contract — unchanged)
|
|
24
|
+
# @adr ADR-037 (skill-testing-strategy — Permitted Exception for prose contract)
|
|
25
|
+
# @jtbd JTBD-006 (Progress the Backlog While I'm Away)
|
|
26
|
+
# @jtbd JTBD-001 (Enforce Governance Without Slowing Down)
|
|
27
|
+
|
|
28
|
+
setup() {
|
|
29
|
+
REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../../.." && pwd)"
|
|
30
|
+
SKILL_MD="$REPO_ROOT/packages/itil/skills/work-problems/SKILL.md"
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
@test "work-problems P130: SKILL.md exists" {
|
|
34
|
+
[ -f "$SKILL_MD" ]
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
# ── Mid-loop ask discipline subsection presence ─────────────────────────────
|
|
38
|
+
|
|
39
|
+
@test "work-problems P130: SKILL.md names a 'Mid-loop ask discipline' subsection (orchestrator main turn)" {
|
|
40
|
+
# The architect-approved placement is inside Non-Interactive Decision
|
|
41
|
+
# Making section as a subsection. The heading must exist as a navigable
|
|
42
|
+
# markdown anchor so cross-references resolve.
|
|
43
|
+
run grep -nE '^#{3,4} Mid-loop ask discipline' "$SKILL_MD"
|
|
44
|
+
[ "$status" -eq 0 ]
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
@test "work-problems P130: subsection scopes itself to the orchestrator main turn (not the subprocess)" {
|
|
48
|
+
# The subprocess-layer constraint (Step 5's iteration-prompt body)
|
|
49
|
+
# already exists; this subsection is the orchestrator-main-turn
|
|
50
|
+
# equivalent. The scope must be explicit so future readers do not
|
|
51
|
+
# confuse the layers.
|
|
52
|
+
run grep -nE 'orchestrator main turn|orchestrator.s main turn' "$SKILL_MD"
|
|
53
|
+
[ "$status" -eq 0 ]
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
# ── Core invariant: no mid-iter AskUserQuestion ─────────────────────────────
|
|
57
|
+
|
|
58
|
+
@test "work-problems P130: subsection asserts no mid-loop AskUserQuestion between iters except at framework halts" {
|
|
59
|
+
# The load-bearing rule. The orchestrator must NOT call AskUserQuestion
|
|
60
|
+
# between iterations EXCEPT at the framework-prescribed halt points.
|
|
61
|
+
run grep -nE 'MUST NOT call .?AskUserQuestion.? between iter' "$SKILL_MD"
|
|
62
|
+
[ "$status" -eq 0 ]
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
@test "work-problems P130: subsection cites the loop's purpose as progress + accumulation" {
|
|
66
|
+
# The reframed direction (ticket lines 95-123): the loop's purpose is
|
|
67
|
+
# progress + accumulation, not interactive-vs-AFK routing. This phrasing
|
|
68
|
+
# must appear so future authors do not re-introduce mid-loop asks under
|
|
69
|
+
# 'the user might be present' rationalisations.
|
|
70
|
+
run grep -nE 'progress \+ accumulation|progress and accumulation' "$SKILL_MD"
|
|
71
|
+
[ "$status" -eq 0 ]
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
@test "work-problems P130: subsection states presence-detection is unreliable and not the goal" {
|
|
75
|
+
# The transient-user framing. Without this, future authors may try to
|
|
76
|
+
# add presence-detection (the originally-rejected fix-shape).
|
|
77
|
+
run grep -nE 'presence[- ]detection is unreliable|presence is unreliable|user as transient|treat the user as transient' "$SKILL_MD"
|
|
78
|
+
[ "$status" -eq 0 ]
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
# ── Halt-point enumeration ──────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
@test "work-problems P130: subsection enumerates Step 0 session-continuity as a permitted halt point" {
|
|
84
|
+
run grep -nE 'Step 0.*session[- ]continuity' "$SKILL_MD"
|
|
85
|
+
[ "$status" -eq 0 ]
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
@test "work-problems P130: subsection enumerates Step 2.5 / 2.5b loop-end emit as a permitted halt point" {
|
|
89
|
+
run grep -nE 'Step 2\.5.*loop[- ]end|Step 2\.5b' "$SKILL_MD"
|
|
90
|
+
[ "$status" -eq 0 ]
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
@test "work-problems P130: subsection enumerates Step 6.5 above-appetite Rule 5 halt as a permitted halt point" {
|
|
94
|
+
run grep -nE 'Step 6\.5.*Rule 5|above[- ]appetite Rule 5' "$SKILL_MD"
|
|
95
|
+
[ "$status" -eq 0 ]
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
@test "work-problems P130: subsection enumerates Step 6.75 dirty-for-unknown-reason as a permitted halt point" {
|
|
99
|
+
run grep -nE 'Step 6\.75.*dirty[- ]for[- ]unknown|Step 6\.75.*unknown reason' "$SKILL_MD"
|
|
100
|
+
[ "$status" -eq 0 ]
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
# ── Accumulated-question discipline at surface time ─────────────────────────
|
|
104
|
+
|
|
105
|
+
@test "work-problems P130: subsection states direction-setting only at surface time" {
|
|
106
|
+
# ADR-044's six-class taxonomy: only the user-answerable categories
|
|
107
|
+
# qualify. Direction-setting is the canonical example.
|
|
108
|
+
run grep -nE 'Direction[- ]setting only|direction[- ]setting only' "$SKILL_MD"
|
|
109
|
+
[ "$status" -eq 0 ]
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
@test "work-problems P130: subsection forbids BUFD-style questions" {
|
|
113
|
+
# No big-design-up-front; small actionable questions. Anti-BUFD is a
|
|
114
|
+
# project-wide value (cited in ADR-044's anti-BUFD-for-framework-
|
|
115
|
+
# evolution clause).
|
|
116
|
+
run grep -nE 'No BUFD|no BUFD' "$SKILL_MD"
|
|
117
|
+
[ "$status" -eq 0 ]
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
@test "work-problems P130: subsection forbids questions answerable by research / exploration / experimentation" {
|
|
121
|
+
# The agent should investigate itself rather than sub-contract routine
|
|
122
|
+
# work back to the user.
|
|
123
|
+
run grep -nE 'research.*exploration.*experimentation|prototype.*read code.*experiments' "$SKILL_MD"
|
|
124
|
+
[ "$status" -eq 0 ]
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
# ── ADR citations ──────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
@test "work-problems P130: subsection cites ADR-044 (primary) as the framework-resolution authority" {
|
|
130
|
+
# ADR-044 is the parent decision narrowing ADR-013 Rule 1 to framework-
|
|
131
|
+
# unresolved decisions. The subsection must cite it so future readers
|
|
132
|
+
# follow the cross-reference for the full picture.
|
|
133
|
+
run grep -nE 'ADR-044' "$SKILL_MD"
|
|
134
|
+
[ "$status" -eq 0 ]
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
@test "work-problems P130: subsection cites ADR-013 Rule 1 as amended by ADR-044" {
|
|
138
|
+
# The narrowing happens at ADR-013 Rule 1's amendment. The subsection
|
|
139
|
+
# must name the rule so the chain is traceable.
|
|
140
|
+
run grep -nE 'ADR-013.*Rule 1|Rule 1.*amended by ADR-044' "$SKILL_MD"
|
|
141
|
+
[ "$status" -eq 0 ]
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
@test "work-problems P130: subsection cites ADR-013 Rule 6 as the non-interactive fail-safe underlying constraint" {
|
|
145
|
+
# When AFK, Rule 6 is the underlying constraint that forbids
|
|
146
|
+
# AskUserQuestion. The subsection cites it so the subprocess-boundary
|
|
147
|
+
# connection is explicit.
|
|
148
|
+
run grep -nE 'ADR-013.*Rule 6|Rule 6.*non[- ]interactive fail[- ]safe' "$SKILL_MD"
|
|
149
|
+
[ "$status" -eq 0 ]
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
# ── ADR-032 subprocess-boundary unchanged ──────────────────────────────────
|
|
153
|
+
|
|
154
|
+
@test "work-problems P130: subsection cross-references ADR-032 subprocess-boundary contract as unchanged" {
|
|
155
|
+
# The architect's flag: this fix does NOT amend ADR-032. The subsection
|
|
156
|
+
# must say so explicitly to prevent future readers thinking the
|
|
157
|
+
# subprocess-boundary contract is in scope.
|
|
158
|
+
run grep -nE 'subprocess[- ]boundary contract.*unchanged|ADR-032.*unchanged' "$SKILL_MD"
|
|
159
|
+
[ "$status" -eq 0 ]
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
# ── Subprocess vs orchestrator layer cross-reference ───────────────────────
|
|
163
|
+
|
|
164
|
+
@test "work-problems P130: subsection cross-references Step 5's per-subprocess constraint as the parallel layer" {
|
|
165
|
+
# The orchestrator-main-turn discipline parallels Step 5's iteration-
|
|
166
|
+
# prompt-body 'Do not call AskUserQuestion' constraint. Both layers
|
|
167
|
+
# together enforce the discipline end-to-end.
|
|
168
|
+
run grep -nE "Step 5.*iteration[- ]prompt.*AskUserQuestion|Step 5's iteration[- ]prompt" "$SKILL_MD"
|
|
169
|
+
[ "$status" -eq 0 ]
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
# ── Step 5's iteration-prompt body augmented with transient-user framing ───
|
|
173
|
+
|
|
174
|
+
@test "work-problems P130: Step 5 iteration-prompt body includes transient-user framing" {
|
|
175
|
+
# The reframed direction's core insight: the user is transient even
|
|
176
|
+
# when present. The iteration-prompt body (already-existing
|
|
177
|
+
# 'NEVER call AskUserQuestion mid-loop in AFK') gains the transient
|
|
178
|
+
# framing so future iter authors understand why the constraint is
|
|
179
|
+
# absolute.
|
|
180
|
+
run grep -nE 'transient|disappear for hours' "$SKILL_MD"
|
|
181
|
+
[ "$status" -eq 0 ]
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
# ── Decision Table row reflects the discipline ─────────────────────────────
|
|
185
|
+
|
|
186
|
+
@test "work-problems P130: Non-Interactive Decision Making table carries a row for mid-loop ask discipline" {
|
|
187
|
+
# The decisions table (line ~487) summarises the orchestrator's
|
|
188
|
+
# non-interactive defaults. A row naming mid-loop ask discipline keeps
|
|
189
|
+
# the table consistent with the new subsection.
|
|
190
|
+
run grep -nE '\| Mid-loop ask|mid[- ]loop AskUserQuestion|mid-loop ask between iter' "$SKILL_MD"
|
|
191
|
+
[ "$status" -eq 0 ]
|
|
192
|
+
}
|