@windyroad/itil 0.22.0-preview.243 → 0.22.1-preview.246

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  {
2
2
  "name": "wr-itil",
3
- "version": "0.22.0",
3
+ "version": "0.22.1",
4
4
  "description": "ITIL-aligned IT service management for Claude Code"
5
5
  }
@@ -32,6 +32,10 @@ PROSE_ASK_PATTERNS=(
32
32
  '\([a-c]\) ?/ ?\([a-c]\)'
33
33
  '\(1\) .*\(2\)'
34
34
  'Which (do you|option|one|path) .*\?'
35
+ 'Awaiting your (direction|input|decision|response|confirmation|answer|reply)'
36
+ 'Pending your (direction|input|decision|response|confirmation|answer|reply)'
37
+ 'Once you confirm'
38
+ 'Awaiting your direction on whether'
35
39
  )
36
40
 
37
41
  # Direction-pinning patterns — signals in the user's incoming prompt
@@ -88,25 +88,38 @@ get_current_session_id() {
88
88
  voice-tone
89
89
  )
90
90
 
91
- local system marker f
91
+ local system marker
92
92
  for system in "${systems[@]}"; do
93
- # Portable existence-check loop. The previous implementation used
94
- # `shopt -s nullglob` in a subshell — bash-only, errors under zsh
95
- # with `command not found: shopt` and lets the glob fall through to
96
- # the literal unmatched-pattern string, returning a wrong UUID.
97
- # P124 Phase 2: the for-loop existence check works identically
98
- # under bash, zsh, and POSIX dash. The first existing match wins
99
- # (selection by fixed marker-system priority order, NOT mtime
100
- # `-announced-` markers are write-once-per-session per ADR-038
101
- # so any present marker is the active SID; mtime selection would
102
- # reintroduce the `-reviewed-` marker fragility ADR-009 + P111
103
- # describe).
104
- marker=""
105
- for f in "${marker_dir}/${system}-announced-"*; do
106
- [ -e "$f" ] || continue
107
- marker="$f"
108
- break
109
- done
93
+ # Two-axis selection:
94
+ # ACROSS systems: fixed priority order (architect first, then
95
+ # jtbd, ...). The outer for-loop encodes this. The first system
96
+ # with any present marker wins; later systems are not consulted.
97
+ # WITHIN a system: most-recent-mtime wins (`ls -t | head -1`).
98
+ # Multi-session developer machines accumulate one
99
+ # `${system}-announced-${SID}` marker per past session in /tmp;
100
+ # the live session's marker is by construction the most-recently-
101
+ # created one. P124 Phase 2 used first-glob-match (alphabetical),
102
+ # which returned the lexically-first stale UUID when /tmp had
103
+ # accumulated markers from prior sessions — observed regression
104
+ # 2026-04-28 with 103 stale architect markers selecting the
105
+ # wrong UUID, denying the create-gate (P119).
106
+ #
107
+ # Why mtime is safe here even though Phase 1 architect rejected it:
108
+ # The Phase 1 rejection applied to `-reviewed-` markers, which
109
+ # `touch`-refresh on every gate check (ADR-009 sliding TTL +
110
+ # P111 subprocess refresh). Mtime on a `-reviewed-` marker is
111
+ # "last seen", not "first written" — selecting newest-mtime can
112
+ # surface a stale session whose marker was just touch-refreshed.
113
+ # `-announced-` markers are write-once-per-session per ADR-038
114
+ # (no `touch`-refresh, no TTL); their mtime IS the announcing
115
+ # session's first-prompt timestamp. Newest mtime within a single
116
+ # `-announced-` glob unambiguously identifies the live session.
117
+ #
118
+ # Portability: `ls -t` is POSIX (sort by modification time, newest
119
+ # first). 2>/dev/null suppresses "no such file" when the glob
120
+ # matches nothing under both bash and zsh; head -1 gracefully
121
+ # returns empty in that case.
122
+ marker=$(ls -t "${marker_dir}/${system}-announced-"* 2>/dev/null | head -1)
110
123
  if [ -n "$marker" ]; then
111
124
  # Strip the prefix to recover the trailing UUID.
112
125
  basename "$marker" | sed "s/^${system}-announced-//"
@@ -134,3 +134,43 @@ JSON
134
134
  [ "$status" -eq 0 ]
135
135
  [[ "$output" == *"stopReason"* ]]
136
136
  }
137
+
138
+ # 2026-04-28 regression evidence (P085 reopen, Citation 1).
139
+ # Orchestrator main turn emitted a halt-summary ending with "Awaiting your
140
+ # direction on whether to add it + resume on P123, or end the session."
141
+ # The Stop hook should have caught this binary-choice prose-ask but the
142
+ # pattern list did not match. Detector extension closes the gap.
143
+ @test "review: 'Awaiting your direction on whether ... or ...' (Citation 1 shape) triggers stopReason" {
144
+ write_transcript "ok" "Loop is still halted. Remaining open item: missing changeset for b9da37e. Awaiting your direction on whether to add it + resume on P123, or end the session."
145
+ run run_hook
146
+ [ "$status" -eq 0 ]
147
+ [[ "$output" == *"stopReason"* ]]
148
+ }
149
+
150
+ @test "review: 'Awaiting your input' triggers stopReason nudge" {
151
+ write_transcript "ok" "Plan staged. Awaiting your input on the next step."
152
+ run run_hook
153
+ [ "$status" -eq 0 ]
154
+ [[ "$output" == *"stopReason"* ]]
155
+ }
156
+
157
+ @test "review: 'Pending your decision' triggers stopReason nudge" {
158
+ write_transcript "review" "Refactor scoped to three files. Pending your decision before I continue."
159
+ run run_hook
160
+ [ "$status" -eq 0 ]
161
+ [[ "$output" == *"stopReason"* ]]
162
+ }
163
+
164
+ @test "review: 'Once you confirm' triggers stopReason nudge" {
165
+ write_transcript "look" "Rename ready. Once you confirm, I will proceed with the rename."
166
+ run run_hook
167
+ [ "$status" -eq 0 ]
168
+ [[ "$output" == *"stopReason"* ]]
169
+ }
170
+
171
+ @test "review: 'Awaiting your response' triggers stopReason nudge" {
172
+ write_transcript "go" "Two paths identified. Awaiting your response so I know which to take."
173
+ run run_hook
174
+ [ "$status" -eq 0 ]
175
+ [[ "$output" == *"stopReason"* ]]
176
+ }
@@ -135,3 +135,44 @@ mark_announced() {
135
135
  [[ "$zsh_run" != *"shopt: command not found"* ]]
136
136
  [[ "$zsh_run" != *"command not found: shopt"* ]]
137
137
  }
138
+
139
+ # --- Behavioural contract: within-system mtime selection (P124 Phase 3) ---
140
+ #
141
+ # Phase 2 portability fix (shopt-portable for-loop existence check) shipped
142
+ # 2026-04-28 morning, restoring zsh compatibility but introducing a
143
+ # regression on multi-session developer machines: glob expansion enumerates
144
+ # matches in ASCII-alphabetical order, and the inner "first match wins"
145
+ # heuristic returned the lexically-first stale UUID when /tmp had
146
+ # accumulated `${system}-announced-*` markers from prior sessions. Ticket
147
+ # regression block cites 103 stale architect markers selecting the wrong
148
+ # UUID, denying the create-gate (P119) and forcing a brute-force recovery.
149
+ #
150
+ # Phase 3 selection contract: WITHIN a single system's marker namespace,
151
+ # the most-recent-mtime marker wins. ACROSS systems, the fixed priority
152
+ # order is preserved (asserted by the existing "deterministic priority"
153
+ # test above). `-announced-` markers are write-once-per-session per ADR-038
154
+ # (no `touch`-refresh sliding TTL), so mtime IS the announcing session's
155
+ # first-prompt timestamp — newest mtime unambiguously identifies the live
156
+ # session within that system's marker glob.
157
+
158
+ @test "within-system mtime: newest architect-announced marker wins over older same-system markers" {
159
+ oldest_uuid="00000000-0000-0000-0000-000000000001" # ASCII-first
160
+ middle_uuid="11111111-1111-1111-1111-111111111111"
161
+ newest_uuid="22222222-2222-2222-2222-222222222222" # ASCII-last of the three
162
+ # Fixture intent: alphabetical-first UUID has the OLDEST mtime; the
163
+ # newest-mtime UUID is alphabetically last. Phase 2 first-glob-match
164
+ # would pick `oldest_uuid` (alphabetical first); Phase 3 mtime
165
+ # selection picks `newest_uuid` (most-recent mtime). The sleep
166
+ # boundaries guarantee distinct mtimes across filesystems with
167
+ # 1-second mtime resolution (e.g. older HFS+).
168
+ mark_announced "architect" "$oldest_uuid"
169
+ sleep 1
170
+ mark_announced "architect" "$middle_uuid"
171
+ sleep 1
172
+ mark_announced "architect" "$newest_uuid"
173
+ output=$(discover)
174
+ [[ "$output" == *"$newest_uuid"* ]]
175
+ [[ "$output" != *"$oldest_uuid"* ]]
176
+ [[ "$output" != *"$middle_uuid"* ]]
177
+ [[ "$output" == *"EXIT:0"* ]]
178
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@windyroad/itil",
3
- "version": "0.22.0-preview.243",
3
+ "version": "0.22.1-preview.246",
4
4
  "description": "ITIL-aligned IT service management for Claude Code (problem, and future incident/change skills)",
5
5
  "bin": {
6
6
  "windyroad-itil": "./bin/install.mjs"
@@ -270,7 +270,7 @@ rm -f "$ITER_JSON"
270
270
 
271
271
  1. **Context**: this is one iteration of the AFK work-problems loop. The user is AFK. The orchestrator selected `P<NNN> (<title>)` as the highest-WSJF actionable ticket.
272
272
  2. **Task**: apply the `/wr-itil:manage-problem` workflow for `work highest WSJF problem that can be progressed non-interactively as the user is AFK`. Follow manage-problem SKILL.md verbatim, including architect / jtbd / style-guide / voice-tone gate reviews and the commit gate (manage-problem Step 11). Because this subprocess has the Agent tool in its own surface, the normal review-via-subagent paths work — no inline-verdict fallback needed.
273
- 3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills (AFK carve-out — ADR-032). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary.
273
+ 3. **Constraints**: commit the completed work per ADR-014. Do NOT push, do NOT run `push:watch`, do NOT run `release:watch` — the orchestrator's Step 6.5 owns release cadence. Do NOT invoke `capture-*` background skills (AFK carve-out — ADR-032). Do NOT use `ScheduleWakeup` under any circumstance (P083 — iteration workers must not self-reschedule). **NEVER call `AskUserQuestion` mid-loop in AFK** (P135 / ADR-044): direction / deviation-approval / one-time-override / silent-framework observations queue at `ITERATION_SUMMARY.outstanding_questions` for loop-end batched presentation. Per-iter `AskUserQuestion` calls are sub-contracting framework-resolved decisions back to the user (lazy deferral per Step 2d Ask Hygiene Pass classification). Non-interactive defaults apply per ADR-013 Rule 6 + ADR-044's framework-resolution boundary. **Treat the user as transient** (P130): even when observably present at orchestrator dispatch time, the user may answer one question and disappear for hours; presence is not a reliable signal and is not the goal. The iter's job is to progress the ticket and accumulate questions for batched surfacing — not to ask "is it OK to proceed?" at a mechanical-stage boundary.
274
274
  4. **Retro-on-exit (P086)**: before emitting `ITERATION_SUMMARY`, invoke `/wr-retrospective:run-retro`. Retro runs INSIDE this subprocess so its Step 2b pipeline-instability scan has access to the iteration's rich tool-call history (hook misbehaviour, repeat-workaround patterns, subagent-delegation friction, release-path instability). Retro may create tickets or update `docs/BRIEFING.md` — run-retro commits its own work per ADR-014; any tickets it creates ride into either the iteration's own commit (if retro runs before the main commit) or a retro-owned follow-up commit, and the orchestrator picks them up on the next Step 1 scan. Proceed to `ITERATION_SUMMARY` emission regardless of retro findings — retro is non-blocking (do not block on retro): if retro fails or surfaces findings, the iteration still returns a summary so the AFK loop does not silently halt on a flaky retro run.
275
275
  5. **Output**: end the final message with the `ITERATION_SUMMARY` block defined below — this is how the orchestrator consumes the iteration's result.
276
276
 
@@ -505,6 +505,36 @@ When `AskUserQuestion` is unavailable or the user is AFK, the skill (and the del
505
505
  | Halt-path final summary with accumulated user-answerable skips (CI failure / Rule 5 above-appetite / dirty-unknown / session-continuity / fetch failure) | Run Step 2.5b's surfacing routine before emitting the halt path's final AFK summary. Step 2.5b is gated on ≥1 accumulated user-answerable skip — empty-skip halts skip the routine. Step 2.5b surfaces *prior-iter accumulated user-answerable skips only*; it does NOT ask the user how to remediate the halt cause itself (CI failure / above-appetite state / dirty-unknown state remain halt-with-bug-signal). Per ADR-013 Rule 1 + ADR-032 + P126 (`halt-paths-must-route-design-questions-through-Step-2.5b`). |
506
506
  | Unexpected dirty state between iterations | Halt the loop. Report the `git status --porcelain` output, the last iteration's reported outcome, and the divergence — per P036 (Step 6.75). Run Step 2.5b before emitting the halt summary if ≥1 accumulated user-answerable skip from prior iters (P126). Do NOT attempt non-interactive recovery of the dirty state itself. |
507
507
  | External root cause detected at Open → Known Error, or at park with `upstream-blocked` reason | Append the stable `- **Upstream report pending** — external dependency identified; invoke /wr-itil:report-upstream when ready` marker to the ticket's `## Related` section; do NOT auto-invoke `/wr-itil:report-upstream` (Step 6 security-path branch is interactive — per ADR-024 Consequences). Use the already-noted grep check to avoid duplicate lines. Per P063 + ADR-013 Rule 6. |
508
+ | Mid-loop ask between iters in the orchestrator's main turn | Forbidden except at framework-prescribed halt points (Step 0 session-continuity / fetch-failure halt; Step 2.5 / 2.5b loop-end emit; Step 6.5 above-appetite Rule 5 halt; Step 6.5 CI-failure / release:watch halt; Step 6.75 dirty-for-unknown-reason halt). The loop's purpose is **progress + accumulation**; mechanical-stage transitions between iters are framework-resolved and MUST NOT prompt the user. Per ADR-044 framework-resolution boundary + ADR-013 Rule 1 (as amended by ADR-044) + P130. |
509
+
510
+ ### Mid-loop ask discipline (orchestrator main turn) — P130
511
+
512
+ The orchestrator MUST NOT call `AskUserQuestion` between iterations except at the framework-prescribed user-interaction halt points listed below. The loop's purpose is **progress + accumulation** — progress every ticket the agent can advance autonomously, accumulate user-answerable questions as a side-effect, and surface the accumulated batch only at a halt point. This rule applies whether the user is observably present or not, because **presence-detection is unreliable** and is not the goal — the user may answer one question and disappear for hours; the orchestrator's job is to keep advancing the backlog and stage the user-interaction surface for whenever the user actually returns. Treat the user as transient.
513
+
514
+ **Framework-prescribed halt points (the only orchestrator-main-turn surfaces where `AskUserQuestion` is permitted):**
515
+
516
+ - **Step 0 session-continuity halt** — Prior-Session State report; user routes resume / discard / leave-and-lower / halt (interactive branch only; AFK branch halts with the structured report per ADR-013 Rule 6).
517
+ - **Step 0 fetch-failure halt** — `git fetch origin` network failure; halt-with-report so the user retries on return.
518
+ - **Step 2.5 / Step 2.5b loop-end emit** — accumulated `outstanding_questions` queue presented as batched `AskUserQuestion` (or fallback Outstanding Design Questions table per ADR-013 Rule 6). This is the framework's prescribed user-interaction point; do NOT dilute it by asking earlier.
519
+ - **Step 6.5 above-appetite Rule 5 halt** — auto-apply loop exhausted without convergence; halt-with-batched-questions per the Step 2.5b cross-reference (Step 2.5b surfaces *prior-iter accumulated user-answerable skips only* — the halt-causing scorer-gap remains a halt-with-bug-signal per ADR-042 Rule 5).
520
+ - **Step 6.5 CI-failure / `release:watch` failure halt** — push:watch or release:watch failed; halt-with-batched-questions per the Step 2.5b cross-reference.
521
+ - **Step 6.75 dirty-for-unknown-reason halt** — `git status --porcelain` divergence; halt-with-batched-questions per the Step 2.5b cross-reference.
522
+
523
+ **No mid-iter ask points.** Every other point in the orchestrator's main turn (between Step 5 dispatch completing and Step 6.5 release-cadence check; between Step 6.75 verification and Step 7 loop-back; between Step 7 and Step 1 next-iteration; between consecutive iters generally) is a mechanical-stage transition that the framework has already resolved. Do NOT introduce ad-hoc `AskUserQuestion` calls at those points to confirm "is it OK to proceed?" or "want me to start the next iter?" — proceeding IS the framework-resolved default. Continue iterating until quota or stop-condition #1/#2/#3 fires.
524
+
525
+ **Accumulated-question discipline at surface time** (per ADR-044's six-class authority taxonomy — questions that reach the user must be load-bearing):
526
+
527
+ - **Direction-setting only** — questions that ONLY the user can answer because they reflect goals, intent, or trade-offs the framework has not yet captured. Other accumulated observations (deviation-approval, one-time-override, silent-framework, taste, correction-followup) follow the same shape as the deviation-candidate schema in Step 5's `outstanding_questions` contract.
528
+ - **No BUFD** — don't pre-judge architectural decisions before evidence accumulates. Small, actionable questions; not galaxy-brain ones. The deviation-candidate surface (per ADR-044's anti-BUFD-for-framework-evolution clause) is the place where iter-discovered misfits accumulate; the user resolves with full context at loop end.
529
+ - **No questions answerable by research / exploration / experimentation** — the agent should prototype, read code, run experiments to answer those itself rather than sub-contracting routine investigative work back to the user. The user is the source for genuine direction-setting decisions, not for "what does this hook do" or "which file holds X" — those are research questions the agent owns.
530
+
531
+ **Cross-references:**
532
+
533
+ - **Step 5's iteration-prompt body** carries the per-subprocess "NEVER call `AskUserQuestion` mid-loop in AFK" constraint; this subsection carries the orchestrator-main-turn equivalent. Together they enforce the same discipline at both the subprocess layer and the main-turn layer end-to-end.
534
+ - **ADR-044** is the parent decision narrowing ADR-013 Rule 1 to framework-unresolved decisions; this subsection is one of its load-bearing implementation surfaces.
535
+ - **ADR-013 Rule 1** (as amended by ADR-044) restricts `AskUserQuestion` to framework-unresolved decisions; the framework-prescribed halt enumeration above is the orchestrator-layer interpretation of that narrowing.
536
+ - **ADR-013 Rule 6** is the non-interactive fail-safe — when `AskUserQuestion` is unavailable (restricted permission mode, hook-disabled tool surface), the framework-prescribed halts fall back to structured-summary table emission rather than skipping the user-interaction.
537
+ - **ADR-032** subprocess-boundary contract is unchanged — this subsection is orchestrator-main-turn discipline; the iteration-subprocess dispatch shape (P084 + P121 + P086 + P089) is untouched.
508
538
 
509
539
  ## Edge Cases
510
540
 
@@ -0,0 +1,192 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # P130: /wr-itil:work-problems orchestrator must NOT call AskUserQuestion
4
+ # mid-loop. The loop's purpose is progress + accumulation; user-interaction
5
+ # is reserved for framework-prescribed halt points (Step 0, Step 2.5/2.5b,
6
+ # Step 6.5 Rule 5 + CI-failure, Step 6.75 dirty-unknown). The user is
7
+ # transient — presence-detection is unreliable and is not the goal; the
8
+ # orchestrator must accumulate questions and surface them at halt points.
9
+ #
10
+ # Per the user's reframe (ticket P130 lines 95-123), the original dual-mode
11
+ # dispatch fix-shape was rejected. The fix is SKILL.md prose discipline:
12
+ # enumerate the framework-prescribed halt points + assert no mid-iter asks
13
+ # elsewhere + state the accumulated-question discipline (direction-setting
14
+ # only; no BUFD; no questions answerable by research/exploration/
15
+ # experimentation).
16
+ #
17
+ # Doc-lint contract assertions per ADR-037 Permitted Exception (structural
18
+ # checks on prose contract, sibling shape with P126 / P135 fixtures).
19
+ #
20
+ # @problem P130
21
+ # @adr ADR-044 (Decision-Delegation Contract — framework-resolution boundary)
22
+ # @adr ADR-013 Rule 1 (as amended by ADR-044) + Rule 6 (non-interactive fail-safe)
23
+ # @adr ADR-032 (subprocess-boundary contract — unchanged)
24
+ # @adr ADR-037 (skill-testing-strategy — Permitted Exception for prose contract)
25
+ # @jtbd JTBD-006 (Progress the Backlog While I'm Away)
26
+ # @jtbd JTBD-001 (Enforce Governance Without Slowing Down)
27
+
28
+ setup() {
29
+ REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../../.." && pwd)"
30
+ SKILL_MD="$REPO_ROOT/packages/itil/skills/work-problems/SKILL.md"
31
+ }
32
+
33
+ @test "work-problems P130: SKILL.md exists" {
34
+ [ -f "$SKILL_MD" ]
35
+ }
36
+
37
+ # ── Mid-loop ask discipline subsection presence ─────────────────────────────
38
+
39
+ @test "work-problems P130: SKILL.md names a 'Mid-loop ask discipline' subsection (orchestrator main turn)" {
40
+ # The architect-approved placement is inside Non-Interactive Decision
41
+ # Making section as a subsection. The heading must exist as a navigable
42
+ # markdown anchor so cross-references resolve.
43
+ run grep -nE '^#{3,4} Mid-loop ask discipline' "$SKILL_MD"
44
+ [ "$status" -eq 0 ]
45
+ }
46
+
47
+ @test "work-problems P130: subsection scopes itself to the orchestrator main turn (not the subprocess)" {
48
+ # The subprocess-layer constraint (Step 5's iteration-prompt body)
49
+ # already exists; this subsection is the orchestrator-main-turn
50
+ # equivalent. The scope must be explicit so future readers do not
51
+ # confuse the layers.
52
+ run grep -nE 'orchestrator main turn|orchestrator.s main turn' "$SKILL_MD"
53
+ [ "$status" -eq 0 ]
54
+ }
55
+
56
+ # ── Core invariant: no mid-iter AskUserQuestion ─────────────────────────────
57
+
58
+ @test "work-problems P130: subsection asserts no mid-loop AskUserQuestion between iters except at framework halts" {
59
+ # The load-bearing rule. The orchestrator must NOT call AskUserQuestion
60
+ # between iterations EXCEPT at the framework-prescribed halt points.
61
+ run grep -nE 'MUST NOT call .?AskUserQuestion.? between iter' "$SKILL_MD"
62
+ [ "$status" -eq 0 ]
63
+ }
64
+
65
+ @test "work-problems P130: subsection cites the loop's purpose as progress + accumulation" {
66
+ # The reframed direction (ticket lines 95-123): the loop's purpose is
67
+ # progress + accumulation, not interactive-vs-AFK routing. This phrasing
68
+ # must appear so future authors do not re-introduce mid-loop asks under
69
+ # 'the user might be present' rationalisations.
70
+ run grep -nE 'progress \+ accumulation|progress and accumulation' "$SKILL_MD"
71
+ [ "$status" -eq 0 ]
72
+ }
73
+
74
+ @test "work-problems P130: subsection states presence-detection is unreliable and not the goal" {
75
+ # The transient-user framing. Without this, future authors may try to
76
+ # add presence-detection (the originally-rejected fix-shape).
77
+ run grep -nE 'presence[- ]detection is unreliable|presence is unreliable|user as transient|treat the user as transient' "$SKILL_MD"
78
+ [ "$status" -eq 0 ]
79
+ }
80
+
81
+ # ── Halt-point enumeration ──────────────────────────────────────────────────
82
+
83
+ @test "work-problems P130: subsection enumerates Step 0 session-continuity as a permitted halt point" {
84
+ run grep -nE 'Step 0.*session[- ]continuity' "$SKILL_MD"
85
+ [ "$status" -eq 0 ]
86
+ }
87
+
88
+ @test "work-problems P130: subsection enumerates Step 2.5 / 2.5b loop-end emit as a permitted halt point" {
89
+ run grep -nE 'Step 2\.5.*loop[- ]end|Step 2\.5b' "$SKILL_MD"
90
+ [ "$status" -eq 0 ]
91
+ }
92
+
93
+ @test "work-problems P130: subsection enumerates Step 6.5 above-appetite Rule 5 halt as a permitted halt point" {
94
+ run grep -nE 'Step 6\.5.*Rule 5|above[- ]appetite Rule 5' "$SKILL_MD"
95
+ [ "$status" -eq 0 ]
96
+ }
97
+
98
+ @test "work-problems P130: subsection enumerates Step 6.75 dirty-for-unknown-reason as a permitted halt point" {
99
+ run grep -nE 'Step 6\.75.*dirty[- ]for[- ]unknown|Step 6\.75.*unknown reason' "$SKILL_MD"
100
+ [ "$status" -eq 0 ]
101
+ }
102
+
103
+ # ── Accumulated-question discipline at surface time ─────────────────────────
104
+
105
+ @test "work-problems P130: subsection states direction-setting only at surface time" {
106
+ # ADR-044's six-class taxonomy: only the user-answerable categories
107
+ # qualify. Direction-setting is the canonical example.
108
+ run grep -nE 'Direction[- ]setting only|direction[- ]setting only' "$SKILL_MD"
109
+ [ "$status" -eq 0 ]
110
+ }
111
+
112
+ @test "work-problems P130: subsection forbids BUFD-style questions" {
113
+ # No big-design-up-front; small actionable questions. Anti-BUFD is a
114
+ # project-wide value (cited in ADR-044's anti-BUFD-for-framework-
115
+ # evolution clause).
116
+ run grep -nE 'No BUFD|no BUFD' "$SKILL_MD"
117
+ [ "$status" -eq 0 ]
118
+ }
119
+
120
+ @test "work-problems P130: subsection forbids questions answerable by research / exploration / experimentation" {
121
+ # The agent should investigate itself rather than sub-contract routine
122
+ # work back to the user.
123
+ run grep -nE 'research.*exploration.*experimentation|prototype.*read code.*experiments' "$SKILL_MD"
124
+ [ "$status" -eq 0 ]
125
+ }
126
+
127
+ # ── ADR citations ──────────────────────────────────────────────────────────
128
+
129
+ @test "work-problems P130: subsection cites ADR-044 (primary) as the framework-resolution authority" {
130
+ # ADR-044 is the parent decision narrowing ADR-013 Rule 1 to framework-
131
+ # unresolved decisions. The subsection must cite it so future readers
132
+ # follow the cross-reference for the full picture.
133
+ run grep -nE 'ADR-044' "$SKILL_MD"
134
+ [ "$status" -eq 0 ]
135
+ }
136
+
137
+ @test "work-problems P130: subsection cites ADR-013 Rule 1 as amended by ADR-044" {
138
+ # The narrowing happens at ADR-013 Rule 1's amendment. The subsection
139
+ # must name the rule so the chain is traceable.
140
+ run grep -nE 'ADR-013.*Rule 1|Rule 1.*amended by ADR-044' "$SKILL_MD"
141
+ [ "$status" -eq 0 ]
142
+ }
143
+
144
+ @test "work-problems P130: subsection cites ADR-013 Rule 6 as the non-interactive fail-safe underlying constraint" {
145
+ # When AFK, Rule 6 is the underlying constraint that forbids
146
+ # AskUserQuestion. The subsection cites it so the subprocess-boundary
147
+ # connection is explicit.
148
+ run grep -nE 'ADR-013.*Rule 6|Rule 6.*non[- ]interactive fail[- ]safe' "$SKILL_MD"
149
+ [ "$status" -eq 0 ]
150
+ }
151
+
152
+ # ── ADR-032 subprocess-boundary unchanged ──────────────────────────────────
153
+
154
+ @test "work-problems P130: subsection cross-references ADR-032 subprocess-boundary contract as unchanged" {
155
+ # The architect's flag: this fix does NOT amend ADR-032. The subsection
156
+ # must say so explicitly to prevent future readers thinking the
157
+ # subprocess-boundary contract is in scope.
158
+ run grep -nE 'subprocess[- ]boundary contract.*unchanged|ADR-032.*unchanged' "$SKILL_MD"
159
+ [ "$status" -eq 0 ]
160
+ }
161
+
162
+ # ── Subprocess vs orchestrator layer cross-reference ───────────────────────
163
+
164
+ @test "work-problems P130: subsection cross-references Step 5's per-subprocess constraint as the parallel layer" {
165
+ # The orchestrator-main-turn discipline parallels Step 5's iteration-
166
+ # prompt-body 'Do not call AskUserQuestion' constraint. Both layers
167
+ # together enforce the discipline end-to-end.
168
+ run grep -nE "Step 5.*iteration[- ]prompt.*AskUserQuestion|Step 5's iteration[- ]prompt" "$SKILL_MD"
169
+ [ "$status" -eq 0 ]
170
+ }
171
+
172
+ # ── Step 5's iteration-prompt body augmented with transient-user framing ───
173
+
174
+ @test "work-problems P130: Step 5 iteration-prompt body includes transient-user framing" {
175
+ # The reframed direction's core insight: the user is transient even
176
+ # when present. The iteration-prompt body (already-existing
177
+ # 'NEVER call AskUserQuestion mid-loop in AFK') gains the transient
178
+ # framing so future iter authors understand why the constraint is
179
+ # absolute.
180
+ run grep -nE 'transient|disappear for hours' "$SKILL_MD"
181
+ [ "$status" -eq 0 ]
182
+ }
183
+
184
+ # ── Decision Table row reflects the discipline ─────────────────────────────
185
+
186
+ @test "work-problems P130: Non-Interactive Decision Making table carries a row for mid-loop ask discipline" {
187
+ # The decisions table (line ~487) summarises the orchestrator's
188
+ # non-interactive defaults. A row naming mid-loop ask discipline keeps
189
+ # the table consistent with the new subsection.
190
+ run grep -nE '\| Mid-loop ask|mid[- ]loop AskUserQuestion|mid-loop ask between iter' "$SKILL_MD"
191
+ [ "$status" -eq 0 ]
192
+ }