@windyroad/itil 0.30.3-preview.319 → 0.30.4-preview.321

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  {
2
2
  "name": "wr-itil",
3
- "version": "0.30.3",
3
+ "version": "0.30.4",
4
4
  "description": "ITIL-aligned IT service management for Claude Code"
5
5
  }
package/hooks/hooks.json CHANGED
@@ -49,6 +49,10 @@
49
49
  {
50
50
  "matcher": "Bash",
51
51
  "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-rfc-trailer-advisory.sh" }]
52
+ },
53
+ {
54
+ "matcher": "Write|Edit|MultiEdit",
55
+ "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-fictional-defer-detect.sh" }]
52
56
  }
53
57
  ],
54
58
  "Stop": [
@@ -0,0 +1,175 @@
1
#!/bin/bash
# P234 Phase 1 — wr-itil PostToolUse:Write|Edit|MultiEdit hook.
#
# Detects "fictional defer" rationales in `docs/retros/*.md` writes —
# defer-rationale phrases (`next retro`, `next session`, `defer
# pending`, `defer with cause:`, `deferred per`) that lack a
# SCHEDULED-FUTURE-SURFACE citation in the surrounding +/-5-line
# window. The regression class P234 captures (2026-05-17 session 3
# retro: 3 MUST_SPLIT files deferred with "cascade case: archive-of-
# archive tier design needed" rationale; user correction "Don't defer"
# revealed the cascade was mechanical, not a design barrier).
#
# Detection signal (per ticket Investigation Task 2 two-axis test):
#   1. tool_name is Write OR Edit OR MultiEdit AND file_path matches
#      `docs/retros/*.md`.
#   2. Written file contains a defer-rationale phrase (case-insensitive).
#   3. Within +/-5 lines of the match there is NO citation of a
#      SCHEDULED-FUTURE-SURFACE — concretely any of:
#        * Ticket ID:    P\d{3} / STORY-\d{3} / R\d{3} / RFC-\d{3}
#        * Skill:        /wr-[a-z-]+:[a-z-]+
#        * Hook/script:  \.sh\b (path component or filename)
#        * CI workflow:  \.github/workflows/
#        * Dated ADR:    ADR-\d{3} + \d{4}-\d{2}-\d{2} both present
#   4. Match line is NOT on the exception allowlist
#      (e.g. `deferred per Branch B` — Branch B's next-retro
#      check-briefing-budgets.sh trigger IS the scheduled surface).
#
# When all four hold, the hook emits a stderr advisory citing P234 +
# the SCHEDULED-FUTURE-SURFACE definition + remediation pattern
# (cite a surface OR execute the deferred work now). The advisory
# names the file path, line number, and detected phrase so the next
# assistant turn has enough context to self-correct.
#
# Advisory only — NEVER blocks. Per ADR-013 Rule 6 fail-safe + ADR-045
# honour-system budget (target ~600 bytes; hard ceiling 1000). Mirrors
# the itil-rfc-trailer-advisory.sh PostToolUse precedent (stderr +
# exit 0) and the itil-mid-loop-ask-detect.sh per-surface configuration
# pattern (DEFER_RATIONALE_RE / SCHEDULED_FUTURE_SURFACE_RE /
# EXEMPT_PHRASES at the top so the hook is copy-and-retarget extensible).
#
# References:
#   P234    — this hook (Phase 1 structural enforcement).
#   P148    — Tickets Deferred section misuse; same class, different
#             surface (advisory script not hook).
#   P132    — over-ask class (inverse-correctness axis of P234 under-do);
#             Phase 2b hook itil-mid-loop-ask-detect.sh is the canonical
#             advisory-shape template.
#   ADR-013 — Rule 6 fail-open on missing inputs / parse errors.
#   ADR-014 — single-commit grain (this hook never auto-fixes).
#   ADR-040 — declarative-first; advisory-only over hard block.
#   ADR-044 — framework-resolution boundary; named in advisory.
#   ADR-045 — hook injection budget; honour-system <1000 hard ceiling.
#   ADR-052 — behavioural-tests default; bats live alongside.
#   ADR-057 — three-phase declarative-first cluster rollout
#             (Phase 2 advisory-second slot).

# Per-surface configuration. Extending coverage to other accumulator-
# doc surfaces (briefing topic files, decision logs, capture skill
# outputs) is a copy-and-retarget operation — adjust PATH_GLOB_RE +
# the regex vars below.
PATH_GLOB_RE='docs/retros/.*\.md$'
DEFER_RATIONALE_RE='next retro|next session|defer pending|deferred pending|defer with cause|deferred with cause|deferred per'
TICKET_ID_RE='\b(P[0-9]{3}|STORY-[0-9]{3}|R[0-9]{3}|RFC-[0-9]{3})\b'
SKILL_INVOCATION_RE='/wr-[a-z-]+:[a-z-]+'
HOOK_PATH_RE='[A-Za-z0-9_./-]+\.sh\b'
CI_WORKFLOW_RE='\.github/workflows/'
ADR_REF_RE='ADR-[0-9]{3}'
DATE_RE='[0-9]{4}-[0-9]{2}-[0-9]{2}'
EXEMPT_PHRASES_RE='deferred per Branch B'

#######################################
# Extract one (possibly nested) string field from a JSON document.
# Arguments: $1 - JSON text; $2.. - key path (e.g. tool_input file_path)
# Outputs:   the field value on stdout; an empty line when the key is
#            missing or the JSON cannot be parsed (fail-open).
#######################################
json_field() {
  printf '%s' "$1" | python3 -c '
import json, sys
try:
    value = json.load(sys.stdin)
    for key in sys.argv[1:]:
        value = value.get(key, "")
    print(value)
except Exception:
    print("")
' "${@:2}" 2>/dev/null || echo ""
}

#######################################
# Decide whether a text window cites a SCHEDULED-FUTURE-SURFACE.
# Arguments: $1 - window text (may span several lines)
# Returns:   0 when any surface pattern matches, 1 otherwise
#######################################
window_cites_surface() {
  local window=$1
  local re
  for re in "$TICKET_ID_RE" "$SKILL_INVOCATION_RE" "$HOOK_PATH_RE" "$CI_WORKFLOW_RE"; do
    if grep -qE -- "$re" <<<"$window"; then
      return 0
    fi
  done
  # A dated ADR needs BOTH an ADR-NNN ref AND a date inside the window.
  grep -qE -- "$ADR_REF_RE" <<<"$window" && grep -qE -- "$DATE_RE" <<<"$window"
}

#######################################
# Find the first defer-rationale line with no surface citation nearby.
# Arguments: $1 - path of the file to scan
# Outputs:   `<line number>:<compacted phrase, max 80 chars>` on stdout
# Returns:   0 when a fictional defer was found, 1 otherwise
#######################################
first_fictional_defer() {
  local file=$1
  local matches row ln text start window phrase

  # grep -n yields `lineno:content` rows.
  matches=$(grep -inE -- "$DEFER_RATIONALE_RE" "$file" 2>/dev/null || true)
  [ -n "$matches" ] || return 1

  while IFS= read -r row; do
    [ -n "$row" ] || continue
    ln=${row%%:*}
    text=${row#*:}

    # Fail-open on rows without a numeric prefix (e.g. grep's
    # "Binary file ... matches" notice) instead of feeding them to $(( )).
    case "$ln" in
      '' | *[!0-9]*) continue ;;
    esac

    # Skip exception-allowlisted phrases (e.g. `deferred per Branch B`).
    if grep -qiE -- "$EXEMPT_PHRASES_RE" <<<"$text"; then
      continue
    fi

    # Window [ln-5, ln+5]. Only the lower bound needs clamping: sed stops
    # at EOF by itself. Clamping the upper bound to `wc -l` (which counts
    # newline characters) used to drop an unterminated final line from the
    # window, hiding a citation that sat on the match line itself.
    start=$((ln - 5))
    if [ "$start" -lt 1 ]; then
      start=1
    fi
    window=$(sed -n "${start},$((ln + 5))p" -- "$file" 2>/dev/null || true)

    if window_cites_surface "$window"; then
      continue
    fi

    # First uncited defer wins — the advisory carries one example and the
    # remediation pattern generalises. Compact + truncate the phrase.
    phrase=$(printf '%s\n' "$text" | tr -s ' ' ' ' | sed 's/^[[:space:]]*//' | cut -c1-80)
    printf '%s:%s\n' "$ln" "$phrase"
    return 0
  done <<<"$matches"

  return 1
}

#######################################
# Hook entry point: read the PostToolUse payload from stdin and emit the
# P234 advisory on stderr when a fictional defer is found.
# Returns:   0 always (advisory only, never blocks)
#######################################
main() {
  local input tool_name file_path finding

  input=$(cat 2>/dev/null || true)

  # Fail-open on empty/malformed stdin.
  [ -n "$input" ] || return 0

  tool_name=$(json_field "$input" tool_name)
  case "$tool_name" in
    Write | Edit | MultiEdit) ;;
    *) return 0 ;;
  esac

  file_path=$(json_field "$input" tool_input file_path)

  # Short-circuits: no file_path, path outside the retro glob, or file
  # not present on disk → silent.
  [ -n "$file_path" ] || return 0
  grep -qE -- "$PATH_GLOB_RE" <<<"$file_path" || return 0
  [ -f "$file_path" ] || return 0

  finding=$(first_fictional_defer "$file_path") || return 0
  [ -n "$finding" ] || return 0

  # Emit advisory to stderr (PostToolUse precedent matches
  # itil-rfc-trailer-advisory.sh). Voice-tone target ~600 bytes;
  # ADR-045 honour-system ceiling <1000.
  printf '%s\n' "P234 ADVISORY: fictional defer detected in ${file_path}:${finding%%:*} — phrase: \"${finding#*:}\". No SCHEDULED-FUTURE-SURFACE cited within +/-5 lines. Per ADR-044 framework-resolution boundary, cite a concrete surface (ticket ID Pnnn, named skill /wr-foo:bar, hook path *.sh, CI workflow .github/workflows/, or dated ADR-nnn YYYY-MM-DD) OR execute the deferred work in this session. See P234." >&2
  return 0
}

# Advisory, never block: the `|| true` pins the script's exit status to 0.
main "$@" || true
@@ -0,0 +1,292 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # P234 Phase 1: itil-fictional-defer-detect.sh PostToolUse:Write|Edit
4
+ # hook detects "fictional defer" rationales in `docs/retros/*.md` writes
5
+ # — defer-rationale phrases (`next retro`, `next session`, `defer
6
+ # pending`, `defer with cause:`, `deferred per`) that lack a
7
+ # SCHEDULED-FUTURE-SURFACE citation in surrounding context.
8
+ #
9
+ # Detection signal (per ticket Investigation Task 2 two-axis test):
10
+ # 1. tool_name is Write OR Edit AND file_path matches docs/retros/*.md
11
+ # 2. Written file contains a defer-rationale phrase (case-insensitive)
12
+ # 3. Within +/-5 lines of the match there is NO citation of a
13
+ # SCHEDULED-FUTURE-SURFACE (ticket ID P\d{3} / STORY-\d{3} / R\d{3},
14
+ # skill invocation /wr-[a-z-]+:[a-z-]+, hook script path .sh, CI
15
+ # workflow path .github/workflows/, dated ADR ADR-\d{3} ... YYYY-MM-DD)
16
+ # 4. Match is NOT on the exception allowlist (e.g. `deferred per Branch B`).
17
+ #
18
+ # When all four hold, the hook emits a stderr advisory citing P234 + the
19
+ # SCHEDULED-FUTURE-SURFACE definition. Advisory only — never blocks
20
+ # (exit 0 always). Mirrors the itil-rfc-trailer-advisory.sh PostToolUse
21
+ # precedent (stderr + exit 0) and the itil-mid-loop-ask-detect.sh
22
+ # detection-pattern precedent (per-surface configuration at top).
23
+ #
24
+ # Per ADR-005 / ADR-052 — bats live under packages/<plugin>/hooks/test/
25
+ # and assert on emitted stderr text, not source-content. Per P081 — no
26
+ # source-grep on hook text.
27
+
28
+ setup() {
29
+ REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../.." && pwd)"
30
+ HOOK="$REPO_ROOT/packages/itil/hooks/itil-fictional-defer-detect.sh"
31
+ TMPDIR_="$(mktemp -d)"
32
+ RETRO_DIR="$TMPDIR_/docs/retros"
33
+ mkdir -p "$RETRO_DIR"
34
+ RETRO_FILE="$RETRO_DIR/2026-05-17-session-3.md"
35
+ }
36
+
37
+ teardown() {
38
+ rm -rf "$TMPDIR_"
39
+ }
40
+
41
+ # Helper: emit PostToolUse stdin payload for a Write tool call.
42
+ emit_write_payload() {
43
+ local file_path="$1"
44
+ jq -n --arg p "$file_path" '{
45
+ session_id: "fictional-defer-test",
46
+ tool_name: "Write",
47
+ tool_input: { file_path: $p, content: "(content already on disk)" },
48
+ tool_response: { success: true }
49
+ }'
50
+ }
51
+
52
+ # Helper: emit PostToolUse stdin payload for an Edit tool call.
53
+ emit_edit_payload() {
54
+ local file_path="$1"
55
+ jq -n --arg p "$file_path" '{
56
+ session_id: "fictional-defer-test",
57
+ tool_name: "Edit",
58
+ tool_input: { file_path: $p, old_string: "x", new_string: "y" },
59
+ tool_response: { success: true }
60
+ }'
61
+ }
62
+
63
+ run_hook_with_write() {
64
+ emit_write_payload "$RETRO_FILE" | bash "$HOOK"
65
+ }
66
+
67
+ run_hook_with_edit() {
68
+ emit_edit_payload "$RETRO_FILE" | bash "$HOOK"
69
+ }
70
+
71
+ # --- Positive detection: fictional defer ---
72
+
73
+ @test "detect: defer-to-next-retro with no scheduled-future-surface emits advisory" {
74
+ # Faithful reproduction of the P234 worked-example fictional-defer
75
+ # class — the defer-rationale prose carries no SCHEDULED-FUTURE-
76
+ # SURFACE citation; no ticket ID, no skill invocation, no dated ADR
77
+ # appears in the +/-5 line window around the defer phrase.
78
+ cat > "$RETRO_FILE" <<'EOF'
79
+ # Session 3 Retro
80
+
81
+ ## Signal-vs-Noise Pass
82
+
83
+ Deferred this retro per session-length constraint (16+ briefing
84
+ entries across 13 topic files would require ~30 min of per-entry
85
+ scoring). Next retro should run a full pass.
86
+ EOF
87
+ run run_hook_with_write
88
+ [ "$status" -eq 0 ]
89
+ [[ "$stderr" == *"P234"* ]] || [[ "$output" == *"P234"* ]]
90
+ }
91
+
92
+ @test "detect: deferred-pending-design-judgement with no scheduled-future-surface emits advisory" {
93
+ cat > "$RETRO_FILE" <<'EOF'
94
+ # Session 4 Retro
95
+
96
+ ## Topic File Rotation Candidates
97
+
98
+ | File | Action |
99
+ |------|--------|
100
+ | governance-workflow.md | deferred pending design judgement (cascade case) |
101
+ | hooks-and-gates.md | deferred pending complexity review |
102
+ EOF
103
+ run run_hook_with_write
104
+ [ "$status" -eq 0 ]
105
+ [[ "$stderr" == *"P234"* ]] || [[ "$output" == *"P234"* ]]
106
+ }
107
+
108
+ @test "detect: defer-with-cause-context-budget with no surface emits advisory" {
109
+ cat > "$RETRO_FILE" <<'EOF'
110
+ # Session 5 Retro
111
+
112
+ ## Codification Candidates
113
+
114
+ Deferred with cause: context budget pressure. Next session should
115
+ revisit when fresh context is available.
116
+ EOF
117
+ run run_hook_with_edit
118
+ [ "$status" -eq 0 ]
119
+ [[ "$stderr" == *"P234"* ]] || [[ "$output" == *"P234"* ]]
120
+ }
121
+
122
+ # --- Negative paths: legitimate citations (silent exit) ---
123
+
124
+ @test "allow: defer citing P-ticket within +/-5 lines exits silent" {
125
+ cat > "$RETRO_FILE" <<'EOF'
126
+ # Session 3 Retro
127
+
128
+ ## Signal-vs-Noise Pass
129
+
130
+ Deferred per [[P235]] (briefing SVN backlog: 146 entries across 17
131
+ topic files). Next retro will surface P235 if it has been promoted
132
+ to actionable.
133
+ EOF
134
+ run run_hook_with_write
135
+ [ "$status" -eq 0 ]
136
+ [[ "$stderr" != *"P234"* ]]
137
+ [[ "$output" != *"P234"* ]]
138
+ }
139
+
140
+ @test "allow: defer citing skill invocation within +/-5 lines exits silent" {
141
+ cat > "$RETRO_FILE" <<'EOF'
142
+ # Session 4 Retro
143
+
144
+ ## Tickets Deferred
145
+
146
+ Deferred pending /wr-itil:work-problems Step 6.5 above-appetite
147
+ release-loop check.
148
+ EOF
149
+ run run_hook_with_edit
150
+ [ "$status" -eq 0 ]
151
+ [[ "$stderr" != *"P234"* ]]
152
+ [[ "$output" != *"P234"* ]]
153
+ }
154
+
155
+ @test "allow: defer citing hook script path within +/-5 lines exits silent" {
156
+ cat > "$RETRO_FILE" <<'EOF'
157
+ # Session 5 Retro
158
+
159
+ ## Codification Candidates
160
+
161
+ Defer pending packages/itil/hooks/itil-fictional-defer-detect.sh
162
+ extension to also cover the assistant-output review channel.
163
+ EOF
164
+ run run_hook_with_write
165
+ [ "$status" -eq 0 ]
166
+ [[ "$stderr" != *"P234"* ]]
167
+ [[ "$output" != *"P234"* ]]
168
+ }
169
+
170
+ @test "allow: defer citing dated ADR within +/-5 lines exits silent" {
171
+ cat > "$RETRO_FILE" <<'EOF'
172
+ # Session 6 Retro
173
+
174
+ ## Codification Candidates
175
+
176
+ Deferred pending ADR-044 confirmation criterion 3 graduation
177
+ (2026-05-25). Reassess after the criterion lands.
178
+ EOF
179
+ run run_hook_with_write
180
+ [ "$status" -eq 0 ]
181
+ [[ "$stderr" != *"P234"* ]]
182
+ [[ "$output" != *"P234"* ]]
183
+ }
184
+
185
+ # --- Exception allowlist ---
186
+
187
+ @test "allow: deferred-per-Branch-B allowlist phrase exits silent" {
188
+ cat > "$RETRO_FILE" <<'EOF'
189
+ # Session 3 Retro
190
+
191
+ ## Topic File Rotation Candidates
192
+
193
+ | File | Action |
194
+ |------|--------|
195
+ | governance-workflow.md (ratio 1.5x) | leave-as-is — deferred per Branch B |
196
+ | hooks-and-gates.md (ratio 1.3x) | leave-as-is — deferred per Branch B |
197
+ EOF
198
+ run run_hook_with_write
199
+ [ "$status" -eq 0 ]
200
+ [[ "$stderr" != *"P234"* ]]
201
+ [[ "$output" != *"P234"* ]]
202
+ }
203
+
204
+ # --- Path / tool short-circuits ---
205
+
206
+ @test "allow: tool_name != Write/Edit exits silent" {
207
+ cat > "$RETRO_FILE" <<'EOF'
208
+ Deferred this retro per session-length constraint. Next retro should run.
209
+ EOF
210
+ payload=$(jq -n --arg p "$RETRO_FILE" '{
211
+ session_id: "x",
212
+ tool_name: "Bash",
213
+ tool_input: { command: "ls" },
214
+ tool_response: { stdout: "" }
215
+ }')
216
+ run bash -c "echo '$payload' | bash '$HOOK'"
217
+ [ "$status" -eq 0 ]
218
+ [[ "$stderr" != *"P234"* ]]
219
+ [[ "$output" != *"P234"* ]]
220
+ }
221
+
222
+ @test "allow: file_path outside docs/retros/ exits silent" {
223
+ OTHER="$TMPDIR_/docs/problems/foo.md"
224
+ mkdir -p "$(dirname "$OTHER")"
225
+ cat > "$OTHER" <<'EOF'
226
+ Deferred this retro per session-length constraint. Next retro should run a full pass.
227
+ EOF
228
+ payload=$(jq -n --arg p "$OTHER" '{
229
+ session_id: "x",
230
+ tool_name: "Write",
231
+ tool_input: { file_path: $p, content: "" },
232
+ tool_response: { success: true }
233
+ }')
234
+ run bash -c "echo '$payload' | bash '$HOOK'"
235
+ [ "$status" -eq 0 ]
236
+ [[ "$stderr" != *"P234"* ]]
237
+ [[ "$output" != *"P234"* ]]
238
+ }
239
+
240
+ @test "allow: missing file_path exits silent" {
241
+ payload=$(jq -n '{
242
+ session_id: "x",
243
+ tool_name: "Write",
244
+ tool_input: {},
245
+ tool_response: { success: true }
246
+ }')
247
+ run bash -c "echo '$payload' | bash '$HOOK'"
248
+ [ "$status" -eq 0 ]
249
+ [[ "$stderr" != *"P234"* ]]
250
+ [[ "$output" != *"P234"* ]]
251
+ }
252
+
253
+ # --- Crash safety ---
254
+
255
+ @test "allow: malformed JSON input does not crash the hook" {
256
+ run bash -c "echo 'not-json' | bash '$HOOK'"
257
+ [ "$status" -eq 0 ]
258
+ # Either silent OR a single advisory — but never a non-zero exit.
259
+ }
260
+
261
+ @test "allow: non-existent retro file exits silent" {
262
+ GHOST="$TMPDIR_/docs/retros/does-not-exist.md"
263
+ payload=$(jq -n --arg p "$GHOST" '{
264
+ session_id: "x",
265
+ tool_name: "Write",
266
+ tool_input: { file_path: $p, content: "" },
267
+ tool_response: { success: true }
268
+ }')
269
+ run bash -c "echo '$payload' | bash '$HOOK'"
270
+ [ "$status" -eq 0 ]
271
+ [[ "$stderr" != *"P234"* ]]
272
+ [[ "$output" != *"P234"* ]]
273
+ }
274
+
275
+ # --- Advisory budget per ADR-045 ---
276
+
277
+ @test "advisory output stays under ADR-045 1000-byte honour-system ceiling" {
278
+ cat > "$RETRO_FILE" <<'EOF'
279
+ # Session 3 Retro
280
+
281
+ ## Signal-vs-Noise Pass
282
+
283
+ Deferred this retro per session-length constraint. Next retro
284
+ should run a full pass.
285
+ EOF
286
+ emit_write_payload "$RETRO_FILE" > "$TMPDIR_/payload.json"
287
+ # Capture combined stdout+stderr; advisory channel is stderr per
288
+ # PostToolUse precedent (itil-rfc-trailer-advisory.sh).
289
+ combined=$(bash "$HOOK" < "$TMPDIR_/payload.json" 2>&1)
290
+ [ -n "$combined" ]
291
+ [ "${#combined}" -lt 1000 ]
292
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@windyroad/itil",
3
- "version": "0.30.3-preview.319",
3
+ "version": "0.30.4-preview.321",
4
4
  "description": "ITIL-aligned IT service management for Claude Code (problem, and future incident/change skills)",
5
5
  "bin": {
6
6
  "windyroad-itil": "./bin/install.mjs"
@@ -127,6 +127,82 @@ cutoff = now - window_days * 86400
127
127
  # `itil`, not `itil-reconcile`.
128
128
  BIN_RE = re.compile(r"\bwr-([a-z0-9]+)-[a-z0-9-]+")
129
129
 
130
+ # Phase 2e (P087) byte-seek bisect — find the earliest byte offset whose
131
+ # line carries a timestamp >= cutoff, then linear-scan from there. Files
132
+ # below the threshold linear-scan from byte 0 (bisect overhead is not
133
+ # worth it; the ratio of bisect-seeks to in-window lines flips around
134
+ # this size on warm-cache developer laptops per Phase 2c profile data).
135
+ # JSONL append-only monotonicity is the input invariant — pinned in
136
+ # ADR-058 §Performance contract Phase 2e amendment. Non-monotonic input
137
+ # under-counts gracefully (bisect locates by byte position, not by
138
+ # content scan) without crashing or emitting malformed NDJSON; pinned
139
+ # by the "non-monotonic timestamps — graceful degradation" bats fixture.
140
BINARY_SEARCH_THRESHOLD = 256 * 1024  # bytes
# Whitespace-tolerant: matches both compact `"timestamp":"..."` and
# pretty `"timestamp": "..."` JSON shapes. The cheap-probe nature of the
# bisect means the regex stays in bytes and skips json.loads on the
# probe line entirely.
TS_RE = re.compile(rb'"timestamp"\s*:\s*"([^"]+)"')


def _parse_iso_ts(b):
    """Parse ISO timestamp bytes → epoch seconds, or None on parse failure."""
    try:
        s = b.decode("ascii", errors="replace")
        return datetime.fromisoformat(s.replace("Z", "+00:00")).timestamp()
    except Exception:
        return None


def find_first_in_window_offset(fh, file_size, cutoff_epoch):
    """Bisect byte offset of earliest line whose timestamp >= cutoff_epoch.

    Args:
        fh: binary-mode, seekable file handle positioned anywhere.
        file_size: size of the file in bytes (upper bisect bound).
        cutoff_epoch: window start in epoch seconds.

    Returns:
        The byte offset of a line start that is at or before the first
        in-window line (0 when every line is in-window), or `file_size`
        when no in-window line exists (caller skips the file).

    Probe alignment: a probe at `mid` examines the first line that starts
    at an offset >= `mid`. It seeks to `mid - 1` and discards through the
    next newline, so a line that begins exactly at `mid` is examined
    rather than thrown away. Aligning strictly *after* `mid` while also
    tightening `hi = mid` leaves the line starting at `mid` (or at `lo`)
    unreachable, which drops the first in-window line.

    Unparseable probe lines (no timestamp match, or a timestamp that does
    not parse) are treated as in-window. The scan then starts no later
    than that line, preserving the "never miss an in-window line"
    invariant at the cost of a looser cutoff.

    Termination: the in-window branch sets `hi = mid`, and `mid < hi`
    whenever `lo < hi`. The out-of-window branch sets `lo` past the
    probed line, which is strictly greater than `mid`.
    """
    lo, hi = 0, file_size
    best = file_size  # default: no in-window line discovered
    while lo < hi:
        mid = (lo + hi) // 2
        if mid == 0:
            fh.seek(0)
        else:
            # Start one byte early: if that byte is the previous line's
            # newline, readline() consumes only it and we land on `mid`.
            fh.seek(mid - 1)
            fh.readline()
        pos = fh.tell()
        if pos >= file_size:
            # No line starts at or after mid.
            hi = mid
            continue
        line = fh.readline()
        if not line:
            hi = mid
            continue
        m = TS_RE.search(line)
        ts = _parse_iso_ts(m.group(1)) if m else None
        if ts is not None and ts < cutoff_epoch:
            # Definitely out of window: everything up to the end of this
            # line can be skipped.
            lo = pos + len(line)
        else:
            # In-window, or unknown and therefore assumed in-window.
            best = pos
            hi = mid
    return best
205
+
130
206
  def plugin_from_skill(name):
131
207
  """`wr-itil:manage-problem` -> `itil`. Non-wr-prefixed or short-form
132
208
  names like `commit`, `loop` return None (excluded from per-plugin
@@ -164,12 +240,47 @@ for jsonl in jsonl_iter:
164
240
  # File hasn't been touched in the window; skip without parsing.
165
241
  continue
166
242
  try:
167
- fh = jsonl.open("r", encoding="utf-8", errors="replace")
243
+ fh = jsonl.open("rb")
168
244
  except OSError:
169
245
  continue
170
246
  with fh:
171
- for line in fh:
247
+ # Phase 2e (P087) byte-seek bisect — for files at or above the
248
+ # threshold, locate the first line whose timestamp falls within
249
+ # the cutoff window and start the linear scan from there. Files
250
+ # below the threshold scan linearly from byte 0 (the bisect
251
+ # overhead exceeds the savings on small files). The bisect
252
+ # presumes JSONL append-only monotonic timestamps within a single
253
+ # session file — pinned as an input invariant in ADR-058
254
+ # §Performance Phase 2e amendment; non-monotonic input degrades
255
+ # gracefully via under-count, pinned by the bats "non-monotonic"
256
+ # fixture.
257
+ if st.st_size >= BINARY_SEARCH_THRESHOLD:
258
+ start_offset = find_first_in_window_offset(fh, st.st_size, cutoff)
259
+ if start_offset >= st.st_size:
260
+ # No in-window line found — skip the file entirely.
261
+ continue
262
+ fh.seek(start_offset)
263
+ for raw_line in fh:
264
+ # Phase 2d (P087) substring pre-filter — skip json.loads() on
265
+ # lines that cannot possibly contribute a count. The literal
266
+ # substring `"tool_use"` is the discriminating token: every
267
+ # content block we count carries `"type":"tool_use"`, while
268
+ # ~60% of in-window transcript lines (user messages,
269
+ # tool_result blocks, snapshots, title records) carry no
270
+ # `"tool_use"` value at all. The check is whitespace-robust
271
+ # because `"tool_use"` is a string value, not a key:value
272
+ # pair — compact-JSON (`"type":"tool_use"`) and pretty-JSON
273
+ # (`"type": "tool_use"`) both contain the literal token
274
+ # verbatim. False-positives (content-body prose containing
275
+ # the substring) fall through to full parse and the existing
276
+ # `c.get("type") == "tool_use"` content-block check excludes
277
+ # them. The substring check now runs on bytes (binary-mode
278
+ # file under Phase 2e) — `bytes.__contains__` is a fast
279
+ # memchr-backed operation in CPython.
280
+ if b'"tool_use"' not in raw_line:
281
+ continue
172
282
  try:
283
+ line = raw_line.decode("utf-8", errors="replace")
173
284
  rec = json.loads(line)
174
285
  except Exception:
175
286
  continue
@@ -318,3 +318,218 @@ assert rec.get('last_invocation_iso') is not None, rec
318
318
  # Only the in-window invocation counts; old one drops.
319
319
  echo "$output" | grep -q '"invocations":1'
320
320
  }
321
+
322
+ # ── Phase 2d: substring-prefilter false-positive fall-through ───────────────
323
+ # Iter 6 (2026-05-17) adds a cheap substring guard before json.loads() to skip
324
+ # lines that cannot possibly contribute counts. The filter checks for the
325
+ # literal substring `"tool_use"` in each raw line; lines
326
+ # missing it are skipped without parsing. Correctness invariant: any line
327
+ # whose body content (a `type=text` block, a tool_result, a user message
328
+ # rendered into the transcript verbatim) happens to contain that substring
329
+ # MUST fall through to full JSON parse and the existing not-a-real-tool_use
330
+ # check MUST exclude it from counts. This fixture seeds exactly that scenario:
331
+ # an assistant message carrying a single `type=text` content block whose body
332
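+ # literally contains the text `"type":"assistant"` and `"tool_use"`. NOTE(review): json.dumps escapes the embedded quotes (`\"tool_use\"`), so the raw line may not contain the unescaped `"tool_use"` token and the pre-filter may skip it before the fall-through path runs — confirm the fixture exercises the intended path. The legitimate tool_use line in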
333
+ # the same fixture establishes the expected count = 1. Without the existing
334
+ # `c.get("type") == "tool_use"` guard, the false-positive line would inflate
335
+ # counts; the assertion below catches any future regression on the
336
+ # fall-through path.
337
+
338
+ @test "Phase 2d: false-positive substring fall-through does not inflate counts" {
339
+ local sess="$TRANSCRIPT_ROOT/proj/falsepos.jsonl"
340
+ local ts=$(recent_iso 1)
341
+ # One legitimate Skill invocation (counts as 1).
342
+ write_skill_invocation "$sess" "wr-itil:manage-problem" "$ts"
343
+ # One adversarial assistant message: text body contains both trigger
344
+ # substrings but no real tool_use entry. Must NOT add to counts.
345
+ python3 - "$sess" "$ts" <<'PYEOF'
346
+ import json, sys
347
+ file, ts = sys.argv[1], sys.argv[2]
348
+ rec = {
349
+ "type": "assistant",
350
+ "timestamp": ts,
351
+ "message": {
352
+ "role": "assistant",
353
+ "content": [
354
+ {"type": "text", "text": 'discussing "type":"assistant" and "tool_use" tokens in prose'}
355
+ ]
356
+ }
357
+ }
358
+ with open(file, "a") as fh:
359
+ fh.write(json.dumps(rec) + "\n")
360
+ PYEOF
361
+
362
+ run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
363
+ [ "$status" -eq 0 ]
364
+ # Exactly one record (the legitimate Skill invocation); count = 1.
365
+ local line_count
366
+ line_count="$(printf '%s' "$output" | grep -c .)"
367
+ [ "$line_count" -eq 1 ]
368
+ echo "$output" | grep -q '"invocations":1'
369
+ echo "$output" | grep -q '"surface":"wr-itil:manage-problem"'
370
+ }
371
+
372
+ # ── Phase 2e: binary-search-to-first-in-window byte-seek ────────────────────
373
+ # Iter 7 (2026-05-17) adds a binary-search byte-seek before the line iterator
374
+ # for files above a size threshold. JSONL is append-only — older lines appear
375
+ # earlier in the file by author-timestamp monotonicity. The bisect locates the
376
+ # first byte offset whose line carries a timestamp >= cutoff, then scans
377
+ # forward. Files below the threshold linear-scan from byte 0 (bisect overhead
378
+ # is not worth it for small files). Correctness invariants pinned below.
379
+
380
+ # Helper: write a large jsonl that straddles the window cutoff. The first
381
+ # `old_count` lines carry timestamps `old_iso` (out-of-window); the next
382
+ # `new_count` lines carry timestamps `new_iso` (in-window). Pads each record
383
+ # with a `_pad` field so the file is comfortably above the bisect threshold
384
+ # even with modest line counts. The file is freshly written, so its mtime is
385
+ # current and the file-level mtime filter does not skip it before the bisect runs.
386
+ write_straddle_file() {
387
+ local file="$1"; local old_count="$2"; local new_count="$3"
388
+ local old_iso="$4"; local new_iso="$5"
389
+ mkdir -p "$(dirname "$file")"
390
+ python3 - "$file" "$old_count" "$new_count" "$old_iso" "$new_iso" <<'PYEOF'
391
+ import json, sys
392
+ file, old_count, new_count, old_iso, new_iso = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), sys.argv[4], sys.argv[5]
393
+ # Pad each record so the file is comfortably > 256 KB even at modest line counts.
394
+ pad = "x" * 2048
395
+ def rec(ts, skill):
396
+ return {
397
+ "type": "assistant",
398
+ "timestamp": ts,
399
+ "_pad": pad,
400
+ "message": {
401
+ "role": "assistant",
402
+ "content": [{"type": "tool_use", "name": "Skill", "input": {"skill": skill}}],
403
+ },
404
+ }
405
+ with open(file, "w") as fh:
406
+ for _ in range(old_count):
407
+ fh.write(json.dumps(rec(old_iso, "wr-itil:manage-problem")) + "\n")
408
+ for _ in range(new_count):
409
+ fh.write(json.dumps(rec(new_iso, "wr-itil:manage-problem")) + "\n")
410
+ PYEOF
411
+ }
412
+
413
+ @test "Phase 2e: byte-seek straddle file counts only in-window lines" {
414
+ local sess="$TRANSCRIPT_ROOT/proj/straddle.jsonl"
415
+ local old_iso=$(recent_iso 1440) # 60 days ago — out-of-window for 30d
416
+ local new_iso=$(recent_iso 1) # 1 hour ago — in-window
417
+ # 200 old + 50 new = 250 lines × ~2.2KB padded = ~550KB → bisect path.
418
+ write_straddle_file "$sess" 200 50 "$old_iso" "$new_iso"
419
+ # Ensure file size is above the 256KB bisect threshold (sanity check).
420
+ local size
421
+ size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
422
+ [ "$size" -gt 262144 ]
423
+
424
+ run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
425
+ [ "$status" -eq 0 ]
426
+ # Only the 50 in-window invocations count; the 200 historical lines are
427
+ # excluded by the message-level timestamp filter (already correct under
428
+ # linear scan; the bisect must preserve this invariant).
429
+ echo "$output" | grep -q '"invocations":50'
430
+ echo "$output" | grep -q '"surface":"wr-itil:manage-problem"'
431
+ }
432
+
433
+ @test "Phase 2e: byte-seek all-in-window file counts every line (no fallthrough loss)" {
434
+ local sess="$TRANSCRIPT_ROOT/proj/allnew.jsonl"
435
+ local new_iso=$(recent_iso 1)
436
+ # 250 lines × ~2.2KB = ~550KB → bisect path. Bisect finds offset 0 (every
437
+ # line already in-window) and the linear scan from there counts all 250.
438
+ write_straddle_file "$sess" 0 250 "$new_iso" "$new_iso"
439
+ local size
440
+ size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
441
+ [ "$size" -gt 262144 ]
442
+
443
+ run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
444
+ [ "$status" -eq 0 ]
445
+ echo "$output" | grep -q '"invocations":250'
446
+ }
447
+
448
+ @test "Phase 2e: small file under threshold takes linear-scan path correctly" {
449
+ local sess="$TRANSCRIPT_ROOT/proj/small.jsonl"
450
+ local old_iso=$(recent_iso 1440)
451
+ local new_iso=$(recent_iso 1)
452
+ # Three lines without padding — well under 256KB → linear-scan path.
453
+ write_skill_invocation "$sess" "wr-itil:manage-problem" "$old_iso"
454
+ write_skill_invocation "$sess" "wr-itil:manage-problem" "$new_iso"
455
+ write_skill_invocation "$sess" "wr-itil:manage-problem" "$new_iso"
456
+ local size
457
+ size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
458
+ [ "$size" -lt 262144 ]
459
+
460
+ run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
461
+ [ "$status" -eq 0 ]
462
+ # 3 lines written (1 old, 2 new); the message-timestamp filter excludes the old one, leaving 2 in-window.
463
+ echo "$output" | grep -q '"invocations":2'
464
+ }
465
+
466
+ @test "Phase 2e: empty large file emits zero records and exits 0" {
467
+ local sess="$TRANSCRIPT_ROOT/proj/empty.jsonl"
468
+ mkdir -p "$(dirname "$sess")"
469
+ # Create empty file with fresh mtime.
470
+ : > "$sess"
471
+
472
+ run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
473
+ [ "$status" -eq 0 ]
474
+ # No records.
475
+ [ -z "$output" ]
476
+ }
477
+
478
+ @test "Phase 2e: non-monotonic timestamps — graceful degradation, no crash, NDJSON well-formed" {
479
+ # Architect advisory (P087 iter-7 review 2026-05-17): pin behaviour under
480
+ # clock-skew / replay where in-window lines appear BEFORE out-of-window
481
+ # lines within the same file. Real Claude Code session jsonl files are
482
+ # append-only by a single process with a monotonic-ish wall clock; this
483
+ # fixture documents the contract under synthetic violation. Bisect MAY
484
+ # under-count under non-monotonic input (it locates the first in-window
485
+ # line by byte position, not by content scan); the contract is that the
486
+ # script exits 0 and emits structurally well-formed NDJSON. ADR-058
487
+ # §Performance amendment pins monotonicity as the input invariant.
488
+ local sess="$TRANSCRIPT_ROOT/proj/nonmono.jsonl"
489
+ local old_iso=$(recent_iso 1440)
490
+ local new_iso=$(recent_iso 1)
491
+ mkdir -p "$(dirname "$sess")"
492
+ # Interleave new / old / new / old ... pattern; padded so file is over
493
+ # threshold and bisect path activates.
494
+ python3 - "$sess" "$old_iso" "$new_iso" <<'PYEOF'
495
+ import json, sys
496
+ file, old_iso, new_iso = sys.argv[1], sys.argv[2], sys.argv[3]
497
+ pad = "y" * 2048
498
+ def rec(ts):
499
+ return {
500
+ "type": "assistant",
501
+ "timestamp": ts,
502
+ "_pad": pad,
503
+ "message": {
504
+ "role": "assistant",
505
+ "content": [{"type": "tool_use", "name": "Skill", "input": {"skill": "wr-itil:manage-problem"}}],
506
+ },
507
+ }
508
+ with open(file, "w") as fh:
509
+ # 200 lines interleaved old/new — non-monotonic on purpose.
510
+ for i in range(200):
511
+ ts = new_iso if i % 2 == 0 else old_iso
512
+ fh.write(json.dumps(rec(ts)) + "\n")
513
+ PYEOF
514
+ local size
515
+ size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
516
+ [ "$size" -gt 262144 ]
517
+
518
+ run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
519
+ [ "$status" -eq 0 ]
520
+ # Structurally well-formed: zero-or-one record, each line valid JSON, no
521
+ # crash. Exact count is NOT pinned — bisect under-count under non-monotonic
522
+ # input is documented graceful degradation per ADR-058 amendment.
523
+ if [ -n "$output" ]; then
524
+ echo "$output" | python3 -c "
525
+ import json, sys
526
+ for line in sys.stdin.read().splitlines():
527
+ if not line: continue
528
+ rec = json.loads(line)
529
+ assert rec['schema_version'] == '1.0', rec
530
+ assert rec['axis'] == 'skill-invocations', rec
531
+ assert isinstance(rec.get('invocations'), int), rec
532
+ assert rec['invocations'] >= 0, rec
533
+ "
534
+ fi
535
+ }