npm - @windyroad/itil - Versions diffs - 0.30.3-preview.319 → 0.30.4-preview.321 - Mend

@windyroad/itil 0.30.3-preview.319 → 0.30.4-preview.321

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/.claude-plugin/plugin.json +1 -1
package/hooks/hooks.json +4 -0
package/hooks/itil-fictional-defer-detect.sh +175 -0
package/hooks/test/itil-fictional-defer-detect.bats +292 -0
package/package.json +1 -1
package/scripts/skill-invocations.sh +113 -2
package/scripts/test/skill-invocations.bats +215 -0

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
   "name": "wr-itil",
-  "version": "0.30.3",
+  "version": "0.30.4",
   "description": "ITIL-aligned IT service management for Claude Code"
 }

package/hooks/hooks.json CHANGED Viewed

@@ -49,6 +49,10 @@
       {
         "matcher": "Bash",
         "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-rfc-trailer-advisory.sh" }]
+      },
+      {
+        "matcher": "Write|Edit|MultiEdit",
+        "hooks": [{ "type": "command", "command": "${CLAUDE_PLUGIN_ROOT}/hooks/itil-fictional-defer-detect.sh" }]
       }
     ],
     "Stop": [

package/hooks/itil-fictional-defer-detect.sh ADDED Viewed

@@ -0,0 +1,175 @@
+#!/bin/bash
+# P234 Phase 1 — wr-itil PostToolUse:Write|Edit hook.
+#
+# Detects "fictional defer" rationales in `docs/retros/*.md` writes —
+# defer-rationale phrases (`next retro`, `next session`, `defer
+# pending`, `defer with cause:`, `deferred per`) that lack a
+# SCHEDULED-FUTURE-SURFACE citation in the surrounding +/-5-line
+# window. The regression class P234 captures (2026-05-17 session 3
+# retro: 3 MUST_SPLIT files deferred with "cascade case: archive-of-
+# archive tier design needed" rationale; user correction "Don't defer"
+# revealed the cascade was mechanical, not a design barrier).
+#
+# Detection signal (per ticket Investigation Task 2 two-axis test):
+#   1. tool_name is Write OR Edit OR MultiEdit AND file_path matches
+#      `docs/retros/*.md`.
+#   2. Written file contains a defer-rationale phrase (case-insensitive).
+#   3. Within +/-5 lines of the match there is NO citation of a
+#      SCHEDULED-FUTURE-SURFACE — concretely any of:
+#        * Ticket ID:  P\d{3} / STORY-\d{3} / R\d{3} / RFC-\d{3}
+#        * Skill:      /wr-[a-z-]+:[a-z-]+
+#        * Hook/script: \.sh\b (path component or filename)
+#        * CI workflow: \.github/workflows/
+#        * Dated ADR:  ADR-\d{3} + \d{4}-\d{2}-\d{2} both present
+#   4. Match line is NOT on the exception allowlist
+#      (e.g. `deferred per Branch B` — Branch B's next-retro
+#      check-briefing-budgets.sh trigger IS the scheduled surface).
+#
+# When all four hold, the hook emits a stderr advisory citing P234 +
+# the SCHEDULED-FUTURE-SURFACE definition + remediation pattern
+# (cite a surface OR execute the deferred work now). The advisory
+# names the file path, line number, and detected phrase so the next
+# assistant turn has enough context to self-correct.
+#
+# Advisory only — NEVER blocks. Per ADR-013 Rule 6 fail-safe + ADR-045
+# honour-system budget (target ~600 bytes; hard ceiling 1000). Mirrors
+# the itil-rfc-trailer-advisory.sh PostToolUse precedent (stderr +
+# exit 0) and the itil-mid-loop-ask-detect.sh per-surface configuration
+# pattern (PATH_GLOB_RE / DEFER_RATIONALE_RE / the surface-citation
+# patterns / EXEMPT_PHRASES_RE at the top so the hook is
+# copy-and-retarget extensible).
+#
+# References:
+#   P234     — this hook (Phase 1 structural enforcement).
+#   P148     — Tickets Deferred section misuse; same class, different
+#              surface (advisory script not hook).
+#   P132     — over-ask class (inverse-correctness axis of P234 under-do);
+#              Phase 2b hook itil-mid-loop-ask-detect.sh is the canonical
+#              advisory-shape template.
+#   ADR-013  — Rule 6 fail-open on missing inputs / parse errors.
+#   ADR-014  — single-commit grain (this hook never auto-fixes).
+#   ADR-040  — declarative-first; advisory-only over hard block.
+#   ADR-044  — framework-resolution boundary; named in advisory.
+#   ADR-045  — hook injection budget; honour-system <1000 hard ceiling.
+#   ADR-052  — behavioural-tests default; bats live alongside.
+#   ADR-057  — three-phase declarative-first cluster rollout
+#              (Phase 2 advisory-second slot).
+# Per-surface configuration. Extending coverage to other accumulator-
+# doc surfaces (briefing topic files, decision logs, capture skill
+# outputs) is a copy-and-retarget operation — adjust PATH_GLOB_RE,
+# DEFER_RATIONALE_RE and EXEMPT_PHRASES_RE below; the remaining
+# patterns define what counts as a SCHEDULED-FUTURE-SURFACE citation.
+# Every pattern is an extended regular expression handed to `grep -E`.
+#
+# Paths the hook inspects. Anchored only at the end, so any path that
+# contains `docs/retros/` and ends in `.md` matches.
+PATH_GLOB_RE='docs/retros/.*\.md$'
+# Defer-rationale phrases; matched case-insensitively against each line
+# of the written file.
+DEFER_RATIONALE_RE='next retro|next session|defer pending|deferred pending|defer with cause|deferred with cause|deferred per'
+# Surface citations; matched case-sensitively against the +/-5 line
+# window around a defer phrase. Any single hit clears the match.
+TICKET_ID_RE='\b(P[0-9]{3}|STORY-[0-9]{3}|R[0-9]{3}|RFC-[0-9]{3})\b'
+SKILL_INVOCATION_RE='/wr-[a-z-]+:[a-z-]+'
+HOOK_PATH_RE='[A-Za-z0-9_./-]+\.sh\b'
+CI_WORKFLOW_RE='\.github/workflows/'
+# A dated ADR counts as a surface only when BOTH of these match
+# somewhere in the window (they need not be adjacent).
+ADR_REF_RE='ADR-[0-9]{3}'
+DATE_RE='[0-9]{4}-[0-9]{2}-[0-9]{2}'
+# Allowlisted phrases; matched case-insensitively against the matched
+# line only (not the whole window).
+EXEMPT_PHRASES_RE='deferred per Branch B'
+INPUT=$(cat 2>/dev/null || true)
+# Fail-open on empty/malformed stdin.
+[ -n "$INPUT" ] || exit 0
+# Parse tool_name + tool_input.file_path via python3 (sibling precedent
+# itil-rfc-trailer-advisory.sh). Fail-open on parse error.
+TOOL_NAME=$(echo "$INPUT" | python3 -c "
+import sys, json
+try:
+    data = json.load(sys.stdin)
+    print(data.get('tool_name', ''))
+except Exception:
+    print('')
+" 2>/dev/null || echo "")
+case "$TOOL_NAME" in
+  Write|Edit|MultiEdit) ;;
+  *) exit 0 ;;
+esac
+FILE_PATH=$(echo "$INPUT" | python3 -c "
+import sys, json
+try:
+    data = json.load(sys.stdin)
+    print(data.get('tool_input', {}).get('file_path', ''))
+except Exception:
+    print('')
+" 2>/dev/null || echo "")
+# Short-circuit: no file_path → silent.
+[ -n "$FILE_PATH" ] || exit 0
+# Short-circuit: path doesn't match retro glob → silent.
+if ! echo "$FILE_PATH" | grep -qE "$PATH_GLOB_RE"; then
+  exit 0
+fi
+# Short-circuit: file doesn't exist on disk (could be a pre-PostToolUse
+# Write that hasn't materialised yet, or a path the hook can't reach) →
+# silent.
+[ -f "$FILE_PATH" ] || exit 0
+# Scan for defer-rationale matches. `grep -n` prefixes every hit with its
+# line number (`lineno:content`); `-i` makes the phrase match
+# case-insensitive. No hits, or an unreadable file → silent.
+MATCHES=$(grep -inE "$DEFER_RATIONALE_RE" "$FILE_PATH" 2>/dev/null || true)
+[ -n "$MATCHES" ] || exit 0
+# For each match, check the +/-5 line window for a SCHEDULED-FUTURE-
+# SURFACE citation. The first match with no citation triggers the
+# advisory (one advisory per write, even if multiple defers fail —
+# keeps the advisory dense).
+FICTIONAL_FOUND=""
+FICTIONAL_LINE=""
+FICTIONAL_PHRASE=""
+while IFS= read -r match_row; do
+  [ -n "$match_row" ] || continue
+  LN="${match_row%%:*}"
+  match_text="${match_row#*:}"
+  # Fail-open on any row that does not start with a numeric line number
+  # (e.g. a grep notice instead of a `lineno:content` row); feeding it
+  # to the arithmetic below would raise a shell error.
+  case "$LN" in
+    ''|*[!0-9]*) continue ;;
+  esac
+  # Skip exception-allowlisted phrases (e.g. `deferred per Branch B`).
+  if printf '%s\n' "$match_text" | grep -qiE "$EXEMPT_PHRASES_RE"; then
+    continue
+  fi
+  # Window is [LN-5, LN+5]. Only the lower bound needs clamping: sed
+  # stops at end-of-file when END runs past the last line. Clamping END
+  # to `wc -l` is wrong for files without a trailing newline — `wc -l`
+  # counts newline characters, so it under-reports by one and the final
+  # line (possibly the match line itself) would fall out of the window,
+  # hiding a citation written on it.
+  START=$((LN - 5))
+  [ "$START" -lt 1 ] && START=1
+  END=$((LN + 5))
+  WINDOW=$(sed -n "${START},${END}p" "$FILE_PATH" 2>/dev/null || true)
+  # Check for any SCHEDULED-FUTURE-SURFACE citation in the window.
+  if printf '%s\n' "$WINDOW" | grep -qE "$TICKET_ID_RE"; then continue; fi
+  if printf '%s\n' "$WINDOW" | grep -qE "$SKILL_INVOCATION_RE"; then continue; fi
+  if printf '%s\n' "$WINDOW" | grep -qE "$HOOK_PATH_RE"; then continue; fi
+  if printf '%s\n' "$WINDOW" | grep -qE "$CI_WORKFLOW_RE"; then continue; fi
+  # Dated ADR requires BOTH an ADR-NNN ref AND a date in the window.
+  if printf '%s\n' "$WINDOW" | grep -qE "$ADR_REF_RE" \
+     && printf '%s\n' "$WINDOW" | grep -qE "$DATE_RE"; then
+    continue
+  fi
+  # No surface citation found — this is a fictional defer. Record the
+  # first one (advisory carries one example; remediation pattern
+  # generalises).
+  FICTIONAL_FOUND="yes"
+  FICTIONAL_LINE="$LN"
+  # Compact + truncate the matched phrase for the advisory body:
+  # squeeze runs of spaces, strip leading whitespace, cap at 80 chars.
+  FICTIONAL_PHRASE=$(printf '%s\n' "$match_text" | tr -s ' ' ' ' | sed 's/^[[:space:]]*//' | cut -c1-80)
+  break
+done <<< "$MATCHES"
+# No fictional defers → silent.
+[ -n "$FICTIONAL_FOUND" ] || exit 0
+# Emit advisory to stderr (PostToolUse precedent matches
+# itil-rfc-trailer-advisory.sh). Always exit 0 — advisory, never block.
+# Voice-tone target ~600 bytes; ADR-045 honour-system ceiling <1000.
+printf '%s\n' "P234 ADVISORY: fictional defer detected in ${FILE_PATH}:${FICTIONAL_LINE} — phrase: \"${FICTIONAL_PHRASE}\". No SCHEDULED-FUTURE-SURFACE cited within +/-5 lines. Per ADR-044 framework-resolution boundary, cite a concrete surface (ticket ID Pnnn, named skill /wr-foo:bar, hook path *.sh, CI workflow .github/workflows/, or dated ADR-nnn YYYY-MM-DD) OR execute the deferred work in this session. See P234." >&2
+exit 0

package/hooks/test/itil-fictional-defer-detect.bats ADDED Viewed

@@ -0,0 +1,292 @@
+#!/usr/bin/env bats
+# P234 Phase 1: itil-fictional-defer-detect.sh PostToolUse:Write|Edit
+# hook detects "fictional defer" rationales in `docs/retros/*.md` writes
+# — defer-rationale phrases (`next retro`, `next session`, `defer
+# pending`, `defer with cause:`, `deferred per`) that lack a
+# SCHEDULED-FUTURE-SURFACE citation in surrounding context.
+#
+# Detection signal (per ticket Investigation Task 2 two-axis test):
+#   1. tool_name is Write OR Edit OR MultiEdit AND file_path matches
+#      docs/retros/*.md
+#   2. Written file contains a defer-rationale phrase (case-insensitive)
+#   3. Within +/-5 lines of the match there is NO citation of a
+#      SCHEDULED-FUTURE-SURFACE (ticket ID P\d{3} / STORY-\d{3} / R\d{3} /
+#      RFC-\d{3}, skill invocation /wr-[a-z-]+:[a-z-]+, hook script path
+#      .sh, CI workflow path .github/workflows/, dated ADR ADR-\d{3} ...
+#      YYYY-MM-DD)
+#   4. Match is NOT on the exception allowlist (e.g. `deferred per Branch B`).
+#
+# When all four hold, the hook emits a stderr advisory citing P234 + the
+# SCHEDULED-FUTURE-SURFACE definition. Advisory only — never blocks
+# (exit 0 always). Mirrors the itil-rfc-trailer-advisory.sh PostToolUse
+# precedent (stderr + exit 0) and the itil-mid-loop-ask-detect.sh
+# detection-pattern precedent (per-surface configuration at top).
+#
+# Per ADR-005 / ADR-052 — bats live under packages/<plugin>/hooks/test/
+# and assert on emitted stderr text, not source-content. Per P081 — no
+# source-grep on hook text.
+setup() {
+  REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../../.." && pwd)"
+  HOOK="$REPO_ROOT/packages/itil/hooks/itil-fictional-defer-detect.sh"
+  TMPDIR_="$(mktemp -d)"
+  RETRO_DIR="$TMPDIR_/docs/retros"
+  mkdir -p "$RETRO_DIR"
+  RETRO_FILE="$RETRO_DIR/2026-05-17-session-3.md"
+}
+teardown() {
+  rm -rf "$TMPDIR_"
+}
+# Helper: emit PostToolUse stdin payload for a Write tool call.
+emit_write_payload() {
+  local file_path="$1"
+  jq -n --arg p "$file_path" '{
+    session_id: "fictional-defer-test",
+    tool_name: "Write",
+    tool_input: { file_path: $p, content: "(content already on disk)" },
+    tool_response: { success: true }
+  }'
+}
+# Helper: emit PostToolUse stdin payload for an Edit tool call.
+emit_edit_payload() {
+  local file_path="$1"
+  jq -n --arg p "$file_path" '{
+    session_id: "fictional-defer-test",
+    tool_name: "Edit",
+    tool_input: { file_path: $p, old_string: "x", new_string: "y" },
+    tool_response: { success: true }
+  }'
+}
+run_hook_with_write() {
+  emit_write_payload "$RETRO_FILE" | bash "$HOOK"
+}
+run_hook_with_edit() {
+  emit_edit_payload "$RETRO_FILE" | bash "$HOOK"
+}
+# --- Positive detection: fictional defer ---
+@test "detect: defer-to-next-retro with no scheduled-future-surface emits advisory" {
+  # Faithful reproduction of the P234 worked-example fictional-defer
+  # class — the defer-rationale prose carries no SCHEDULED-FUTURE-
+  # SURFACE citation; no ticket ID, no skill invocation, no dated ADR
+  # appears in the +/-5 line window around the defer phrase.
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 3 Retro
+## Signal-vs-Noise Pass
+Deferred this retro per session-length constraint (16+ briefing
+entries across 13 topic files would require ~30 min of per-entry
+scoring). Next retro should run a full pass.
+EOF
+  run run_hook_with_write
+  [ "$status" -eq 0 ]
+  [[ "$stderr" == *"P234"* ]] || [[ "$output" == *"P234"* ]]
+}
+@test "detect: deferred-pending-design-judgement with no scheduled-future-surface emits advisory" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 4 Retro
+## Topic File Rotation Candidates
+| File | Action |
+|------|--------|
+| governance-workflow.md | deferred pending design judgement (cascade case) |
+| hooks-and-gates.md | deferred pending complexity review |
+EOF
+  run run_hook_with_write
+  [ "$status" -eq 0 ]
+  [[ "$stderr" == *"P234"* ]] || [[ "$output" == *"P234"* ]]
+}
+@test "detect: defer-with-cause-context-budget with no surface emits advisory" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 5 Retro
+## Codification Candidates
+Deferred with cause: context budget pressure. Next session should
+revisit when fresh context is available.
+EOF
+  run run_hook_with_edit
+  [ "$status" -eq 0 ]
+  [[ "$stderr" == *"P234"* ]] || [[ "$output" == *"P234"* ]]
+}
+# --- Negative paths: legitimate citations (silent exit) ---
+@test "allow: defer citing P-ticket within +/-5 lines exits silent" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 3 Retro
+## Signal-vs-Noise Pass
+Deferred per [[P235]] (briefing SVN backlog: 146 entries across 17
+topic files). Next retro will surface P235 if it has been promoted
+to actionable.
+EOF
+  run run_hook_with_write
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+@test "allow: defer citing skill invocation within +/-5 lines exits silent" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 4 Retro
+## Tickets Deferred
+Deferred pending /wr-itil:work-problems Step 6.5 above-appetite
+release-loop check.
+EOF
+  run run_hook_with_edit
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+@test "allow: defer citing hook script path within +/-5 lines exits silent" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 5 Retro
+## Codification Candidates
+Defer pending packages/itil/hooks/itil-fictional-defer-detect.sh
+extension to also cover the assistant-output review channel.
+EOF
+  run run_hook_with_write
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+@test "allow: defer citing dated ADR within +/-5 lines exits silent" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 6 Retro
+## Codification Candidates
+Deferred pending ADR-044 confirmation criterion 3 graduation
+(2026-05-25). Reassess after the criterion lands.
+EOF
+  run run_hook_with_write
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+# --- Exception allowlist ---
+@test "allow: deferred-per-Branch-B allowlist phrase exits silent" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 3 Retro
+## Topic File Rotation Candidates
+| File | Action |
+|------|--------|
+| governance-workflow.md (ratio 1.5x) | leave-as-is — deferred per Branch B |
+| hooks-and-gates.md (ratio 1.3x) | leave-as-is — deferred per Branch B |
+EOF
+  run run_hook_with_write
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+# --- Path / tool short-circuits ---
+@test "allow: tool_name != Write/Edit exits silent" {
+  cat > "$RETRO_FILE" <<'EOF'
+Deferred this retro per session-length constraint. Next retro should run.
+EOF
+  payload=$(jq -n --arg p "$RETRO_FILE" '{
+    session_id: "x",
+    tool_name: "Bash",
+    tool_input: { command: "ls" },
+    tool_response: { stdout: "" }
+  }')
+  # Hand the payload and hook path to the inner shell as positional
+  # parameters instead of splicing them into the command string, so a
+  # single quote in either value cannot break the quoting.
+  run bash -c 'printf "%s\n" "$1" | bash "$2"' _ "$payload" "$HOOK"
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+@test "allow: file_path outside docs/retros/ exits silent" {
+  # Same defer prose as the positive fixtures, but under docs/problems/.
+  OTHER="$TMPDIR_/docs/problems/foo.md"
+  mkdir -p "$(dirname "$OTHER")"
+  cat > "$OTHER" <<'EOF'
+Deferred this retro per session-length constraint. Next retro should run a full pass.
+EOF
+  payload=$(jq -n --arg p "$OTHER" '{
+    session_id: "x",
+    tool_name: "Write",
+    tool_input: { file_path: $p, content: "" },
+    tool_response: { success: true }
+  }')
+  # Hand the payload and hook path to the inner shell as positional
+  # parameters instead of splicing them into the command string, so a
+  # single quote in either value cannot break the quoting.
+  run bash -c 'printf "%s\n" "$1" | bash "$2"' _ "$payload" "$HOOK"
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+@test "allow: missing file_path exits silent" {
+  payload=$(jq -n '{
+    session_id: "x",
+    tool_name: "Write",
+    tool_input: {},
+    tool_response: { success: true }
+  }')
+  # Hand the payload and hook path to the inner shell as positional
+  # parameters instead of splicing them into the command string, so a
+  # single quote in either value cannot break the quoting.
+  run bash -c 'printf "%s\n" "$1" | bash "$2"' _ "$payload" "$HOOK"
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+# --- Crash safety ---
+@test "allow: malformed JSON input does not crash the hook" {
+  run bash -c "echo 'not-json' | bash '$HOOK'"
+  [ "$status" -eq 0 ]
+  # Either silent OR a single advisory — but never a non-zero exit.
+}
+@test "allow: non-existent retro file exits silent" {
+  # Path matches the retro glob but nothing is ever written there.
+  GHOST="$TMPDIR_/docs/retros/does-not-exist.md"
+  payload=$(jq -n --arg p "$GHOST" '{
+    session_id: "x",
+    tool_name: "Write",
+    tool_input: { file_path: $p, content: "" },
+    tool_response: { success: true }
+  }')
+  # Hand the payload and hook path to the inner shell as positional
+  # parameters instead of splicing them into the command string, so a
+  # single quote in either value cannot break the quoting.
+  run bash -c 'printf "%s\n" "$1" | bash "$2"' _ "$payload" "$HOOK"
+  [ "$status" -eq 0 ]
+  [[ "$stderr" != *"P234"* ]]
+  [[ "$output" != *"P234"* ]]
+}
+# --- Advisory budget per ADR-045 ---
+@test "advisory output stays under ADR-045 1000-byte honour-system ceiling" {
+  cat > "$RETRO_FILE" <<'EOF'
+# Session 3 Retro
+## Signal-vs-Noise Pass
+Deferred this retro per session-length constraint. Next retro
+should run a full pass.
+EOF
+  emit_write_payload "$RETRO_FILE" > "$TMPDIR_/payload.json"
+  # Capture combined stdout+stderr to a file; advisory channel is stderr
+  # per PostToolUse precedent (itil-rfc-trailer-advisory.sh).
+  bash "$HOOK" < "$TMPDIR_/payload.json" > "$TMPDIR_/advisory.out" 2>&1
+  # The advisory must actually have been emitted.
+  [ -s "$TMPDIR_/advisory.out" ]
+  # The ceiling is expressed in BYTES. `${#var}` counts characters in a
+  # UTF-8 locale and the advisory contains a multi-byte em dash, so
+  # measure the captured output with `wc -c` instead. `tr` strips the
+  # padding some `wc` implementations put around the number.
+  byte_count=$(wc -c < "$TMPDIR_/advisory.out" | tr -d ' ')
+  [ "$byte_count" -lt 1000 ]
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@windyroad/itil",
-  "version": "0.30.3-preview.319",
+  "version": "0.30.4-preview.321",
   "description": "ITIL-aligned IT service management for Claude Code (problem, and future incident/change skills)",
   "bin": {
     "windyroad-itil": "./bin/install.mjs"

package/scripts/skill-invocations.sh CHANGED Viewed

@@ -127,6 +127,82 @@ cutoff = now - window_days * 86400
 # `itil`, not `itil-reconcile`.
 BIN_RE = re.compile(r"\bwr-([a-z0-9]+)-[a-z0-9-]+")
+# Phase 2e (P087) byte-seek bisect — find the earliest byte offset whose
+# line carries a timestamp >= cutoff, then linear-scan from there. Files
+# below the threshold linear-scan from byte 0 (bisect overhead is not
+# worth it; the ratio of bisect-seeks to in-window lines flips around
+# this size on warm-cache developer laptops per Phase 2c profile data).
+# JSONL append-only monotonicity is the input invariant — pinned in
+# ADR-058 §Performance contract Phase 2e amendment. Non-monotonic input
+# under-counts gracefully (bisect locates by byte position, not by
+# content scan) without crashing or emitting malformed NDJSON; pinned
+# by the "non-monotonic timestamps — graceful degradation" bats fixture.
+BINARY_SEARCH_THRESHOLD = 256 * 1024  # bytes
+# Whitespace-tolerant: matches both compact `"timestamp":"..."` and
+# pretty `"timestamp": "..."` JSON shapes. The cheap-probe nature of the
+# bisect means the regex stays in bytes and skips json.loads on the
+# probe line entirely.
+TS_RE = re.compile(rb'"timestamp"\s*:\s*"([^"]+)"')
+def _parse_iso_ts(b):
+    """Parse ISO timestamp bytes → epoch seconds, or None on parse failure."""
+    try:
+        s = b.decode("ascii", errors="replace")
+        return datetime.fromisoformat(s.replace("Z", "+00:00")).timestamp()
+    except Exception:
+        return None
+def find_first_in_window_offset(fh, file_size, cutoff_epoch):
+    """Bisect byte offset of earliest line whose timestamp >= cutoff_epoch.
+    Returns 0 when every readable line is in-window, or `file_size` when
+    no in-window line is found (caller skips the file). Falls back
+    conservatively to the lo-bound on any per-line parse failure — the
+    canonical correctness invariant is "never miss an in-window line",
+    not "always converge to the tightest cutoff".
+    Termination: the boundary-aligning `readline()` always advances past
+    `mid` (the probed line starts strictly after `mid` when `mid != 0`).
+    On the in-window branch we tighten `hi = mid` rather than `hi = pos`
+    — the latter equals `hi` itself on line-aligned probes and stalls the
+    bisect. `best` records the actual byte position so the returned
+    offset is the discovered in-window line, even though `hi` shrinks
+    by `mid` to guarantee monotonic narrowing.
+    """
+    lo, hi = 0, file_size
+    best = file_size  # default: no in-window line discovered
+    while lo < hi:
+        mid = (lo + hi) // 2
+        fh.seek(mid)
+        if mid != 0:
+            fh.readline()  # discard partial line to align to boundary
+        pos = fh.tell()
+        if pos >= file_size:
+            hi = mid
+            continue
+        line = fh.readline()
+        if not line:
+            hi = mid
+            continue
+        m = TS_RE.search(line)
+        if not m:
+            # Unparseable timestamp on the probe line — back off to mid
+            # half. The next bisect step lands on a different probe.
+            hi = mid
+            continue
+        ts = _parse_iso_ts(m.group(1))
+        if ts is None:
+            hi = mid
+            continue
+        if ts < cutoff_epoch:
+            lo = pos + len(line)
+        else:
+            best = pos
+            hi = mid  # tighten to mid, not pos — guarantees progress
+    return best
 def plugin_from_skill(name):
     """`wr-itil:manage-problem` -> `itil`. Non-wr-prefixed or short-form
     names like `commit`, `loop` return None (excluded from per-plugin
@@ -164,12 +240,47 @@ for jsonl in jsonl_iter:
         # File hasn't been touched in the window; skip without parsing.
         continue
     try:
-        fh = jsonl.open("r", encoding="utf-8", errors="replace")
+        fh = jsonl.open("rb")
     except OSError:
         continue
     with fh:
-        for line in fh:
+        # Phase 2e (P087) byte-seek bisect — for files at or above the
+        # threshold, locate the first line whose timestamp falls within
+        # the cutoff window and start the linear scan from there. Files
+        # below the threshold scan linearly from byte 0 (the bisect
+        # overhead exceeds the savings on small files). The bisect
+        # presumes JSONL append-only monotonic timestamps within a single
+        # session file — pinned as an input invariant in ADR-058
+        # §Performance Phase 2e amendment; non-monotonic input degrades
+        # gracefully via under-count, pinned by the bats "non-monotonic"
+        # fixture.
+        if st.st_size >= BINARY_SEARCH_THRESHOLD:
+            start_offset = find_first_in_window_offset(fh, st.st_size, cutoff)
+            if start_offset >= st.st_size:
+                # No in-window line found — skip the file entirely.
+                continue
+            fh.seek(start_offset)
+        for raw_line in fh:
+            # Phase 2d (P087) substring pre-filter — skip json.loads() on
+            # lines that cannot possibly contribute a count. The literal
+            # substring `"tool_use"` is the discriminating token: every
+            # content block we count carries `"type":"tool_use"`, while
+            # ~60% of in-window transcript lines (user messages,
+            # tool_result blocks, snapshots, title records) carry no
+            # `"tool_use"` value at all. The check is whitespace-robust
+            # because `"tool_use"` is a string value, not a key:value
+            # pair — compact-JSON (`"type":"tool_use"`) and pretty-JSON
+            # (`"type": "tool_use"`) both contain the literal token
+            # verbatim. False-positives (content-body prose containing
+            # the substring) fall through to full parse and the existing
+            # `c.get("type") == "tool_use"` content-block check excludes
+            # them. The substring check now runs on bytes (binary-mode
+            # file under Phase 2e) — `bytes.__contains__` is a fast
+            # memchr-backed operation in CPython.
+            if b'"tool_use"' not in raw_line:
+                continue
             try:
+                line = raw_line.decode("utf-8", errors="replace")
                 rec = json.loads(line)
             except Exception:
                 continue

package/scripts/test/skill-invocations.bats CHANGED Viewed

@@ -318,3 +318,218 @@ assert rec.get('last_invocation_iso') is not None, rec
   # Only the in-window invocation counts; old one drops.
   echo "$output" | grep -q '"invocations":1'
 }
+# ── Phase 2d: substring-prefilter false-positive fall-through ───────────────
+# Iter 6 (2026-05-17) adds a cheap substring guard before json.loads() to skip
+# lines that cannot possibly contribute counts. The filter checks each raw
+# line for the literal token `"tool_use"`; lines without it are skipped
+# without parsing. Correctness invariant: any line that carries the token
+# without carrying a real tool_use content block MUST fall through to full
+# JSON parse and the existing not-a-real-tool_use check MUST exclude it from
+# counts. This fixture seeds exactly that scenario: an assistant message
+# carrying a single `type=text` content block. Note that prose inside a JSON
+# string is NOT enough to trip the filter — json.dumps escapes the inner
+# quotes (`\"tool_use\"`), which does not contain the raw token — so the
+# block also carries a plain string VALUE `"tool_use"` under a non-`type`
+# key. The legitimate tool_use line in the same fixture establishes the
+# expected count = 1. Without the existing `c.get("type") == "tool_use"`
+# guard, the false-positive line would inflate counts; the assertion below
+# catches any future regression on the fall-through path.
+@test "Phase 2d: false-positive substring fall-through does not inflate counts" {
+  local sess="$TRANSCRIPT_ROOT/proj/falsepos.jsonl"
+  local ts=$(recent_iso 1)
+  # One legitimate Skill invocation (counts as 1).
+  write_skill_invocation "$sess" "wr-itil:manage-problem" "$ts"
+  # One adversarial assistant message: the serialized line contains the
+  # raw prefilter token but no real tool_use entry. Must NOT add to counts.
+  python3 - "$sess" "$ts" <<'PYEOF'
+import json, sys
+file, ts = sys.argv[1], sys.argv[2]
+rec = {
+  "type": "assistant",
+  "timestamp": ts,
+  "message": {
+    "role": "assistant",
+    "content": [
+      {
+        "type": "text",
+        "text": 'discussing "type":"assistant" and "tool_use" tokens in prose',
+        "label": "tool_use"
+      }
+    ]
+  }
+}
+line = json.dumps(rec)
+# Guard the fixture itself: if the raw token is absent the prefilter drops
+# the line and the fall-through path is never exercised.
+assert '"tool_use"' in line, line
+with open(file, "a") as fh:
+  fh.write(line + "\n")
+PYEOF
+  run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
+  [ "$status" -eq 0 ]
+  # Exactly one record (the legitimate Skill invocation); count = 1.
+  local line_count
+  line_count="$(printf '%s' "$output" | grep -c .)"
+  [ "$line_count" -eq 1 ]
+  echo "$output" | grep -q '"invocations":1'
+  echo "$output" | grep -q '"surface":"wr-itil:manage-problem"'
+}
+# ── Phase 2e: binary-search-to-first-in-window byte-seek ────────────────────
+# Iter 7 (2026-05-17) adds a binary-search byte-seek before the line iterator
+# for files above a size threshold. JSONL is append-only — older lines appear
+# earlier in the file by author-timestamp monotonicity. The bisect locates the
+# first byte offset whose line carries a timestamp >= cutoff, then scans
+# forward. Files below the threshold linear-scan from byte 0 (bisect overhead
+# is not worth it for small files). Correctness invariants pinned below.
+# Helper: write a large jsonl that straddles the window cutoff. The first
+# `old_count` lines carry timestamps `old_iso` (out-of-window); the next
+# `new_count` lines carry timestamps `new_iso` (in-window). Pads each record
+# with a `_pad` field so the file is comfortably above the bisect threshold
+# even with modest line counts. Sets mtime to "fresh" so the file-level
+# mtime filter does not skip the file before the bisect runs.
+write_straddle_file() {
+  local file="$1"; local old_count="$2"; local new_count="$3"
+  local old_iso="$4"; local new_iso="$5"
+  mkdir -p "$(dirname "$file")"
+  python3 - "$file" "$old_count" "$new_count" "$old_iso" "$new_iso" <<'PYEOF'
+import json, sys
+file, old_count, new_count, old_iso, new_iso = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), sys.argv[4], sys.argv[5]
+# Pad each record so the file is comfortably > 256 KB even at modest line counts.
+pad = "x" * 2048
+def rec(ts, skill):
+    return {
+        "type": "assistant",
+        "timestamp": ts,
+        "_pad": pad,
+        "message": {
+            "role": "assistant",
+            "content": [{"type": "tool_use", "name": "Skill", "input": {"skill": skill}}],
+        },
+    }
+with open(file, "w") as fh:
+    for _ in range(old_count):
+        fh.write(json.dumps(rec(old_iso, "wr-itil:manage-problem")) + "\n")
+    for _ in range(new_count):
+        fh.write(json.dumps(rec(new_iso, "wr-itil:manage-problem")) + "\n")
+PYEOF
+}
+@test "Phase 2e: byte-seek straddle file counts only in-window lines" {
+  local sess="$TRANSCRIPT_ROOT/proj/straddle.jsonl"
+  local old_iso=$(recent_iso 1440)   # 60 days ago — out-of-window for 30d
+  local new_iso=$(recent_iso 1)      # 1 hour ago — in-window
+  # 200 old + 50 new = 250 lines × ~2.2KB padded = ~550KB → bisect path.
+  write_straddle_file "$sess" 200 50 "$old_iso" "$new_iso"
+  # Ensure file size is above the 256KB bisect threshold (sanity check).
+  local size
+  size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
+  [ "$size" -gt 262144 ]
+  run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
+  [ "$status" -eq 0 ]
+  # Only the 50 in-window invocations count; the 200 historical lines are
+  # excluded by the message-level timestamp filter (already correct under
+  # linear scan; the bisect must preserve this invariant).
+  echo "$output" | grep -q '"invocations":50'
+  echo "$output" | grep -q '"surface":"wr-itil:manage-problem"'
+}
+@test "Phase 2e: byte-seek all-in-window file counts every line (no fallthrough loss)" {
+  local sess="$TRANSCRIPT_ROOT/proj/allnew.jsonl"
+  local new_iso=$(recent_iso 1)
+  # 250 lines × ~2.2KB = ~550KB → bisect path. Bisect finds offset 0 (every
+  # line already in-window) and the linear scan from there counts all 250.
+  write_straddle_file "$sess" 0 250 "$new_iso" "$new_iso"
+  local size
+  size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
+  [ "$size" -gt 262144 ]
+  run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
+  [ "$status" -eq 0 ]
+  echo "$output" | grep -q '"invocations":250'
+}
+@test "Phase 2e: small file under threshold takes linear-scan path correctly" {
+  local sess="$TRANSCRIPT_ROOT/proj/small.jsonl"
+  local old_iso=$(recent_iso 1440)
+  local new_iso=$(recent_iso 1)
+  # Three lines without padding — well under 256KB → linear-scan path.
+  write_skill_invocation "$sess" "wr-itil:manage-problem" "$old_iso"
+  write_skill_invocation "$sess" "wr-itil:manage-problem" "$new_iso"
+  write_skill_invocation "$sess" "wr-itil:manage-problem" "$new_iso"
+  local size
+  size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
+  [ "$size" -lt 262144 ]
+  run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
+  [ "$status" -eq 0 ]
+  # 2 in-window (1 old, 2 new); message-timestamp filter excludes the old.
+  echo "$output" | grep -q '"invocations":2'
+}
+@test "Phase 2e: empty large file emits zero records and exits 0" {
+  local sess="$TRANSCRIPT_ROOT/proj/empty.jsonl"
+  mkdir -p "$(dirname "$sess")"
+  # Create empty file with fresh mtime.
+  : > "$sess"
+  run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
+  [ "$status" -eq 0 ]
+  # No records.
+  [ -z "$output" ]
+}
+@test "Phase 2e: non-monotonic timestamps — graceful degradation, no crash, NDJSON well-formed" {
+  # Architect advisory (P087 iter-7 review 2026-05-17): pin behaviour under
+  # clock-skew / replay where in-window lines appear BEFORE out-of-window
+  # lines within the same file. Real Claude Code session jsonl files are
+  # append-only by a single process with a monotonic-ish wall clock; this
+  # fixture documents the contract under synthetic violation. Bisect MAY
+  # under-count under non-monotonic input (it locates the first in-window
+  # line by byte position, not by content scan); the contract is that the
+  # script exits 0 and emits structurally well-formed NDJSON. ADR-058
+  # §Performance amendment pins monotonicity as the input invariant.
+  local sess="$TRANSCRIPT_ROOT/proj/nonmono.jsonl"
+  local old_iso=$(recent_iso 1440)
+  local new_iso=$(recent_iso 1)
+  mkdir -p "$(dirname "$sess")"
+  # Interleave new / old / new / old ... pattern; padded so file is over
+  # threshold and bisect path activates.
+  python3 - "$sess" "$old_iso" "$new_iso" <<'PYEOF'
+import json, sys
+file, old_iso, new_iso = sys.argv[1], sys.argv[2], sys.argv[3]
+pad = "y" * 2048
+def rec(ts):
+    return {
+        "type": "assistant",
+        "timestamp": ts,
+        "_pad": pad,
+        "message": {
+            "role": "assistant",
+            "content": [{"type": "tool_use", "name": "Skill", "input": {"skill": "wr-itil:manage-problem"}}],
+        },
+    }
+with open(file, "w") as fh:
+    # 200 lines interleaved old/new — non-monotonic on purpose.
+    for i in range(200):
+        ts = new_iso if i % 2 == 0 else old_iso
+        fh.write(json.dumps(rec(ts)) + "\n")
+PYEOF
+  local size
+  size=$(python3 -c 'import os,sys; print(os.path.getsize(sys.argv[1]))' "$sess")
+  [ "$size" -gt 262144 ]
+  run "$SCRIPT" --window-days=30 --root="$TRANSCRIPT_ROOT" --project-root="$PROJECT_ROOT"
+  [ "$status" -eq 0 ]
+  # Structurally well-formed: zero-or-one record, each line valid JSON, no
+  # crash. Exact count is NOT pinned — bisect under-count under non-monotonic
+  # input is documented graceful degradation per ADR-058 amendment.
+  if [ -n "$output" ]; then
+    echo "$output" | python3 -c "
+import json, sys
+for line in sys.stdin.read().splitlines():
+    if not line: continue
+    rec = json.loads(line)
+    assert rec['schema_version'] == '1.0', rec
+    assert rec['axis'] == 'skill-invocations', rec
+    assert isinstance(rec.get('invocations'), int), rec
+    assert rec['invocations'] >= 0, rec
+"
+  fi
+}