@windyroad/retrospective 0.23.2 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/package.json +1 -1
- package/skills/analyze-context/SKILL.md +6 -6
- package/skills/analyze-context/test/analyze-context-skill-contract.bats +31 -1
- package/skills/run-retro/SKILL.md +17 -6
- package/skills/run-retro/test/run-retro-context-usage-step-2c.bats +58 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: wr-retrospective:analyze-context
|
|
3
|
-
description: Deep
|
|
3
|
+
description: Deep context-usage analyzer. Runs richer heuristics than run-retro Step 2c — per-turn attribution, per-plugin decomposition, suggestion generation, policy-breach detection. Produces a markdown report at docs/retros/<date>-context-analysis.md with an HTML-comment trailer carrying the bucket-snapshot for delta-from-prior comparison. Auto-fires from run-retro Step 2c when the combined trigger holds (calendar-elapse >14 days OR delta >20% any bucket, once-per-day guard) per ADR-043 Amendment 2026-06-08; also user-invokable on demand.
|
|
4
4
|
allowed-tools: Read, Write, Edit, Bash, Glob, Grep, Skill
|
|
5
5
|
---
|
|
6
6
|
|
|
@@ -13,11 +13,11 @@ This skill is the **deep layer** of the two-layer design in **ADR-043** (Progres
|
|
|
13
13
|
## When to use
|
|
14
14
|
|
|
15
15
|
- The user invokes `/wr-retrospective:analyze-context` directly.
|
|
16
|
-
-
|
|
16
|
+
- **Auto-fired by run-retro Step 2c** when the combined trigger holds (calendar-elapse >14 days OR delta >20% any bucket since prior snapshot) and the once-per-day guard does not block it (i.e. no `docs/retros/<TODAY>-context-analysis.md` exists yet). Per ADR-043 Amendment 2026-06-08 (P295 settlement).
|
|
17
17
|
- The user is preparing to trim context — e.g. before a release that introduces new hooks or skills, or after observing early compaction in long-running AFK loops.
|
|
18
18
|
- The user wants a baseline snapshot at a known-good moment (e.g. immediately after a P091-cluster fix lands).
|
|
19
19
|
|
|
20
|
-
**
|
|
20
|
+
**Auto-fires from run-retro Step 2c, silent in interactive and AFK modes.** Per ADR-043 Amendment 2026-06-08 + ADR-044 framework-resolution boundary, this skill auto-fires from the cheap layer when the combined trigger condition holds (calendar-elapse OR delta breach, capped at once per day). The skill never invokes `AskUserQuestion` — it writes a committed `docs/retros/<TODAY>-context-analysis.md` report and exits. AFK orchestrators read the resulting report on iteration close; the user reviews on return.
|
|
21
21
|
|
|
22
22
|
## Output Formatting
|
|
23
23
|
|
|
@@ -227,11 +227,11 @@ After the commit lands, report:
|
|
|
227
227
|
- The number of policy breaches detected.
|
|
228
228
|
- A pointer to run-retro Step 2c: *"Subsequent `/wr-retrospective:run-retro` invocations will read this report's HTML-comment trailer for delta comparison."*
|
|
229
229
|
|
|
230
|
-
## Non-interactive / AFK behaviour (ADR-013 Rule 6)
|
|
230
|
+
## Non-interactive / AFK behaviour (ADR-013 Rule 6 + ADR-043 Amendment 2026-06-08)
|
|
231
231
|
|
|
232
|
-
This skill is **
|
|
232
|
+
This skill is **auto-invoked from run-retro Step 2c** when the combined trigger holds (calendar-elapse >14 days OR delta >20% any bucket, once-per-day guard) per ADR-043 Amendment 2026-06-08 (P295 settlement). The skill is silent (never invokes `AskUserQuestion`) and produces a committed `docs/retros/<TODAY>-context-analysis.md` report; identical behaviour in interactive and AFK modes per the ADR-044 framework-resolution boundary — auto-invocation is a framework-resolved mechanical action, not a user-decided surface.
|
|
233
233
|
|
|
234
|
-
If invoked in a non-interactive context
|
|
234
|
+
If invoked in a non-interactive context AND the commit gate flags above-appetite risk: skip the commit, report the uncommitted report path clearly, and let the user resolve on return. The report file itself is still written — it is the evidence the user reviews.
|
|
235
235
|
|
|
236
236
|
## Further reading (REFERENCE.md — lazy-loaded per ADR-054)
|
|
237
237
|
|
|
@@ -69,8 +69,38 @@ setup() {
|
|
|
69
69
|
[ "$status" -eq 0 ]
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
-
@test "analyze-context:
|
|
72
|
+
@test "analyze-context: auto-fires from run-retro Step 2c per ADR-043 Amendment 2026-06-08 (P295)" {
|
|
73
|
+
# P295 settlement: deep layer auto-fires from cheap-layer Step 2c when the
|
|
74
|
+
# combined trigger holds. Replaces the prior 'never auto-invoked' clause.
|
|
75
|
+
run grep -iF 'auto-fires from run-retro Step 2c' "$SKILL_MD"
|
|
76
|
+
[ "$status" -eq 0 ]
|
|
77
|
+
run grep -F 'Amendment 2026-06-08' "$SKILL_MD"
|
|
78
|
+
[ "$status" -eq 0 ]
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
@test "analyze-context: SKILL.md no longer asserts 'never auto-invoked' / 'never auto-fires' (P295 supersedes)" {
|
|
82
|
+
# The supersession discipline: the post-P295 SKILL.md MUST NOT carry the
|
|
83
|
+
# contradicting 'never auto-fires' / 'never auto-invoked' prose, or future
|
|
84
|
+
# agents will read it as still-authoritative and revert the wiring.
|
|
73
85
|
run grep -F 'never auto-invoked' "$SKILL_MD"
|
|
86
|
+
[ "$status" -ne 0 ]
|
|
87
|
+
run grep -F 'Never auto-fires' "$SKILL_MD"
|
|
88
|
+
[ "$status" -ne 0 ]
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
@test "analyze-context: cites the combined-trigger threshold values (14 days + 20%)" {
|
|
92
|
+
# ADR-043 Amendment 2026-06-08 chosen initial thresholds per ADR-026
|
|
93
|
+
# grounding rule (concrete numerics + no-prior-data sentinel).
|
|
94
|
+
run grep -F '14 days' "$SKILL_MD"
|
|
95
|
+
[ "$status" -eq 0 ]
|
|
96
|
+
run grep -F '20%' "$SKILL_MD"
|
|
97
|
+
[ "$status" -eq 0 ]
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
@test "analyze-context: cites the once-per-day guard via TODAY snapshot file presence" {
|
|
101
|
+
run grep -F 'once-per-day guard' "$SKILL_MD"
|
|
102
|
+
[ "$status" -eq 0 ]
|
|
103
|
+
run grep -F '<TODAY>-context-analysis.md' "$SKILL_MD"
|
|
74
104
|
[ "$status" -eq 0 ]
|
|
75
105
|
}
|
|
76
106
|
|
|
@@ -229,20 +229,31 @@ Per **ADR-043** (Progressive context-usage measurement and reporting for retrosp
|
|
|
229
229
|
- A per-bucket table (one row per script-emitted bucket, sorted by bytes descending). Columns: `Bucket | Bytes | % of total | Δ vs prior`.
|
|
230
230
|
- A top-5 offenders block when ≥ 5 buckets carry non-zero byte counts. Top-5 cites the bucket name + byte count + measurement-method (per ADR-026).
|
|
231
231
|
- A one-line affordance: `Per-plugin breakdown available in /wr-retrospective:analyze-context (deep layer).`
|
|
232
|
-
- When the deep layer's last run is older than 14 days OR a bucket's delta exceeds +20% since prior snapshot, append the one-line note: `Deep analysis recommended — invoke /wr-retrospective:analyze-context.` This is a non-blocking advisory, never a prompt.
|
|
233
232
|
|
|
234
|
-
4. **
|
|
233
|
+
4. **Auto-invoke the deep layer when the combined trigger holds (ADR-043 Amendment 2026-06-08, settles P295)**. After rendering the cheap-layer report, evaluate the combined whichever-comes-first trigger:
|
|
235
234
|
|
|
236
|
-
|
|
235
|
+
- **Calendar-elapse**: the most recent `docs/retros/*-context-analysis.md` (selected by descending lexicographic sort on the date in the filename) is older than 14 days, OR no prior report exists.
|
|
236
|
+
- **Delta-breach**: any bucket's byte total in the just-rendered report has changed by more than 20% versus the prior snapshot (HTML-comment trailer from step 2).
|
|
237
|
+
- **Once-per-day guard**: if `docs/retros/<TODAY>-context-analysis.md` already exists (where `<TODAY>` is the current ISO date), treat the trigger as already-satisfied and skip auto-invocation. The snapshot artefact itself is the state; no new persistent state file is needed.
|
|
237
238
|
|
|
238
|
-
|
|
239
|
+
When the trigger holds AND the once-per-day guard has not fired (today's report does not yet exist), invoke `/wr-retrospective:analyze-context` via the Skill tool. The deep layer is silent (never invokes `AskUserQuestion`) and produces a committed `docs/retros/<TODAY>-context-analysis.md` report. Record the auto-invocation outcome in the cheap-layer section: `Deep analysis auto-fired — see docs/retros/<TODAY>-context-analysis.md.` Identical behaviour in interactive and AFK modes per ADR-013 Rule 6 + the ADR-044 framework-resolution boundary — auto-invocation is a framework-resolved mechanical action, not a user-decided surface.
|
|
240
|
+
|
|
241
|
+
When the trigger does NOT hold (no calendar elapse AND no delta breach), emit a one-line note: `Cadence trigger inactive — next auto-fire when calendar-elapse >14 days OR delta >20% any bucket.` This is non-blocking and never a prompt.
|
|
242
|
+
|
|
243
|
+
When the trigger holds but the once-per-day guard fires (today's report already exists), emit: `Cadence trigger holds; auto-fire skipped — docs/retros/<TODAY>-context-analysis.md already present.`
|
|
244
|
+
|
|
245
|
+
5. **Forbidden phrases (ADR-026)**: the cheap-layer report MUST NOT contain qualitative-only phrases. Banned: `load is negligible`, `microseconds only`, `minimal`, `small change`, `trim X to reduce bloat` (without comparable prior). Concrete byte counts + measurement-method citations are mandatory; ungrounded fields use the explicit `not measured — <reason>` sentinel.
|
|
246
|
+
|
|
247
|
+
6. **Defensive trip (fail-open)**: if the script exits non-zero or the rendered report exceeds the `THRESHOLD bytes=<N>` ceiling at runtime, skip the bucket table and emit the one-line pointer `cheap layer disabled — invoke /wr-retrospective:analyze-context for context measurement`. Log the trip in Step 2b's Pipeline Instability section so the regression is captured as a ticket candidate per the existing flow.
|
|
248
|
+
|
|
249
|
+
7. **AFK behaviour (ADR-013 Rule 6)**: identical to interactive mode. The cheap layer is silent (no `AskUserQuestion`); the bucket table + the auto-fire / cadence-trigger line ride the retro summary. The deep layer (when auto-fired per step 4) is also silent and produces a committed report — never blocks. AFK orchestrators read the summary on iteration close.
|
|
239
250
|
|
|
240
251
|
**Interaction with other surfaces:**
|
|
241
252
|
|
|
242
253
|
- **`P099` Tier 3 advisory** (`check-briefing-budgets.sh`) — measures **per-topic-file** budget on `docs/briefing/<topic>.md`. The cheap layer aggregates this into a single `briefing` bucket row; the per-file detail is drillable via P099's existing surface. No double-counting.
|
|
243
254
|
- **`P105` signal-vs-noise pass** (Step 1.5 of this skill) — measures **per-entry** signal scores on briefing entries. The cheap layer's `briefing` bucket is upstream of the per-entry signal scores; deep layer cites both as evidence sources.
|
|
244
|
-
- **Step 4 / 4b — codification flow**:
|
|
245
|
-
- **`/wr-retrospective:analyze-context` (deep layer)** —
|
|
255
|
+
- **Step 4 / 4b — codification flow**: the deep layer (`/wr-retrospective:analyze-context`) produces a `docs/retros/<date>-context-analysis.md` report with per-turn attribution and suggestion generation. Per the Amendment 2026-06-08 settlement (P295), the cheap layer **auto-routes** to the deep layer when the combined trigger holds (see step 4 above) — the former "never auto-routes" prose is superseded.
|
|
256
|
+
- **`/wr-retrospective:analyze-context` (deep layer)** — auto-fires from this step (Step 2c step 4 above) when the combined trigger holds (calendar-elapse >14 days OR delta >20% any bucket, once-per-day guard) per ADR-043 Amendment 2026-06-08. Also remains user-invokable on demand. Deep-layer report writes the HTML-comment-trailer snapshot that subsequent runs of this step read.
|
|
246
257
|
- **ADR-032 supersession note** (was: ADR-027 compatibility note): no Step-0 subagent migration applies — under ADR-032's foreground-synchronous pattern the script invocation runs in main-agent context as written. The migration shape this note previously discussed is obviated by the supersession.
|
|
247
258
|
|
|
248
259
|
### 2d. Ask Hygiene Pass (P135 Phase 5 / ADR-044)
|
|
@@ -96,3 +96,61 @@ setup() {
|
|
|
96
96
|
[ "$step_2b_line" -lt "$step_2c_line" ]
|
|
97
97
|
[ "$step_2c_line" -lt "$step_3_line" ]
|
|
98
98
|
}
|
|
99
|
+
|
|
100
|
+
# --- P295 (ADR-043 Amendment 2026-06-08) — auto-invoke wiring ---
|
|
101
|
+
|
|
102
|
+
@test "run-retro: Step 2c cites ADR-043 Amendment 2026-06-08 and P295 settlement" {
|
|
103
|
+
run grep -F 'Amendment 2026-06-08' "$SKILL_MD"
|
|
104
|
+
[ "$status" -eq 0 ]
|
|
105
|
+
run grep -F 'P295' "$SKILL_MD"
|
|
106
|
+
[ "$status" -eq 0 ]
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
@test "run-retro: Step 2c declares the combined whichever-comes-first trigger (calendar + delta)" {
|
|
110
|
+
run grep -F 'combined whichever-comes-first trigger' "$SKILL_MD"
|
|
111
|
+
[ "$status" -eq 0 ]
|
|
112
|
+
run grep -F 'Calendar-elapse' "$SKILL_MD"
|
|
113
|
+
[ "$status" -eq 0 ]
|
|
114
|
+
run grep -F 'Delta-breach' "$SKILL_MD"
|
|
115
|
+
[ "$status" -eq 0 ]
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
@test "run-retro: Step 2c declares the 14-day calendar threshold and 20% delta threshold" {
|
|
119
|
+
run grep -F 'older than 14 days' "$SKILL_MD"
|
|
120
|
+
[ "$status" -eq 0 ]
|
|
121
|
+
run grep -F 'more than 20%' "$SKILL_MD"
|
|
122
|
+
[ "$status" -eq 0 ]
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
@test "run-retro: Step 2c declares the once-per-day guard via TODAY snapshot file presence" {
|
|
126
|
+
run grep -F 'Once-per-day guard' "$SKILL_MD"
|
|
127
|
+
[ "$status" -eq 0 ]
|
|
128
|
+
run grep -F '<TODAY>-context-analysis.md' "$SKILL_MD"
|
|
129
|
+
[ "$status" -eq 0 ]
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
@test "run-retro: Step 2c auto-invokes /wr-retrospective:analyze-context via the Skill tool" {
|
|
133
|
+
run grep -F 'invoke `/wr-retrospective:analyze-context` via the Skill tool' "$SKILL_MD"
|
|
134
|
+
[ "$status" -eq 0 ]
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
@test "run-retro: Step 2c declares identical interactive + AFK auto-fire behaviour (silent deep layer)" {
|
|
138
|
+
run grep -F 'Identical behaviour in interactive and AFK modes' "$SKILL_MD"
|
|
139
|
+
[ "$status" -eq 0 ]
|
|
140
|
+
run grep -F 'never invokes `AskUserQuestion`' "$SKILL_MD"
|
|
141
|
+
[ "$status" -eq 0 ]
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
@test "run-retro: Step 2c no longer carries the superseded 'never auto-routes' / 'Never auto-fires' prose (P295 supersession)" {
|
|
145
|
+
# The 'never auto-routes' / 'Never auto-fires from this step' clauses were
|
|
146
|
+
# the on-demand-only design now reversed by ADR-043 Amendment 2026-06-08.
|
|
147
|
+
# The Step 2c block MUST NOT carry both the new auto-fire wiring AND the
|
|
148
|
+
# contradicting "never" prose — or future agents read it as still-canonical
|
|
149
|
+
# and revert the wiring. Bound the regex to the Step 2c block by checking
|
|
150
|
+
# the original on-demand-only sentinel phrases are absent from the whole
|
|
151
|
+
# SKILL.md (they appeared nowhere else).
|
|
152
|
+
run grep -F 'The cheap layer never auto-routes.' "$SKILL_MD"
|
|
153
|
+
[ "$status" -ne 0 ]
|
|
154
|
+
run grep -F 'Never auto-fires from this step.' "$SKILL_MD"
|
|
155
|
+
[ "$status" -ne 0 ]
|
|
156
|
+
}
|