anvil-dev-framework 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/README.md +71 -22
  2. package/VERSION +1 -1
  3. package/docs/ANV-263-hook-logging-investigation.md +116 -0
  4. package/docs/command-reference.md +398 -17
  5. package/docs/session-workflow.md +62 -9
  6. package/docs/system-architecture.md +584 -0
  7. package/global/api/__pycache__/ralph_api.cpython-314.pyc +0 -0
  8. package/global/api/openapi.yaml +357 -0
  9. package/global/api/ralph_api.py +528 -0
  10. package/global/commands/anvil-settings.md +47 -19
  11. package/global/commands/audit.md +163 -0
  12. package/global/commands/checklist.md +180 -0
  13. package/global/commands/coderabbit-fix.md +282 -0
  14. package/global/commands/efficiency.md +356 -0
  15. package/global/commands/evidence.md +117 -33
  16. package/global/commands/hud.md +24 -0
  17. package/global/commands/insights.md +101 -3
  18. package/global/commands/orient.md +22 -21
  19. package/global/commands/patterns.md +115 -0
  20. package/global/commands/ralph.md +47 -1
  21. package/global/commands/token-budget.md +214 -0
  22. package/global/commands/weekly-review.md +21 -1
  23. package/global/config/notifications.yaml.template +50 -0
  24. package/global/hooks/ralph_stop.sh +33 -1
  25. package/global/hooks/statusline.sh +67 -2
  26. package/global/lib/__pycache__/coderabbit_metrics.cpython-314.pyc +0 -0
  27. package/global/lib/__pycache__/command_tracker.cpython-314.pyc +0 -0
  28. package/global/lib/__pycache__/context_optimizer.cpython-314.pyc +0 -0
  29. package/global/lib/__pycache__/git_utils.cpython-314.pyc +0 -0
  30. package/global/lib/__pycache__/issue_models.cpython-314.pyc +0 -0
  31. package/global/lib/__pycache__/linear_provider.cpython-314.pyc +0 -0
  32. package/global/lib/__pycache__/optimization_applier.cpython-314.pyc +0 -0
  33. package/global/lib/__pycache__/ralph_state.cpython-314.pyc +0 -0
  34. package/global/lib/__pycache__/ralph_webhooks.cpython-314.pyc +0 -0
  35. package/global/lib/__pycache__/state_manager.cpython-314.pyc +0 -0
  36. package/global/lib/__pycache__/token_analyzer.cpython-314.pyc +0 -0
  37. package/global/lib/__pycache__/token_metrics.cpython-314.pyc +0 -0
  38. package/global/lib/coderabbit_metrics.py +647 -0
  39. package/global/lib/command_tracker.py +147 -0
  40. package/global/lib/context_optimizer.py +323 -0
  41. package/global/lib/linear_provider.py +210 -16
  42. package/global/lib/log_rotation.py +287 -0
  43. package/global/lib/optimization_applier.py +582 -0
  44. package/global/lib/ralph_events.py +398 -0
  45. package/global/lib/ralph_notifier.py +366 -0
  46. package/global/lib/ralph_state.py +264 -24
  47. package/global/lib/ralph_webhooks.py +470 -0
  48. package/global/lib/state_manager.py +121 -0
  49. package/global/lib/token_analyzer.py +1383 -0
  50. package/global/lib/token_metrics.py +919 -0
  51. package/global/tests/__pycache__/test_command_tracker.cpython-314-pytest-9.0.2.pyc +0 -0
  52. package/global/tests/__pycache__/test_context_optimizer.cpython-314-pytest-9.0.2.pyc +0 -0
  53. package/global/tests/__pycache__/test_doc_coverage.cpython-314-pytest-9.0.2.pyc +0 -0
  54. package/global/tests/__pycache__/test_git_utils.cpython-314-pytest-9.0.2.pyc +0 -0
  55. package/global/tests/__pycache__/test_issue_models.cpython-314-pytest-9.0.2.pyc +0 -0
  56. package/global/tests/__pycache__/test_linear_filtering.cpython-314-pytest-9.0.2.pyc +0 -0
  57. package/global/tests/__pycache__/test_linear_provider.cpython-314-pytest-9.0.2.pyc +0 -0
  58. package/global/tests/__pycache__/test_local_provider.cpython-314-pytest-9.0.2.pyc +0 -0
  59. package/global/tests/__pycache__/test_optimization_applier.cpython-314-pytest-9.0.2.pyc +0 -0
  60. package/global/tests/__pycache__/test_token_analyzer.cpython-314-pytest-9.0.2.pyc +0 -0
  61. package/global/tests/__pycache__/test_token_analyzer_phase6.cpython-314-pytest-9.0.2.pyc +0 -0
  62. package/global/tests/__pycache__/test_token_metrics.cpython-314-pytest-9.0.2.pyc +0 -0
  63. package/global/tests/test_command_tracker.py +172 -0
  64. package/global/tests/test_context_optimizer.py +321 -0
  65. package/global/tests/test_linear_filtering.py +319 -0
  66. package/global/tests/test_linear_provider.py +40 -1
  67. package/global/tests/test_optimization_applier.py +508 -0
  68. package/global/tests/test_token_analyzer.py +735 -0
  69. package/global/tests/test_token_analyzer_phase6.py +537 -0
  70. package/global/tests/test_token_metrics.py +829 -0
  71. package/global/tools/README.md +153 -0
  72. package/global/tools/__pycache__/anvil-hud.cpython-314.pyc +0 -0
  73. package/global/tools/__pycache__/orient_linear.cpython-314.pyc +0 -0
  74. package/global/tools/__pycache__/ralph-watchcpython-314.pyc +0 -0
  75. package/global/tools/anvil-hud.py +86 -1
  76. package/global/tools/anvil-memory/src/__tests__/ccs/context-monitor.test.ts +472 -0
  77. package/global/tools/anvil-memory/src/__tests__/ccs/fixtures.ts +405 -0
  78. package/global/tools/anvil-memory/src/__tests__/ccs/index.ts +36 -0
  79. package/global/tools/anvil-memory/src/__tests__/ccs/prompt-generator.test.ts +653 -0
  80. package/global/tools/anvil-memory/src/__tests__/ccs/ralph-stop.test.ts +727 -0
  81. package/global/tools/anvil-memory/src/__tests__/ccs/test-utils.ts +340 -0
  82. package/global/tools/anvil-memory/src/__tests__/commands.test.ts +218 -0
  83. package/global/tools/anvil-memory/src/commands/context.ts +322 -0
  84. package/global/tools/anvil-memory/src/db.ts +108 -0
  85. package/global/tools/anvil-memory/src/index.ts +2 -8
  86. package/global/tools/orient_linear.py +159 -0
  87. package/global/tools/ralph-watch +423 -0
  88. package/package.json +2 -1
  89. package/project/.anvil-project.yaml.template +93 -0
  90. package/project/CLAUDE.md.template +343 -0
  91. package/project/agents/README.md +119 -0
  92. package/project/agents/cross-layer-debugger.md +217 -0
  93. package/project/agents/security-code-reviewer.md +162 -0
  94. package/project/constitution.md.template +235 -0
  95. package/project/coordination.md +103 -0
  96. package/project/docs/background-tasks.md +258 -0
  97. package/project/docs/skills-frontmatter.md +243 -0
  98. package/project/examples/README.md +106 -0
  99. package/project/examples/api-route-template.ts +171 -0
  100. package/project/examples/component-template.tsx +110 -0
  101. package/project/examples/hook-template.ts +152 -0
  102. package/project/examples/service-template.ts +207 -0
  103. package/project/examples/test-template.test.tsx +249 -0
  104. package/project/hooks/README.md +491 -0
  105. package/project/hooks/__pycache__/notification.cpython-314.pyc +0 -0
  106. package/project/hooks/__pycache__/post_tool_use.cpython-314.pyc +0 -0
  107. package/project/hooks/__pycache__/pre_tool_use.cpython-314.pyc +0 -0
  108. package/project/hooks/__pycache__/session_start.cpython-314.pyc +0 -0
  109. package/project/hooks/__pycache__/stop.cpython-314.pyc +0 -0
  110. package/project/hooks/notification.py +183 -0
  111. package/project/hooks/permission_request.py +438 -0
  112. package/project/hooks/post_tool_use.py +397 -0
  113. package/project/hooks/pre_compact.py +126 -0
  114. package/project/hooks/pre_tool_use.py +454 -0
  115. package/project/hooks/session_start.py +656 -0
  116. package/project/hooks/stop.py +356 -0
  117. package/project/hooks/subagent_start.py +223 -0
  118. package/project/hooks/subagent_stop.py +215 -0
  119. package/project/hooks/user_prompt_submit.py +110 -0
  120. package/project/hooks/utils/llm/anth.py +114 -0
  121. package/project/hooks/utils/llm/oai.py +114 -0
  122. package/project/hooks/utils/tts/elevenlabs_tts.py +63 -0
  123. package/project/hooks/utils/tts/mlx_audio_tts.py +86 -0
  124. package/project/hooks/utils/tts/openai_tts.py +92 -0
  125. package/project/hooks/utils/tts/pyttsx3_tts.py +75 -0
  126. package/project/linear.yaml.template +23 -0
  127. package/project/product.md.template +238 -0
  128. package/project/retros/README.md +126 -0
  129. package/project/rules/README.md +90 -0
  130. package/project/rules/debugging.md +139 -0
  131. package/project/rules/security-review.md +115 -0
  132. package/project/settings.yaml.template +185 -0
  133. package/project/specs/SPEC-ANV-72-hud-kanban.md +525 -0
  134. package/project/templates/api-python/CLAUDE.md +547 -0
  135. package/project/templates/generic/CLAUDE.md +260 -0
  136. package/project/templates/saas/CLAUDE.md +478 -0
  137. package/project/tests/README.md +140 -0
  138. package/project/tests/__pycache__/test_transcript_parser.cpython-314-pytest-9.0.2.pyc +0 -0
  139. package/project/tests/fixtures/sample-transcript.jsonl +21 -0
  140. package/project/tests/test-hooks.sh +259 -0
  141. package/project/tests/test-lib.sh +248 -0
  142. package/project/tests/test-statusline.sh +165 -0
  143. package/project/tests/test_transcript_parser.py +323 -0
@@ -0,0 +1,356 @@
1
+ # /efficiency - Historical Token Efficiency Analysis
2
+
3
+ > Analyze token consumption patterns over time and identify optimization opportunities.
4
+
5
+ ## When to Use
6
+ - Weekly/monthly efficiency reviews
7
+ - Identify consistently low-efficiency components
8
+ - Track optimization impact over time
9
+ - Plan CLAUDE.md and hook optimization
10
+
11
+ ## Variants
12
+
13
+ | Command | Description |
14
+ |---------|-------------|
15
+ | `/efficiency` | Weekly report (default, last 7 days) |
16
+ | `/efficiency --weekly` | Explicit weekly report |
17
+ | `/efficiency --monthly` | Monthly report (last 30 days) |
18
+ | `/efficiency --recommendations` | Show only recommendations |
19
+ | `/efficiency --apply [ID]` | Apply a specific recommendation |
20
+ | `/efficiency --apply-all` | Apply all low-risk recommendations |
21
+ | `/efficiency --rollback [ID]` | Rollback a previous optimization |
22
+ | `/efficiency --impact` | Show impact of applied optimizations |
23
+
24
+ ## Execution Steps
25
+
26
+ ### Step 1: Load Token Analyzer
27
+
28
+ ```python
29
+ import sys
30
+ sys.path.insert(0, 'global/lib')
31
+ from token_analyzer import get_analyzer
32
+
33
+ analyzer = get_analyzer()
34
+ ```
35
+
36
+ ### Step 2: Generate Report
37
+
38
+ ```python
39
+ # Weekly report (default)
40
+ report = analyzer.generate_efficiency_report(period_days=7)
41
+
42
+ # Monthly report
43
+ report = analyzer.generate_efficiency_report(period_days=30)
44
+ ```
45
+
46
+ ### Step 3: Format and Output
47
+
48
+ ```python
49
+ formatted = analyzer.format_efficiency_report(report)
50
+ print(formatted)
51
+ ```
52
+
53
+ ### Step 4: Output Report
54
+
55
+ Weekly report format:
56
+
57
+ ```markdown
58
+ ## Weekly Efficiency Report
59
+
60
+ **Period**: Last 7 days
61
+ **Generated**: 2026-01-15 14:30
62
+ **Overall Efficiency**: 72/100
63
+
64
+ ### Summary
65
+
66
+ - **Sessions Analyzed**: 42
67
+ - **Total Tokens**: 1,250,000
68
+ - **Avg per Session**: 29,762
69
+
70
+ ### Component Efficiency Scores
71
+
72
+ | Component | Type | Score | Utilization | Trend |
73
+ |-----------|------|-------|-------------|-------|
74
+ | patterns | command | 35 | 15% | ↓ |
75
+ | checklist | command | 42 | 22% | → |
76
+ | CLAUDE.md | system | 78 | 100% | → |
77
+ | orient | command | 85 | 92% | ↑ |
78
+ | ready | command | 91 | 88% | ↑ |
79
+
80
+ ### Top Recommendations
81
+
82
+ - 🔴 **Defer loading patterns**: Used only 15% of the time, avg 1,200 tokens
83
+ - Potential savings: ~1,020 tokens
84
+ - 🔴 **Defer loading checklist**: Used only 22% of the time, avg 800 tokens
85
+ - Potential savings: ~624 tokens
86
+ - 🟡 **Optimize large-context**: Averaging 3,500 tokens per load
87
+ - Potential savings: ~1,050 tokens
88
+ ```
89
+
90
+ ## Applying Optimizations
91
+
92
+ ### Step 1: Load Services
93
+
94
+ ```python
95
+ import sys
96
+ sys.path.insert(0, 'global/lib')
97
+ from token_analyzer import get_analyzer
98
+ from optimization_applier import OptimizationApplier
99
+
100
+ analyzer = get_analyzer()
101
+ applier = OptimizationApplier(auto_commit=False) # Set True for auto-commit
102
+ ```
103
+
104
+ ### Step 2: Generate and Review Suggestions
105
+
106
+ ```python
107
+ # Analyze usage patterns
108
+ usage = analyzer.analyze_usage_patterns(days=30)
109
+
110
+ # Generate suggestions
111
+ suggestions = analyzer.generate_optimization_suggestions(usage)
112
+
113
+ # Format for review
114
+ report = analyzer.format_suggestions_report(suggestions)
115
+ print(report)
116
+ ```
117
+
118
+ ### Step 3: Apply a Specific Recommendation
119
+
120
+ ```python
121
+ # Find the suggestion by ID
122
+ suggestion = next(s for s in suggestions if s['id'] == target_id)
123
+
124
+ # Apply with backup
125
+ result = applier.apply_recommendation(
126
+ recommendation_id=suggestion['id'],
127
+ recommendation_type=suggestion['type'],
128
+ description=suggestion['title'],
129
+ target_files=suggestion['target_files'],
130
+ changes=suggestion['changes'],
131
+ estimated_savings=suggestion['estimated_savings']
132
+ )
133
+
134
+ if result.success:
135
+ print(f"✅ Applied optimization #{result.optimization_id}")
136
+ print(f" Files modified: {', '.join(result.files_modified)}")
137
+ print(f" Tokens saved: {result.savings:,}")
138
+ print(f" Backup at: {result.backup_paths[0]}")
139
+ else:
140
+ print(f"❌ Failed: {result.error_message}")
141
+ ```
142
+
143
+ ### Step 4: Apply All Low-Risk Recommendations
144
+
145
+ ```python
146
+ # Filter to low-risk only
147
+ low_risk = [s for s in suggestions if s['risk_level'] == 'low']
148
+
149
+ applied = []
150
+ for suggestion in low_risk:
151
+ result = applier.apply_recommendation(
152
+ recommendation_id=suggestion['id'],
153
+ recommendation_type=suggestion['type'],
154
+ description=suggestion['title'],
155
+ target_files=suggestion['target_files'],
156
+ changes=suggestion['changes'],
157
+ estimated_savings=suggestion['estimated_savings']
158
+ )
159
+ if result.success:
160
+ applied.append(result)
161
+
162
+ print(f"Applied {len(applied)} optimizations")
163
+ print(f"Total savings: {sum(r.savings for r in applied):,} tokens")
164
+ ```
165
+
166
+ ### Step 5: View Impact Report
167
+
168
+ ```python
169
+ # Get impact summary
170
+ impact = applier.get_total_savings()
171
+ print(f"Total tokens saved: {impact['total_tokens_saved']:,}")
172
+ print(f"Optimizations applied: {impact['optimizations_count']}")
173
+ print(f"Rollbacks: {impact['reverted_count']}")
174
+
175
+ # Detailed report
176
+ print(applier.generate_impact_report())
177
+ ```
178
+
179
+ ### Step 6: Rollback if Needed
180
+
181
+ ```python
182
+ # Rollback a specific optimization
183
+ success = applier.rollback_optimization(optimization_id=123)
184
+ if success:
185
+ print("✅ Rollback successful")
186
+ else:
187
+ print("❌ Rollback failed")
188
+ ```
189
+
190
+ ### Output Format: Apply Result
191
+
192
+ ```markdown
193
+ ## Optimization Applied
194
+
195
+ **ID**: OPT-001
196
+ **Type**: defer_loading
197
+ **Description**: Defer loading of patterns command
198
+
199
+ ### Before/After
200
+
201
+ | Metric | Before | After | Change |
202
+ |--------|--------|-------|--------|
203
+ | CLAUDE.md tokens | 3,500 | 2,200 | -1,300 |
204
+ | Initial context | 8,200 | 6,900 | -1,300 |
205
+
206
+ ### Files Modified
207
+ - `.claude/CLAUDE.md` — Removed patterns section
208
+ - `global/commands/patterns.md` — Content moved here
209
+
210
+ ### Backup Location
211
+ `.claude/backups/optimizations/CLAUDE.md.20260116_103000.bak`
212
+
213
+ ### Verify
214
+ Run `/audit` to confirm token reduction in next session.
215
+
216
+ ### Rollback
217
+ If issues occur: `/efficiency --rollback OPT-001`
218
+ ```
219
+
220
+ ## Efficiency Score Calculation
221
+
222
+ Component efficiency score (0-100) is based on:
223
+
224
+ | Factor | Points | Criteria |
225
+ |--------|--------|----------|
226
+ | Utilization | 0-50 | % of loads where component was used |
227
+ | Token Cost | 0-30 | Lower avg tokens = higher score |
228
+ | Consistency | 0-20 | Frequent use with high utilization |
229
+
230
+ | Score Range | Interpretation |
231
+ |-------------|----------------|
232
+ | 90-100 | Excellent—keep as is |
233
+ | 70-89 | Good—minor optimization possible |
234
+ | 50-69 | Fair—consider optimization |
235
+ | <50 | Poor—candidate for removal/deferral |
236
+
237
+ ## Trend Indicators
238
+
239
+ | Icon | Meaning |
240
+ |------|---------|
241
+ | ↑ | Improving (utilization increasing) |
242
+ | → | Stable (no significant change) |
243
+ | ↓ | Degrading (utilization decreasing) |
244
+ | ★ | New (no previous data) |
245
+
246
+ ## Key Behaviors
247
+ - Reports compare to previous period when possible
248
+ - Components sorted by efficiency score (lowest first)
249
+ - Recommendations focus on highest-impact improvements
250
+ - Historical data preserved for 90 days
251
+
252
+ ## Recommendations Categories
253
+
254
+ | Category | Priority | Action |
255
+ |----------|----------|--------|
256
+ | defer | High (1-2) | Move to on-demand loading |
257
+ | optimize | Medium (2) | Reduce size or split |
258
+ | review | Low (3) | Evaluate if still needed |
259
+
260
+ ## Anti-Patterns to Avoid
261
+ - ❌ Running without sufficient historical data (<7 days)
262
+ - ❌ Ignoring degrading trends
263
+ - ❌ Optimizing high-utilization components
264
+ - ❌ Applying high-risk optimizations without review
265
+ - ❌ Applying multiple optimizations without testing between
266
+ - ❌ Skipping backup verification before rollback
267
+
268
+ ## Integration Points
269
+ - **Requires**: Phase 1 instrumentation active
270
+ - **Uses**: `global/lib/token_analyzer.py`, `global/lib/optimization_applier.py`
271
+ - **Data source**: `~/.anvil/token_metrics.db`
272
+ - **Backups**: `.claude/backups/optimizations/`
273
+ - **Related commands**: `/audit` (real-time), `/token-budget` (proactive)
274
+
275
+ ## Recommendations Workflow
276
+
277
+ After running `/efficiency`:
278
+
279
+ 1. Review low-score components (score < 50)
280
+ 2. Check trends for degrading patterns
281
+ 3. Apply recommendations:
282
+ - **defer**: Move to on-demand command
283
+ - **optimize**: Reduce component size
284
+ - **review**: Consider removal
285
+ 4. Track impact in next week's report
286
+
287
+ ## Example: Acting on Recommendations
288
+
289
+ If `/efficiency` recommends deferring `patterns`:
290
+
291
+ ### Manual Approach
292
+ 1. Move detailed patterns from CLAUDE.md to `/patterns` command
293
+ 2. Keep only trigger keywords in CLAUDE.md
294
+ 3. Run `/audit` to verify reduction
295
+ 4. Check next `/efficiency` for improved score
296
+
297
+ ### Automated Approach (--apply)
298
+ 1. Run `/efficiency --recommendations` to see suggestions with IDs
299
+ 2. Review the suggestion: "Defer loading of patterns (REC-001)"
300
+ 3. Apply: `/efficiency --apply REC-001`
301
+ 4. Review before/after comparison
302
+ 5. Run `/audit` to verify
303
+ 6. If issues: `/efficiency --rollback OPT-001`
304
+
305
+ ## Self-Improvement Loop
306
+
307
+ The `/efficiency --apply` system enables a continuous self-improvement loop:
308
+
309
+ ```
310
+ ┌─────────────────────────────────────────────────────┐
311
+ │ Weekly Cycle │
312
+ ├─────────────────────────────────────────────────────┤
313
+ │ 1. /efficiency → Generate report │
314
+ │ 2. Review recommendations → Prioritize by risk │
315
+ │ 3. /efficiency --apply ID → Apply low-risk first │
316
+ │ 4. /audit → Verify improvements │
317
+ │ 5. Monitor next week → Track trend changes │
318
+ │ 6. /efficiency --rollback → Revert if needed │
319
+ └─────────────────────────────────────────────────────┘
320
+ ```
321
+
322
+ ### Risk Assessment
323
+
324
+ | Risk Level | Auto-Apply | Review Required | Example |
325
+ |------------|------------|-----------------|---------|
326
+ | Low | ✅ Safe | Optional | Remove unused command |
327
+ | Medium | ⚠️ Caution | Recommended | Defer loading |
328
+ | High | ❌ Never | Required | Modify CLAUDE.md core |
329
+
330
+ ### Tracking Impact Over Time
331
+
332
+ After applying optimizations, track their cumulative impact:
333
+
334
+ ```bash
335
+ /efficiency --impact
336
+ ```
337
+
338
+ Output:
339
+ ```markdown
340
+ ## Optimization Impact Report
341
+
342
+ **Active Optimizations**: 4
343
+ **Reverted Optimizations**: 1
344
+ **Total Tokens Saved**: 4,250
345
+
346
+ ### Active Optimizations
347
+ | ID | Type | Description | Tokens Saved | Applied |
348
+ |-----|------|-------------|--------------|---------|
349
+ | 1 | defer_loading | Defer patterns... | 1,200 | 2026-01-10 |
350
+ | 2 | defer_loading | Defer checklist... | 850 | 2026-01-10 |
351
+ | 3 | reduce_context | Optimize CLAUDE.md... | 1,800 | 2026-01-12 |
352
+ | 4 | prune_rarely_used | Remove unused... | 400 | 2026-01-15 |
353
+
354
+ ### Reverted Optimizations
355
+ - [5] Remove debug command (reverted 2026-01-14) — caused issues
356
+ ```
@@ -107,46 +107,128 @@ Skip for: internal refactors, test-only changes, documentation updates.
107
107
 
108
108
  ---
109
109
 
110
- ### Step 5: Code Review (Optional)
110
+ ### Step 5: Code Review
111
111
 
112
- If code review is enabled in `.claude/anvil.config.json`:
112
+ Code review is integrated into the evidence workflow when enabled in `.claude/anvil.config.json`.
113
113
 
114
- 1. Check configuration:
115
- ```bash
116
- # Read config if exists
117
- if [ -f ".claude/anvil.config.json" ]; then
118
- cat .claude/anvil.config.json | grep -A5 '"codeReview"'
119
- fi
120
- ```
114
+ #### 5.1: Check Configuration
121
115
 
122
- 2. Based on `codeReview.enforcement` setting:
116
+ ```bash
117
+ # Read config if exists
118
+ if [ -f ".claude/anvil.config.json" ]; then
119
+ REVIEW_ENABLED=$(cat .claude/anvil.config.json | python3 -c "import json,sys; c=json.load(sys.stdin); print(c.get('codeReview',{}).get('enabled',False))")
120
+ REVIEW_ENFORCEMENT=$(cat .claude/anvil.config.json | python3 -c "import json,sys; c=json.load(sys.stdin); print(c.get('codeReview',{}).get('enforcement','soft'))")
121
+ PRE_PR=$(cat .claude/anvil.config.json | python3 -c "import json,sys; c=json.load(sys.stdin); print(c.get('codeReview',{}).get('prePR',True))")
122
+ REVIEW_CMD=$(cat .claude/anvil.config.json | python3 -c "import json,sys; c=json.load(sys.stdin); print(c.get('codeReview',{}).get('command','coderabbit review --plain'))")
123
+ RETRY_ON_FIX=$(cat .claude/anvil.config.json | python3 -c "import json,sys; c=json.load(sys.stdin); print(c.get('codeReview',{}).get('retryOnFix',True))")
124
+ fi
125
+ ```
123
126
 
124
- | Enforcement | Behavior |
125
- |-------------|----------|
126
- | `hard` | Run review automatically. Block PR if critical issues found. |
127
- | `soft` | Prompt: "Run code review? (recommended)" Proceed either way. |
128
- | `manual` | Skip automatic prompt. User triggers when wanted. |
127
+ #### 5.2: Enforcement Behavior
129
128
 
130
- 3. If enabled, run configured tool:
131
- ```bash
132
- # Default command (configurable)
133
- coderabbit --prompt-only
134
- ```
129
+ | Enforcement | Behavior |
130
+ |-------------|----------|
131
+ | `hard` | Run review automatically. **Block PR creation** if critical issues found. Must address before proceeding. |
132
+ | `soft` | Run review automatically. Show warning if issues found, but allow user to proceed with acknowledgment. |
135
133
 
136
- 4. Include results in evidence:
137
- ```markdown
138
- ### Code Review
139
- **Tool**: CodeRabbit
140
- **Status**: ✅ No critical issues / ⚠️ X issues found
134
+ #### 5.3: Check CodeRabbit Availability
141
135
 
142
- [Summary of findings if any]
143
- ```
136
+ ```bash
137
+ # Verify CodeRabbit CLI is available
138
+ if ! command -v coderabbit &> /dev/null; then
139
+ echo "CodeRabbit CLI not found. Install: npm install -g coderabbit"
140
+ # Graceful fallback - warn but don't block
141
+ fi
142
+ ```
144
143
 
145
- 5. If code review not configured:
146
- ```markdown
147
- ### Code Review
148
- Not configured. Enable with `/anvil-settings codeReview on`
149
- ```
144
+ #### 5.4: Run Code Review (if enabled and prePR is true)
145
+
146
+ ```bash
147
# Run the configured code review command (default: coderabbit review --plain).
# NOTE(review): REVIEW_CMD is read from .claude/anvil.config.json and executed
# verbatim through eval, so only run this against a config you trust.
eval "$REVIEW_CMD" 2>&1 | tee coderabbit-output.txt

# Parse results.
# `grep -c` already prints 0 when nothing matches (while exiting with status 1),
# so an `|| echo "0"` fallback would produce the two-line value "0\n0" and break
# numeric comparisons. Swallow the exit status instead, and default to 0 only
# when grep printed nothing at all (e.g. the output file is missing).
ISSUES_COUNT=$(grep -c "issue\|warning\|error" coderabbit-output.txt || true)
ISSUES_COUNT=${ISSUES_COUNT:-0}
CRITICAL_COUNT=$(grep -c "critical\|security" coderabbit-output.txt || true)
CRITICAL_COUNT=${CRITICAL_COUNT:-0}
153
+ ```
154
+
155
+ #### 5.4.1: Retry After Fix (if retryOnFix is enabled)
156
+
157
+ When `codeReview.retryOnFix` is enabled (default: true), the review process supports automatic re-validation:
158
+
159
+ 1. **Initial Review**: Run code review and capture issues
160
+ 2. **User Applies Fixes**: User or agent addresses issues (e.g., via `/coderabbit-fix`)
161
+ 3. **Re-run Review**: Automatically re-run code review to verify fixes
162
+ 4. **Repeat**: If new issues are found, repeat until the review is clean or the user skips
163
+
164
+ This ensures all issues are addressed before PR creation, especially for incremental fix workflows.
165
+
166
+ #### 5.5: Handle Results Based on Enforcement
167
+
168
+ **Hard Enforcement (blocks on critical issues):**
169
+ ```markdown
170
+ ### Code Review
171
+ **Tool**: CodeRabbit
172
+ **Enforcement**: Hard
173
+ **Status**: 2 critical issues found
174
+
175
+ **Critical Issues:**
176
+ 1. [Issue description from CodeRabbit]
177
+ 2. [Issue description from CodeRabbit]
178
+
179
+ **Action Required**: Must address critical issues before PR creation.
180
+ Run `/coderabbit-fix` to apply suggested fixes automatically.
181
+ ```
182
+
183
+ **Soft Enforcement (warns but allows proceed):**
184
+ ```markdown
185
+ ### Code Review
186
+ **Tool**: CodeRabbit
187
+ **Enforcement**: Soft
188
+ **Status**: 3 issues found (0 critical)
189
+
190
+ **Issues:**
191
+ 1. [Issue description]
192
+ 2. [Issue description]
193
+ 3. [Issue description]
194
+
195
+ **Note**: Issues found but enforcement is soft. You may proceed with PR creation.
196
+ Consider running `/coderabbit-fix` to address these issues.
197
+ ```
198
+
199
+ **Clean Review:**
200
+ ```markdown
201
+ ### Code Review
202
+ **Tool**: CodeRabbit
203
+ **Status**: No issues found
204
+
205
+ Code review passed with no issues.
206
+ ```
207
+
208
+ #### 5.6: Graceful Fallback
209
+
210
+ If CodeRabbit is unavailable:
211
+ ```markdown
212
+ ### Code Review
213
+ **Tool**: CodeRabbit
214
+ **Status**: CodeRabbit unavailable
215
+
216
+ CodeRabbit CLI not found or failed to run. Code review skipped.
217
+ - Install: `npm install -g coderabbit`
218
+ - Or disable: `/anvil-settings codeReview off`
219
+
220
+ Proceeding without code review.
221
+ ```
222
+
223
+ #### 5.7: Code Review Disabled
224
+
225
+ If code review is explicitly disabled:
226
+ ```markdown
227
+ ### Code Review
228
+ Disabled in configuration. Enable with `/anvil-settings codeReview on`
229
+ ```
230
+
231
+ **Note**: Code review is enabled by default in Anvil v1.4+. If you see this message, code review was explicitly disabled via `/anvil-settings codeReview off`.
150
232
 
151
233
  ---
152
234
 
@@ -245,10 +327,12 @@ Closes [Issue key]
245
327
  | Manual Test | ✅ Works | Description or screenshot |
246
328
  | Documentation | ⚪ Soft prompt | Status noted |
247
329
  | Changelog | ⚪ Soft prompt | Entry added or justified skip |
248
- | Code Review | If configured | Review results (when enabled) |
330
+ | Code Review | Enabled by default | Review results (address issues before PR) |
249
331
 
250
332
  **Legend**: ✅ = Required | ⚪ = Soft prompt (use judgment)
251
333
 
334
+ **Note**: Code review is enabled by default in Anvil v1.4+. Enforcement level (`soft` or `hard`) determines whether issues block PR creation.
335
+
252
336
  ## Failure Handling
253
337
 
254
338
  If any gate fails:
@@ -51,11 +51,35 @@ uv run global/tools/anvil-hud.py --demo
51
51
  | ⚠ | Context warning (>70%) |
52
52
  | 🔴 | Context critical (>85%) |
53
53
 
54
+ ### Quality Panel
55
+
56
+ The Quality panel (Tab 3) displays quality gate status for each agent's project:
57
+ - **Tests**: Pass/fail status with count
58
+ - **Lint**: Error and warning counts
59
+ - **Types**: TypeScript error count
60
+ - **CI**: GitHub Actions status
61
+ - **CR**: CodeRabbit review status (issues/suggestions)
62
+
63
+ #### CodeRabbit Weekly Metrics
64
+
65
+ The Quality panel also shows aggregated CodeRabbit metrics from the past week:
66
+
67
+ | Metric | Description | Healthy | Warning | Critical |
68
+ |--------|-------------|---------|---------|----------|
69
+ | Reviews | Total reviews this week | — | — | — |
70
+ | Issues | Found vs fixed | — | — | — |
71
+ | Avg/Review | Average issues per review | <2 | 2-5 | >5 |
72
+ | Pass Rate | % of reviews with 0 issues | >50% | 25-50% | <25% |
73
+ | Trend | Week-over-week direction | ↑ improving | → stable | ↓ degrading |
74
+
75
+ These metrics are sourced from `~/.anvil/coderabbit_metrics.db` (see `/weekly-review` for details).
76
+
54
77
  ## Data Sources
55
78
 
56
79
  The HUD reads from:
57
80
  - `~/.anvil/agents.json` - Agent registry (auto-updated by statusline hook)
58
81
  - `.claude/anvil-state.json` - Current session state
82
+ - `~/.anvil/coderabbit_metrics.db` - CodeRabbit review metrics (weekly stats)
59
83
 
60
84
  ## Troubleshooting
61
85
 
@@ -138,6 +138,99 @@ From healthcheck files, extract:
138
138
 
139
139
  Note any framework issues that correlate with retro patterns.
140
140
 
141
+ ### Step 8: Update Watermark Tracking
142
+
143
+ After generating the report (when user selects "save report"), update tracking:
144
+
145
+ #### 8.1: Update Manifest
146
+
147
+ ```python
148
+ import json
149
+ import os
150
+ from datetime import datetime
151
+
152
+ manifest_path = '.claude/insights/.manifest.json'
153
+ report_path = '.claude/insights/YYYY-MM-DD.md' # Today's report
154
+ retros_analyzed = [...] # List of retro paths from Step 0.5
155
+
156
+ # Load or create manifest
157
+ if os.path.exists(manifest_path):
158
+ with open(manifest_path) as f:
159
+ manifest = json.load(f)
160
+ else:
161
+ manifest = {"version": 1, "processed_retros": {}}
162
+
163
+ # Update manifest
164
+ now = datetime.utcnow().isoformat() + "Z"
165
+ manifest["last_run"] = now
166
+
167
+ for retro_path in retros_analyzed:
168
+ manifest["processed_retros"][retro_path] = {
169
+ "processed_at": now,
170
+ "insights_report": report_path
171
+ }
172
+
173
+ # Ensure directory exists
174
+ os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
175
+
176
+ # Write manifest
177
+ with open(manifest_path, 'w') as f:
178
+ json.dump(manifest, f, indent=2)
179
+
180
+ print(f"✓ Manifest updated: {len(retros_analyzed)} retros marked as processed")
181
+ ```
182
+
183
+ #### 8.2: Update Retro Frontmatter
184
+
185
+ For each analyzed retro, add tracking metadata:
186
+
187
+ ```python
188
+ import re
189
+ from datetime import date
190
+
191
def update_retro_frontmatter(retro_path: str, report_path: str) -> None:
    """Stamp a retro file's frontmatter with insights-tracking fields.

    Rewrites ``retro_path`` in place so that its frontmatter ends with
    ``insights_processed`` (today's date in ISO format) and
    ``insights_report`` (``report_path``). Lines that already start with
    either key are replaced rather than duplicated, so repeated calls do
    not grow the frontmatter.

    If the file has no frontmatter, or opens with ``---`` but never closes
    it, a new frontmatter section is prepended and the original content is
    kept unchanged below it.

    Args:
        retro_path: Path of the retro markdown file to update.
        report_path: Path of the insights report that analyzed the retro.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(retro_path, encoding="utf-8") as f:
        content = f.read()

    today = date.today().isoformat()
    new_fields = f"insights_processed: {today}\ninsights_report: {report_path}"

    # Only a leading '---' that is later closed counts as frontmatter; an
    # unterminated one falls through to the "prepend" branch instead of
    # leaving new_content unassigned.
    parts = content.split('---', 2) if content.startswith('---') else []

    if len(parts) == 3:
        # Drop previously written tracking keys line by line. Matching at the
        # start of a line avoids touching unrelated keys that merely contain
        # the same text, and avoids leaving stray blank lines behind.
        kept = [
            line
            for line in parts[1].strip().splitlines()
            if not line.startswith(("insights_processed:", "insights_report:"))
        ]
        existing = "\n".join(kept).rstrip()
        frontmatter = f"{existing}\n{new_fields}" if existing else new_fields
        new_content = f"---\n{frontmatter}\n---{parts[2]}"
    else:
        # Add frontmatter section
        new_content = f"---\n{new_fields}\n---\n\n{content}"

    with open(retro_path, 'w', encoding="utf-8") as f:
        f.write(new_content)
215
+
216
+ # Apply to all analyzed retros
217
+ for retro_path in retros_analyzed:
218
+ update_retro_frontmatter(retro_path, report_path)
219
+
220
+ print(f"✓ Frontmatter updated in {len(retros_analyzed)} retros")
221
+ ```
222
+
223
+ #### 8.3: Confirm Tracking Update
224
+
225
+ ```
226
+ ✓ Watermark tracking updated:
227
+ - Manifest: .claude/insights/.manifest.json
228
+ - Retros marked: 5
229
+ - Report: .claude/insights/2026-01-15.md
230
+
231
+ Next /insights run will skip these retros unless --all is used.
232
+ ```
233
+
141
234
  ---
142
235
 
143
236
  ## Output Format
@@ -291,9 +384,9 @@ After generating the report, offer:
291
384
 
292
385
  ## Integration Points
293
386
 
294
- - **Reads**: `.claude/retros/`, `.claude/healthchecks/`, `.claude/handoffs/`
295
- - **Modifies**: `CLAUDE.md` (when "apply patch" requested)
296
- - **Creates**: Linear issues (when "create issues" requested)
387
+ - **Reads**: `.claude/retros/`, `.claude/healthchecks/`, `.claude/handoffs/`, `.claude/insights/.manifest.json`
388
+ - **Modifies**: `CLAUDE.md` (when "apply patch" requested), retro frontmatter (when "save report" requested)
389
+ - **Creates**: Linear issues (when "create issues" requested), `.claude/insights/.manifest.json` (on first run)
297
390
  - **Saves**: `.claude/insights/` (when "save report" requested)
298
391
 
299
392
  ## Handling Edge Cases
@@ -305,6 +398,11 @@ After generating the report, offer:
305
398
  | No patterns found | Report "no recurring patterns" with individual learnings |
306
399
  | All patterns are positive | Focus on reinforcement, no fixes needed |
307
400
  | Conflicting learnings | Note the conflict, ask for clarification |
401
+ | No manifest exists | First run - treat all retros as unprocessed |
402
+ | Corrupted manifest | Backup, warn, treat all retros as unprocessed |
403
+ | No new retros | Show message, suggest `--all` to re-analyze |
404
+ | All retros already processed | Same as "no new retros" |
405
+ | Retro deleted after processing | Remove from manifest on next run |
308
406
 
309
407
  ---
310
408