claude_memory 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/CLAUDE.md +1 -0
  3. data/.claude/output-styles/memory-aware.md +1 -0
  4. data/.claude/rules/claude_memory.generated.md +1 -20
  5. data/.claude/settings.local.json +12 -1
  6. data/.claude/skills/check-memory/DEPRECATED.md +29 -0
  7. data/.claude/skills/check-memory/SKILL.md +77 -0
  8. data/.claude/skills/debug-memory +1 -0
  9. data/.claude/skills/improve/SKILL.md +532 -0
  10. data/.claude/skills/improve/feature-patterns.md +1221 -0
  11. data/.claude/skills/memory-first-workflow +1 -0
  12. data/.claude/skills/quality-update/SKILL.md +229 -0
  13. data/.claude/skills/quality-update/implementation-guide.md +346 -0
  14. data/.claude/skills/review-commit/SKILL.md +199 -0
  15. data/.claude/skills/review-for-quality/SKILL.md +154 -0
  16. data/.claude/skills/review-for-quality/expert-checklists.md +79 -0
  17. data/.claude/skills/setup-memory +1 -0
  18. data/.claude/skills/study-repo/SKILL.md +307 -0
  19. data/.claude/skills/study-repo/analysis-template.md +323 -0
  20. data/.claude/skills/study-repo/focus-examples.md +327 -0
  21. data/.claude-plugin/plugin.json +1 -1
  22. data/.lefthook/map_specs.rb +29 -0
  23. data/CHANGELOG.md +141 -0
  24. data/CLAUDE.md +168 -11
  25. data/README.md +160 -10
  26. data/Rakefile +14 -1
  27. data/WEEK2_COMPLETE.md +250 -0
  28. data/db/migrations/001_create_initial_schema.rb +117 -0
  29. data/db/migrations/002_add_project_scoping.rb +33 -0
  30. data/db/migrations/003_add_session_metadata.rb +42 -0
  31. data/db/migrations/004_add_fact_embeddings.rb +20 -0
  32. data/db/migrations/005_add_incremental_sync.rb +21 -0
  33. data/db/migrations/006_add_operation_tracking.rb +40 -0
  34. data/db/migrations/007_add_ingestion_metrics.rb +26 -0
  35. data/docs/GETTING_STARTED.md +587 -0
  36. data/docs/RELEASE_NOTES_v0.2.0.md +0 -1
  37. data/docs/RUBY_COMMUNITY_POST_v0.2.0.md +0 -2
  38. data/docs/architecture.md +53 -17
  39. data/docs/auto_init_design.md +230 -0
  40. data/docs/ci_integration.md +294 -0
  41. data/docs/eval_week1_summary.md +183 -0
  42. data/docs/eval_week2_summary.md +419 -0
  43. data/docs/evals.md +353 -0
  44. data/docs/improvements.md +551 -726
  45. data/docs/influence/.gitkeep +13 -0
  46. data/docs/influence/grepai.md +933 -0
  47. data/docs/influence/qmd.md +2195 -0
  48. data/docs/plugin.md +257 -11
  49. data/docs/quality_review.md +472 -1273
  50. data/docs/remaining_improvements.md +330 -0
  51. data/lefthook.yml +21 -1
  52. data/lib/claude_memory/commands/checks/claude_md_check.rb +41 -0
  53. data/lib/claude_memory/commands/checks/database_check.rb +120 -0
  54. data/lib/claude_memory/commands/checks/hooks_check.rb +112 -0
  55. data/lib/claude_memory/commands/checks/reporter.rb +110 -0
  56. data/lib/claude_memory/commands/checks/snapshot_check.rb +30 -0
  57. data/lib/claude_memory/commands/doctor_command.rb +12 -129
  58. data/lib/claude_memory/commands/help_command.rb +1 -0
  59. data/lib/claude_memory/commands/hook_command.rb +9 -2
  60. data/lib/claude_memory/commands/index_command.rb +169 -0
  61. data/lib/claude_memory/commands/ingest_command.rb +1 -1
  62. data/lib/claude_memory/commands/init_command.rb +5 -197
  63. data/lib/claude_memory/commands/initializers/database_ensurer.rb +30 -0
  64. data/lib/claude_memory/commands/initializers/global_initializer.rb +85 -0
  65. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +156 -0
  66. data/lib/claude_memory/commands/initializers/mcp_configurator.rb +56 -0
  67. data/lib/claude_memory/commands/initializers/memory_instructions_writer.rb +135 -0
  68. data/lib/claude_memory/commands/initializers/project_initializer.rb +111 -0
  69. data/lib/claude_memory/commands/recover_command.rb +75 -0
  70. data/lib/claude_memory/commands/registry.rb +5 -1
  71. data/lib/claude_memory/commands/stats_command.rb +239 -0
  72. data/lib/claude_memory/commands/uninstall_command.rb +226 -0
  73. data/lib/claude_memory/core/batch_loader.rb +32 -0
  74. data/lib/claude_memory/core/concept_ranker.rb +73 -0
  75. data/lib/claude_memory/core/embedding_candidate_builder.rb +37 -0
  76. data/lib/claude_memory/core/fact_collector.rb +51 -0
  77. data/lib/claude_memory/core/fact_query_builder.rb +154 -0
  78. data/lib/claude_memory/core/fact_ranker.rb +113 -0
  79. data/lib/claude_memory/core/result_builder.rb +54 -0
  80. data/lib/claude_memory/core/result_sorter.rb +25 -0
  81. data/lib/claude_memory/core/scope_filter.rb +61 -0
  82. data/lib/claude_memory/core/text_builder.rb +29 -0
  83. data/lib/claude_memory/embeddings/fastembed_adapter.rb +55 -0
  84. data/lib/claude_memory/embeddings/generator.rb +161 -0
  85. data/lib/claude_memory/embeddings/similarity.rb +69 -0
  86. data/lib/claude_memory/hook/handler.rb +4 -3
  87. data/lib/claude_memory/index/lexical_fts.rb +7 -2
  88. data/lib/claude_memory/infrastructure/operation_tracker.rb +158 -0
  89. data/lib/claude_memory/infrastructure/schema_validator.rb +206 -0
  90. data/lib/claude_memory/ingest/content_sanitizer.rb +6 -7
  91. data/lib/claude_memory/ingest/ingester.rb +103 -15
  92. data/lib/claude_memory/ingest/metadata_extractor.rb +57 -0
  93. data/lib/claude_memory/ingest/tool_extractor.rb +71 -0
  94. data/lib/claude_memory/mcp/response_formatter.rb +331 -0
  95. data/lib/claude_memory/mcp/server.rb +19 -0
  96. data/lib/claude_memory/mcp/setup_status_analyzer.rb +73 -0
  97. data/lib/claude_memory/mcp/tool_definitions.rb +279 -0
  98. data/lib/claude_memory/mcp/tool_helpers.rb +80 -0
  99. data/lib/claude_memory/mcp/tools.rb +330 -320
  100. data/lib/claude_memory/recall/dual_query_template.rb +63 -0
  101. data/lib/claude_memory/recall.rb +304 -237
  102. data/lib/claude_memory/resolve/resolver.rb +52 -49
  103. data/lib/claude_memory/store/sqlite_store.rb +210 -144
  104. data/lib/claude_memory/store/store_manager.rb +6 -6
  105. data/lib/claude_memory/sweep/sweeper.rb +6 -0
  106. data/lib/claude_memory/version.rb +1 -1
  107. data/lib/claude_memory.rb +35 -3
  108. data/output-styles/memory-aware.md +71 -0
  109. data/skills/debug-memory/SKILL.md +146 -0
  110. data/skills/memory-first-workflow/SKILL.md +144 -0
  111. data/skills/setup-memory/SKILL.md +168 -0
  112. metadata +83 -11
  113. data/.claude/.mind.mv2.aLCUZd +0 -0
  114. data/.claude/memory.sqlite3 +0 -0
  115. data/.claude/output-styles/memory-aware.md +0 -21
  116. data/.mcp.json +0 -11
  117. /data/docs/{feature_adoption_plan.md → plans/feature_adoption_plan.md} +0 -0
  118. /data/docs/{feature_adoption_plan_revised.md → plans/feature_adoption_plan_revised.md} +0 -0
  119. /data/docs/{plan.md → plans/plan.md} +0 -0
  120. /data/docs/{updated_plan.md → plans/updated_plan.md} +0 -0
data/docs/architecture.md CHANGED
@@ -9,7 +9,7 @@ ClaudeMemory is architected using Domain-Driven Design (DDD) principles with cle
9
9
  ```
10
10
  ┌─────────────────────────────────────────────────────────────┐
11
11
  │ Application Layer │
12
- │ CLI (Router) → Commands (16 classes) → Configuration │
12
+ │ CLI (Router) → Commands (20 classes) → Configuration │
13
13
  └──────────────────────┬──────────────────────────────────────┘
14
14
 
15
15
  ┌──────────────────────▼──────────────────────────────────────┐
@@ -22,12 +22,13 @@ ClaudeMemory is architected using Domain-Driven Design (DDD) principles with cle
22
22
  ┌──────────────────────▼──────────────────────────────────────┐
23
23
  │ Business Logic Layer │
24
24
  │ Recall → Resolve → Distill → Ingest → Publish │
25
- │ Sweep → MCP → Hook
25
+ │ Sweep → Embeddings → MCP → Hook
26
26
  └──────────────────────┬──────────────────────────────────────┘
27
27
 
28
28
  ┌──────────────────────▼──────────────────────────────────────┐
29
29
  │ Infrastructure Layer │
30
- │ Store (SQLite via Sequel) → FileSystem → Index (FTS5)
30
+ │ Store (SQLite v6 + WAL) → FileSystem → Index (FTS5+Vector)
31
+ │ Templates │
31
32
  └─────────────────────────────────────────────────────────────┘
32
33
  ```
33
34
 
@@ -38,8 +39,8 @@ ClaudeMemory is architected using Domain-Driven Design (DDD) principles with cle
38
39
  **Purpose:** Handle user interaction and command routing
39
40
 
40
41
  **Components:**
41
- - **CLI** (`cli.rb`): Thin router (41 lines) that dispatches to command classes
42
- - **Commands** (`commands/`): 16 command classes, each handling one CLI command
42
+ - **CLI** (`cli.rb`): Thin router that dispatches to command classes
43
+ - **Commands** (`commands/`): 20 command classes, each handling one CLI command
43
44
  - **Configuration** (`configuration.rb`): Centralized ENV access and path calculation
44
45
 
45
46
  **Key Principles:**
@@ -94,6 +95,9 @@ end
94
95
  - **SessionId**: Type-safe session identifiers
95
96
  - **TranscriptPath**: Type-safe file paths
96
97
  - **FactId**: Type-safe positive integer IDs
98
+ - **TextBuilder**: Searchable text construction from entities/facts/decisions
99
+ - **ResultSorter**: Result ranking and sorting logic
100
+ - **FactQueryBuilder**: SQL query construction for fact retrieval
97
101
  - All are immutable (frozen) and self-validating
98
102
 
99
103
  #### Null Objects (`core/`)
@@ -115,13 +119,14 @@ end
115
119
 
116
120
  **Components:**
117
121
 
118
- #### Recall (`recall.rb`)
122
+ #### Recall (`recall.rb` + `recall/`)
119
123
  - Queries facts from global and project databases
120
124
  - **Optimization**: Batch queries to eliminate N+1 issues
121
125
  - Before: 2N+1 queries for N facts
122
126
  - After: 3 queries total (FTS + batch facts + batch receipts)
123
127
  - Supports scope filtering (project, global, all)
124
128
  - Returns facts with provenance receipts
129
+ - `DualQueryTemplate`: Query template handling for dual-database queries
125
130
 
126
131
  #### Resolve (`resolve/`)
127
132
  - Truth maintenance and conflict resolution
@@ -149,9 +154,19 @@ end
149
154
  - Time-bounded execution
150
155
  - Cleans up old content and expired facts
151
156
 
157
+ #### Embeddings (`embeddings/`)
158
+ - `Generator`: Built-in TF-IDF embedding generation (always available, no dependencies)
159
+ - `FastembedAdapter`: High-quality local embeddings via [fastembed-rb](https://github.com/khasinski/fastembed-rb) (BAAI/bge-small-en-v1.5)
160
+ - 384-dimensional normalized vectors (both generators produce same dimensionality)
161
+ - Asymmetric query/passage encoding (FastEmbed) for better retrieval accuracy
162
+ - `Similarity`: Cosine similarity calculations and top-k ranking
163
+ - Dependency injection: `Recall.new(store, embedding_generator: adapter)`
164
+
152
165
  #### MCP (`mcp/`)
153
166
  - Model Context Protocol server
154
- - Exposes tools: recall, explain, promote, status, conflicts, changes, sweep_now
167
+ - Exposes 19 tools including: recall, explain, promote, status, decisions, conventions, architecture, semantic search, check_setup, and more
168
+ - `ResponseFormatter`: Consistent MCP response formatting
169
+ - `SetupStatusAnalyzer`: Initialization and version status analysis
155
170
 
156
171
  #### Hook (`hook/`)
157
172
  - Reads JSON from stdin
@@ -164,10 +179,11 @@ end
164
179
  **Components:**
165
180
 
166
181
  #### Store (`store/`)
167
- - **SQLiteStore**: Direct database access via Sequel
182
+ - **SQLiteStore**: Direct database access via Sequel (schema v6)
168
183
  - **StoreManager**: Manages dual databases (global + project)
169
184
  - **Transaction safety**: Atomic multi-step operations
170
- - Schema migrations
185
+ - **WAL mode**: Write-Ahead Logging for better concurrency
186
+ - Schema migrations with per-migration transactions
171
187
 
172
188
  #### FileSystem (`infrastructure/`)
173
189
  - **FileSystem**: Real filesystem wrapper
@@ -176,8 +192,14 @@ end
176
192
  - Enables testing without tempdir cleanup
177
193
 
178
194
  #### Index (`index/`)
179
- - SQLite FTS5 full-text search
180
- - No embeddings required
195
+ - SQLite FTS5 for lexical full-text search
196
+ - Vector embeddings for semantic similarity (384-dimensional vectors)
197
+ - Hybrid search modes: text-only, vector-only, or both (FTS5 + vector)
198
+
199
+ #### Templates (`templates/`)
200
+ - Hook configuration examples (`hooks.example.json`)
201
+ - Output style templates (`output-styles/memory-aware.md`)
202
+ - Setup and configuration scaffolding
181
203
 
182
204
  **Key Principles:**
183
205
  - Ports and Adapters: Clear interfaces for external systems
@@ -276,6 +298,16 @@ FileSystem (write)
276
298
  **Solution:** Wrap in database transactions
277
299
  **Impact:** Data integrity guaranteed
278
300
 
301
+ ### 4. WAL Mode for Concurrency
302
+ **Problem:** Database locks prevented concurrent reads during writes
303
+ **Solution:** Enable Write-Ahead Logging (WAL) mode in SQLite
304
+ **Impact:** MCP server and hooks can operate concurrently without blocking
305
+
306
+ ### 5. Local Semantic Search
307
+ **Problem:** Traditional semantic search requires cloud API calls for embedding generation
308
+ **Solution:** Local ONNX model via fastembed-rb (BAAI/bge-small-en-v1.5, 384-dimensional vectors)
309
+ **Impact:** High-quality semantic search with no API costs, no network dependency after initial model download
310
+
279
311
  ## Testing Strategy
280
312
 
281
313
  ### Unit Tests
@@ -307,14 +339,17 @@ FileSystem (write)
307
339
  - Scattered ENV access
308
340
 
309
341
  ### After Refactoring
310
- - CLI: 41 lines (95% reduction)
311
- - Tests: 426 examples (149 added)
342
+ - CLI: 41 lines (thin router, 95% reduction from original)
343
+ - Tests: 988 examples (257% increase)
312
344
  - Batch queries (3 total)
313
345
  - FileSystem abstraction
314
- - Value objects
346
+ - Value objects (SessionId, TranscriptPath, FactId)
315
347
  - Centralized Configuration
316
348
  - 4 domain models with business logic
317
- - 16 command classes
349
+ - 20 command classes
350
+ - 19 MCP tools
351
+ - Semantic search with local embeddings (FastEmbed + TF-IDF fallback)
352
+ - Schema v6 with WAL mode
318
353
 
319
354
  ## Future Improvements
320
355
 
@@ -350,11 +385,12 @@ FileSystem (write)
350
385
 
351
386
  The refactored architecture provides:
352
387
  - ✅ Clear separation of concerns
353
- - ✅ High testability (426 tests)
388
+ - ✅ High testability (988 tests)
354
389
  - ✅ Type safety (value objects)
355
390
  - ✅ Null safety (null objects)
356
- - ✅ Performance (batch queries, in-memory FS)
391
+ - ✅ Performance (batch queries, in-memory FS, WAL mode)
357
392
  - ✅ Maintainability (small, focused classes)
358
393
  - ✅ Extensibility (easy to add commands/tools)
394
+ - ✅ Semantic search (local FastEmbed ONNX model, TF-IDF fallback)
359
395
 
360
396
  The codebase now follows best practices for Ruby applications and is well-positioned for future growth.
@@ -0,0 +1,230 @@
1
+ # Auto-Initialization and Upgrade Design
2
+
3
+ ## Problem Statement
4
+
5
+ When users install ClaudeMemory (add to MCP), they must manually run `claude-memory init`. There's no:
6
+ - Automatic detection of uninitialized state
7
+ - Upgrade detection when CLAUDE.md instructions change
8
+ - Graceful degradation when not configured
9
+
10
+ ## Constraints
11
+
12
+ 1. **No hooks before init**: Can't use SessionStart hook to auto-init (hooks aren't configured yet)
13
+ 2. **MCP server is stateless**: Starts fresh each time, no persistent memory
14
+ 3. **Skills unavailable pre-init**: Can't use skills to detect/fix initialization
15
+
16
+ ## Proposed Multi-Layer Solution
17
+
18
+ ### Layer 1: Setup Status MCP Tool (Immediate Detection)
19
+
20
+ **Add new MCP tool: `memory.check_setup`**
21
+
22
+ ```ruby
23
+ {
24
+ name: "memory.check_setup",
25
+ description: "Check if ClaudeMemory is properly initialized. CALL THIS FIRST if memory tools fail or on first use of ClaudeMemory.",
26
+ result: {
27
+ initialized: true/false,
28
+ version: "1.2.3",
29
+ issues: ["No CLAUDE.md found", "Hooks not configured"],
30
+ recommendation: "Run: claude-memory init"
31
+ }
32
+ }
33
+ ```
34
+
35
+ **Implementation:**
36
+ - Check for database existence
37
+ - Check for CLAUDE.md with version marker
38
+ - Check for hooks configuration
39
+ - Return actionable recommendations
40
+
41
+ **Update other tool descriptions:**
42
+ ```ruby
43
+ description: "... If this tool fails with 'database not found', run memory.check_setup for guidance."
44
+ ```
45
+
46
+ ### Layer 2: Version Markers (Upgrade Detection)
47
+
48
+ **Add version to CLAUDE.md:**
49
+
50
+ ```markdown
51
+ <!-- ClaudeMemory v1.0.0 -->
52
+ # ClaudeMemory
53
+
54
+ ...
55
+ ```
56
+
57
+ **Create `claude-memory upgrade` command:**
58
+ - Detect current version in CLAUDE.md
59
+ - Compare with ClaudeMemory::VERSION
60
+ - Offer to upgrade instructions
61
+ - Preserve user customizations
62
+
63
+ **Workflow:**
64
+ ```bash
65
+ $ claude-memory upgrade
66
+ Checking configuration version...
67
+ Current: v0.9.0
68
+ Latest: v1.0.0
69
+
70
+ Changes in v1.0.0:
71
+ - Added memory-first workflow instructions
72
+ - Updated tool descriptions
73
+ - New /check-memory skill
74
+
75
+ Upgrade? [y/N] y
76
+ ✓ Backed up old CLAUDE.md to CLAUDE.md.backup
77
+ ✓ Updated workflow instructions
78
+ ✓ Preserved custom sections
79
+ ```
80
+
81
+ ### Layer 3: Graceful Degradation (Error Handling)
82
+
83
+ **Update MCP Tools to detect uninitialized state:**
84
+
85
+ ```ruby
86
+ def recall(args)
87
+ unless database_exists?
88
+ return {
89
+ error: "ClaudeMemory not initialized",
90
+ help: "Run 'claude-memory init' to set up databases and configuration",
91
+ documentation: "https://github.com/your-repo#installation"
92
+ }
93
+ end
94
+ # ... normal recall logic
95
+ end
96
+ ```
97
+
98
+ **Benefit**: Claude sees clear actionable errors instead of cryptic database failures.
99
+
100
+ ### Layer 4: Setup Reminder Skill
101
+
102
+ **Create `/setup-memory` skill:**
103
+
104
+ ```markdown
105
+ ---
106
+ name: setup-memory
107
+ description: Guide user through ClaudeMemory installation
108
+ disable-model-invocation: true
109
+ ---
110
+
111
+ # ClaudeMemory Setup Guide
112
+
113
+ ClaudeMemory is installed but not initialized.
114
+
115
+ ## Quick Setup
116
+
117
+ Run this command:
118
+ ```bash
119
+ claude-memory init
120
+ ```
121
+
122
+ This will:
123
+ 1. Create global and project databases
124
+ 2. Configure hooks for automatic ingestion
125
+ 3. Add workflow instructions to CLAUDE.md
126
+ 4. Set up MCP server
127
+
128
+ After running, restart Claude Code to load the configuration.
129
+
130
+ ## Verification
131
+
132
+ After init, run:
133
+ ```bash
134
+ claude-memory doctor
135
+ ```
136
+
137
+ ## Need Help?
138
+
139
+ See: https://github.com/your-repo#troubleshooting
140
+ ```
141
+
142
+ **Usage**: When Claude encounters "not initialized" errors, it can suggest: "Run `/setup-memory` for installation help"
143
+
144
+ ### Layer 5: Doctor Command Enhancement
145
+
146
+ **Add `--fix` flag to doctor:**
147
+
148
+ ```bash
149
+ $ claude-memory doctor --fix
150
+ Checking configuration...
151
+ ✗ Project database missing
152
+ ✗ No CLAUDE.md found
153
+
154
+ Would you like to run init? [y/N] y
155
+ Running: claude-memory init
156
+ ...
157
+ ```
158
+
159
+ **Add `--quiet` flag for programmatic checks:**
160
+
161
+ ```bash
162
+ $ claude-memory doctor --quiet
163
+ # Exit code 0 = healthy, 1 = needs init, 2 = needs upgrade
164
+ ```
165
+
166
+ ## Implementation Priority
167
+
168
+ ### Phase 1 (Immediate Value)
169
+ 1. ✅ Add version markers to init command
170
+ 2. ✅ Create `memory.check_setup` MCP tool
171
+ 3. ✅ Update error messages with actionable help
172
+ 4. ✅ Create `/setup-memory` skill
173
+
174
+ ### Phase 2 (Enhanced UX)
175
+ 5. ⬜ Create `claude-memory upgrade` command
176
+ 6. ⬜ Add `doctor --fix` and `doctor --quiet`
177
+ 7. ⬜ Add upgrade detection to SessionStart hook
178
+
179
+ ### Phase 3 (Polish)
180
+ 8. ⬜ Version migration system (v1.0.0 → v1.1.0)
181
+ 9. ⬜ Preserve custom CLAUDE.md sections during upgrade
182
+ 10. ⬜ Add upgrade notifications via MCP tool
183
+
184
+ ## Decision: Why Not Auto-Init?
185
+
186
+ We deliberately **don't** auto-initialize because:
187
+
188
+ 1. **User control**: Installation should be explicit, not magical
189
+ 2. **Git hygiene**: Creates `.claude/` directory - users should understand this
190
+ 3. **Global vs project**: Users choose `--global` or project-local
191
+ 4. **Customization**: Users may want to review CLAUDE.md before committing
192
+
193
+ Instead, we make initialization **obvious** and **frictionless** when needed.
194
+
195
+ ## Example User Journey
196
+
197
+ ### First-Time User
198
+
199
+ ```
200
+ User: Where are client errors handled?
201
+ Claude: Let me check memory...
202
+ Claude: (calls memory.recall)
203
+ MCP: Error - database not found. Run memory.check_setup.
204
+ Claude: (calls memory.check_setup)
205
+ MCP: Not initialized. Run: claude-memory init
206
+ Claude: "It looks like ClaudeMemory isn't set up yet. Run `claude-memory init` to configure it. Would you like me to explain what this does first?"
207
+ ```
208
+
209
+ ### Upgrading User
210
+
211
+ ```
212
+ User: Check memory about authentication
213
+ Claude: (calls memory.recall)
214
+ MCP: Returns results with warning: "Using outdated configuration v0.9.0. Run: claude-memory upgrade"
215
+ Claude: "I found these facts about authentication: [...]. Note: You can upgrade to the latest ClaudeMemory configuration by running `claude-memory upgrade`."
216
+ ```
217
+
218
+ ## Testing Strategy
219
+
220
+ - Unit tests for version detection logic
221
+ - Integration tests for upgrade workflow
222
+ - Manual testing of error messages
223
+ - Test preservation of custom CLAUDE.md sections
224
+
225
+ ## Documentation Updates
226
+
227
+ - Update README with upgrade instructions
228
+ - Add CHANGELOG for version history
229
+ - Document version markers in CLAUDE.md
230
+ - Add troubleshooting guide for common issues
@@ -0,0 +1,294 @@
1
+ # CI Integration for Eval Framework
2
+
3
+ ## Current Status: ✅ Already Working
4
+
5
+ The eval framework **requires no special CI setup** and already runs in GitHub Actions.
6
+
7
+ ### What's Already Running
8
+
9
+ `.github/workflows/main.yml` runs on:
10
+ - Every push to `main`
11
+ - Every pull request
12
+
13
+ It executes: `bundle exec rake` which runs:
14
+ 1. `rake spec` - All 1003 tests (including 15 eval tests)
15
+ 2. `rake standard` - Ruby linter
16
+
17
+ **Evals are automatically included** because they're part of the RSpec suite (`spec/evals/*.rb`).
18
+
19
+ ### Why Evals Work in CI
20
+
21
+ ✅ **No API calls** - Use stubbed responses (no Claude API key needed)
22
+ ✅ **No external services** - Self-contained in-memory fixtures
23
+ ✅ **Fast** - <1s for all 15 eval tests, 40s for full suite
24
+ ✅ **Standard dependencies** - Just RSpec + ClaudeMemory gems
25
+ ✅ **Temporary directories** - Use `Dir.mktmpdir` (standard in CI)
26
+ ✅ **No environment variables** - No configuration needed
27
+
28
+ ### Current CI Output
29
+
30
+ ```
31
+ ...
32
+ 1003 examples, 0 failures
33
+ Took 40 seconds
34
+ ```
35
+
36
+ The 15 eval tests are included in the 1003 total. They run silently unless they fail.
37
+
38
+ ## Optional Enhancements
39
+
40
+ If you want to make evals more visible in CI, consider these options:
41
+
42
+ ### Option 1: Separate Eval Report Step ⭐ Recommended
43
+
44
+ Add a dedicated step to show eval summary:
45
+
46
+ ```yaml
47
+ # .github/workflows/main.yml
48
+ steps:
49
+ - uses: actions/checkout@v4
50
+ - name: Set up Ruby
51
+ uses: ruby/setup-ruby@v1
52
+ with:
53
+ ruby-version: ${{ matrix.ruby }}
54
+ bundler-cache: true
55
+
56
+ # NEW: Run evals with summary report
57
+ - name: Run evals with summary
58
+ run: ./bin/run-evals
59
+
60
+ # Existing: Run full test suite
61
+ - name: Run tests and linter
62
+ run: bundle exec rake
63
+ ```
64
+
65
+ **Benefits:**
66
+ - Clear "EVAL SUMMARY" section in CI logs
67
+ - Shows behavioral scores prominently
68
+ - Makes eval failures obvious
69
+
70
+ **Example output in CI logs:**
71
+ ```
72
+ ============================================================
73
+ EVAL SUMMARY
74
+ ============================================================
75
+
76
+ Total Examples: 15
77
+ Passed: 15 ✅
78
+ Failed: 0 ❌
79
+
80
+ ============================================================
81
+ BEHAVIORAL SCORES
82
+ ============================================================
83
+
84
+ Convention Recall: +100% improvement
85
+ Architectural Decision: +100% improvement
86
+ Tech Stack Recall: +100% improvement
87
+
88
+ OVERALL: Memory improves responses by 100% on average
89
+ ============================================================
90
+ ```
91
+
92
+ **Trade-offs:**
93
+ - ✅ Better visibility
94
+ - ⚠️ Runs evals twice (once in summary, once in full suite)
95
+ - ⚠️ Adds ~1 second to CI time
96
+
97
+ ### Option 2: Fail Fast on Eval Failures
98
+
99
+ Run evals first to catch memory issues early:
100
+
101
+ ```yaml
102
+ - name: Run evals first (fail fast)
103
+ run: bundle exec rspec spec/evals/ --fail-fast
104
+
105
+ - name: Run full test suite
106
+ run: bundle exec rake
107
+ ```
108
+
109
+ **Benefits:**
110
+ - Fails within ~1 second if evals break
111
+ - Saves CI time (skips 1003 tests if evals fail)
112
+ - Evals become "smoke tests" for memory system
113
+
114
+ **Trade-offs:**
115
+ - ⚠️ Runs evals twice (but stops fast if they fail)
116
+
117
+ ### Option 3: Separate Workflow for Evals
118
+
119
+ Create `.github/workflows/evals.yml`:
120
+
121
+ ```yaml
122
+ name: Evals
123
+
124
+ on:
125
+ push:
126
+ branches: [main]
127
+ pull_request:
128
+ schedule:
129
+ - cron: '0 0 * * 0' # Weekly on Sunday
130
+
131
+ jobs:
132
+ evals:
133
+ runs-on: ubuntu-latest
134
+ steps:
135
+ - uses: actions/checkout@v4
136
+ - name: Set up Ruby
137
+ uses: ruby/setup-ruby@v1
138
+ with:
139
+ ruby-version: '4.0.1'
140
+ bundler-cache: true
141
+ - name: Run evals
142
+ run: ./bin/run-evals
143
+ ```
144
+
145
+ **Benefits:**
146
+ - Evals have dedicated status badge
147
+ - Can schedule periodic eval runs (e.g., weekly)
148
+ - Clearer separation of concerns
149
+
150
+ **Trade-offs:**
151
+ - ⚠️ More complex (2 workflows)
152
+ - ⚠️ Runs evals in up to 3 places (the main workflow and the eval workflow on each push/PR, plus the weekly scheduled run)
153
+
154
+ ### Option 4: Eval Results as PR Comment
155
+
156
+ Post eval summary as PR comment:
157
+
158
+ ```yaml
159
+ - name: Run evals and capture results
160
+ id: evals
161
+ run: |
162
+ echo "results<<EOF" >> $GITHUB_OUTPUT
163
+ ./bin/run-evals >> $GITHUB_OUTPUT
164
+ echo "EOF" >> $GITHUB_OUTPUT
165
+
166
+ - name: Comment eval results on PR
167
+ if: github.event_name == 'pull_request'
168
+ uses: actions/github-script@v7
169
+ with:
170
+ github-token: ${{ secrets.GITHUB_TOKEN }}
171
+ script: |
172
+ github.rest.issues.createComment({
173
+ issue_number: context.issue.number,
174
+ owner: context.repo.owner,
175
+ repo: context.repo.repo,
176
+ body: '## Eval Results\n\n```\n' + ${{ toJSON(steps.evals.outputs.results) }} + '\n```'
177
+ })
178
+ ```
179
+
180
+ **Benefits:**
181
+ - Eval results visible in PR without checking logs
182
+ - Reviewers see memory improvement metrics
183
+ - Historical record in PR comments
184
+
185
+ **Trade-offs:**
186
+ - ⚠️ More complex (requires github-script action)
187
+ - ⚠️ Creates comment on every push to PR
188
+ - ⚠️ Requires GITHUB_TOKEN (provided automatically, but the job needs `pull-requests: write` permission to post comments)
189
+
190
+ ## Recommendation
191
+
192
+ **Current setup is perfect for now.** Evals already run and will catch regressions.
193
+
194
+ When to add enhancements:
195
+ - **Option 1**: If you want eval results more visible in logs (simple, low cost)
196
+ - **Option 2**: If eval failures become frequent (fail fast saves time)
197
+ - **Option 3**: If you want dedicated eval status badge
198
+ - **Option 4**: If you want eval results visible to PR reviewers
199
+
200
+ Most projects can keep the current setup; adopt **Option 1** (separate step with summary) first, and only if visibility becomes an issue.
201
+
202
+ ## Testing CI Locally
203
+
204
+ Simulate CI behavior locally:
205
+
206
+ ```bash
207
+ # What CI runs (default rake task)
208
+ bundle exec rake
209
+
210
+ # Just evals (what CI could run separately)
211
+ ./bin/run-evals
212
+
213
+ # Just evals with RSpec (alternative)
214
+ bundle exec rspec spec/evals/ --format documentation
215
+ ```
216
+
217
+ ## CI Failure Scenarios
218
+
219
+ ### Scenario 1: Eval Test Fails
220
+
221
+ ```
222
+ Failures:
223
+
224
+ 1) Convention Recall Eval mentions stored conventions when asked
225
+ Failure/Error: expect(mentions_indentation).to be(true)
226
+ expected true
227
+ got false
228
+ ```
229
+
230
+ **What happened**: Memory system regressed, stored conventions not recalled
231
+
232
+ **Fix**: Investigate why memory population or recall failed
233
+
234
+ ### Scenario 2: All Tests Pass But Behavioral Scores Drop
235
+
236
+ Current setup won't catch this (scores aren't checked automatically).
237
+
238
+ To catch this in future (Week 3+):
239
+ - Store expected scores in test
240
+ - Assert: `expect(score).to be >= 0.9` (allow small variance)
241
+
242
+ ### Scenario 3: Fixture Setup Fails
243
+
244
+ ```
245
+ Errno::EACCES: Permission denied @ dir_s_mkdir - /tmp
246
+ ```
247
+
248
+ **What happened**: CI environment doesn't allow temp directory creation
249
+
250
+ **Fix**: Unlikely in GitHub Actions (has `/tmp` access); if it does occur, set the `TMPDIR` environment variable to a writable directory, which `Dir.mktmpdir` honors
251
+
252
+ ## Verification
253
+
254
+ To verify evals are running in CI:
255
+
256
+ 1. **Check logs**: Look for "1003 examples, 0 failures" (includes evals)
257
+ 2. **Break an eval**: Change assertion to fail, push, check CI fails
258
+ 3. **Run locally**: `bundle exec rake` should match CI behavior
259
+
260
+ ## Future: Real Claude Execution (Week 3+)
261
+
262
+ If you add real Claude execution (not stubbed):
263
+
264
+ **Will need:**
265
+ - `ANTHROPIC_API_KEY` in GitHub Secrets
266
+ - Tag tests as `:slow` and skip by default
267
+ - Optional: Run only on `main` branch (not PRs)
268
+ - Optional: Schedule runs (don't run on every commit)
269
+
270
+ **Example:**
271
+ ```yaml
272
+ - name: Run slow evals (real Claude)
273
+ if: github.ref == 'refs/heads/main'
274
+ env:
275
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
276
+ run: bundle exec rspec spec/evals/ --tag slow
277
+ ```
278
+
279
+ But for current stubbed evals: **no special setup needed!** ✅
280
+
281
+ ## Summary
282
+
283
+ | Aspect | Status | Notes |
284
+ |--------|--------|-------|
285
+ | Already running in CI? | ✅ Yes | Part of `bundle exec rake` |
286
+ | Requires API keys? | ❌ No | Uses stubbed responses |
287
+ | Requires environment variables? | ❌ No | Self-contained |
288
+ | Requires special permissions? | ❌ No | Standard filesystem access |
289
+ | Fast enough for CI? | ✅ Yes | <1s for evals, 40s total |
290
+ | Catches regressions? | ✅ Yes | Will fail if memory system breaks |
291
+ | Visible in logs? | ⚠️ Partial | Included in total count, not highlighted |
292
+ | Recommended changes? | 🤷 Optional | Add separate summary step if desired |
293
+
294
+ **Bottom line**: Evals work in CI today. Optional enhancements can improve visibility, but aren't required.