claude_memory 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/skills/dashboard/SKILL.md +42 -0
  4. data/.claude-plugin/marketplace.json +1 -1
  5. data/.claude-plugin/plugin.json +1 -1
  6. data/CHANGELOG.md +130 -0
  7. data/CLAUDE.md +30 -6
  8. data/README.md +66 -2
  9. data/db/migrations/015_add_activity_events.rb +26 -0
  10. data/db/migrations/016_add_moment_feedback.rb +22 -0
  11. data/db/migrations/017_add_last_recalled_at.rb +15 -0
  12. data/docs/1_0_punchlist.md +371 -0
  13. data/docs/EXAMPLES.md +41 -2
  14. data/docs/GETTING_STARTED.md +33 -4
  15. data/docs/architecture.md +22 -7
  16. data/docs/audit-queries.md +131 -0
  17. data/docs/dashboard.md +192 -0
  18. data/docs/improvements.md +650 -9
  19. data/docs/influence/cq.md +187 -0
  20. data/docs/plugin.md +13 -6
  21. data/docs/quality_review.md +524 -172
  22. data/docs/reflection_memory_as_accumulating_judgment.md +67 -0
  23. data/lib/claude_memory/activity_log.rb +86 -0
  24. data/lib/claude_memory/commands/census_command.rb +210 -0
  25. data/lib/claude_memory/commands/completion_command.rb +3 -0
  26. data/lib/claude_memory/commands/dashboard_command.rb +54 -0
  27. data/lib/claude_memory/commands/dedupe_conflicts_command.rb +55 -0
  28. data/lib/claude_memory/commands/digest_command.rb +273 -0
  29. data/lib/claude_memory/commands/hook_command.rb +61 -2
  30. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
  31. data/lib/claude_memory/commands/reclassify_references_command.rb +56 -0
  32. data/lib/claude_memory/commands/registry.rb +7 -1
  33. data/lib/claude_memory/commands/show_command.rb +90 -0
  34. data/lib/claude_memory/commands/skills/distill-transcripts.md +13 -1
  35. data/lib/claude_memory/commands/stats_command.rb +131 -2
  36. data/lib/claude_memory/commands/sweep_command.rb +2 -0
  37. data/lib/claude_memory/configuration.rb +16 -0
  38. data/lib/claude_memory/core/relative_time.rb +9 -0
  39. data/lib/claude_memory/dashboard/api.rb +610 -0
  40. data/lib/claude_memory/dashboard/conflicts.rb +279 -0
  41. data/lib/claude_memory/dashboard/efficacy.rb +127 -0
  42. data/lib/claude_memory/dashboard/fact_presenter.rb +109 -0
  43. data/lib/claude_memory/dashboard/health.rb +175 -0
  44. data/lib/claude_memory/dashboard/index.html +2707 -0
  45. data/lib/claude_memory/dashboard/knowledge.rb +136 -0
  46. data/lib/claude_memory/dashboard/moments.rb +244 -0
  47. data/lib/claude_memory/dashboard/reuse.rb +97 -0
  48. data/lib/claude_memory/dashboard/scoped_fact_resolver.rb +95 -0
  49. data/lib/claude_memory/dashboard/server.rb +211 -0
  50. data/lib/claude_memory/dashboard/timeline.rb +68 -0
  51. data/lib/claude_memory/dashboard/trust.rb +454 -0
  52. data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
  53. data/lib/claude_memory/distill/reference_material_detector.rb +78 -0
  54. data/lib/claude_memory/hook/auto_memory_mirror.rb +112 -0
  55. data/lib/claude_memory/hook/context_injector.rb +97 -3
  56. data/lib/claude_memory/hook/handler.rb +191 -3
  57. data/lib/claude_memory/mcp/handlers/management_handlers.rb +8 -0
  58. data/lib/claude_memory/mcp/query_guide.rb +11 -0
  59. data/lib/claude_memory/mcp/text_summary.rb +29 -0
  60. data/lib/claude_memory/mcp/tool_definitions.rb +13 -0
  61. data/lib/claude_memory/mcp/tools.rb +148 -0
  62. data/lib/claude_memory/publish.rb +13 -21
  63. data/lib/claude_memory/recall/stale_detector.rb +67 -0
  64. data/lib/claude_memory/resolve/predicate_policy.rb +2 -0
  65. data/lib/claude_memory/resolve/resolver.rb +41 -11
  66. data/lib/claude_memory/store/llm_cache.rb +68 -0
  67. data/lib/claude_memory/store/metrics_aggregator.rb +96 -0
  68. data/lib/claude_memory/store/schema_manager.rb +1 -1
  69. data/lib/claude_memory/store/sqlite_store.rb +47 -143
  70. data/lib/claude_memory/store/store_manager.rb +29 -0
  71. data/lib/claude_memory/sweep/maintenance.rb +216 -0
  72. data/lib/claude_memory/sweep/recall_timestamp_refresher.rb +83 -0
  73. data/lib/claude_memory/sweep/sweeper.rb +2 -0
  74. data/lib/claude_memory/templates/hooks.example.json +5 -0
  75. data/lib/claude_memory/version.rb +1 -1
  76. data/lib/claude_memory.rb +24 -0
  77. metadata +51 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b6df0a3f58a88c1bbec82ec20e26789d51ad2712408d058337a196c5eac90654
4
- data.tar.gz: beb9c2ef59ef6a45430eeb03466e37f6b1f741ef1745b5303a1443b02a7c84b4
3
+ metadata.gz: c2164011e2c50c7fdb0bcad468a25814f372384c3a49fa4c9414313ab3975e00
4
+ data.tar.gz: 3e2843979d9b9e0d4a21bfa3650f6cd6843ce18d2a95af884e303572259bca62
5
5
  SHA512:
6
- metadata.gz: '06905bca1f77df5642caf0846cde7394ba9a1baf3c954138383ac39927fcaae2ef097ff79dd3c866e6930fa0eac0d0fb958366bded54a0616d8e356a316e616c'
7
- data.tar.gz: 9a8e3c455c20ae616bc239b766e1d4e2aa4c6e5448f494294d9c6a646a8a613428e9b63218624c5cae7e30f389704dd3bee6b788e97a369cb719b115abffddd7
6
+ metadata.gz: 6c074b607c1e4f13743de36bb2074495d0ad24d0c826b62b49e0e827f311e3424bc881f42236db22a92dd2d5281e6ef13ca450966b1d9438ac1b36ceaa3ab2ce
7
+ data.tar.gz: 4e06c8fed9c323974ee4d7e5b41386ee4682ba5ba88b67797ac6864bbdf03663e5b74cb33497a371a8263eff4c24d405708a80e4862c4f578b221286bf40b236
Binary file
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: dashboard
3
+ description: Launch a local web dashboard for ClaudeMemory debugging and observability
4
+ ---
5
+
6
+ # Dashboard
7
+
8
+ Launch the ClaudeMemory debugging dashboard to visualize memory system health, activity, and efficacy.
9
+
10
+ ## Task
11
+
12
+ Start the dashboard web server so the user can inspect what's happening behind the scenes.
13
+
14
+ ## Steps
15
+
16
+ 1. Run the dashboard command:
17
+
18
+ ```bash
19
+ claude-memory dashboard
20
+ ```
21
+
22
+ This starts a local web server (default port 3377) and opens it in the browser.
23
+
24
+ ## What the Dashboard Shows
25
+
26
+ - **Health Status**: Database health, hook configuration, vector index status
27
+ - **Overview**: Fact/entity/content counts, top predicates, entity type distribution, 30-day activity timeline
28
+ - **Activity**: Live event log of hook executions (ingest, context, sweep), memory recalls, and store extractions with timing and details
29
+ - **Facts**: Searchable fact explorer with status filtering, predicate/object search
30
+ - **Efficacy**: Recall hit rate, total results served, average results per query, top queries by result count
31
+
32
+ ## Options
33
+
34
+ - `--port PORT` - Use a different port (default: 3377)
35
+ - `--no-open` - Don't auto-open the browser
36
+
37
+ ## Notes
38
+
39
+ - Dashboard auto-refreshes every 30 seconds
40
+ - Activity events are recorded by hooks and MCP tools into the `activity_events` table
41
+ - The dashboard reads from both global and project databases
42
+ - Press Ctrl+C to stop the server
@@ -7,7 +7,7 @@
7
7
  "plugins": [
8
8
  {
9
9
  "name": "claude-memory",
10
- "version": "0.9.1",
10
+ "version": "0.11.0",
11
11
  "source": "./",
12
12
  "description": "Long-term memory for Claude Code. Recalls architecture, conventions, and decisions across sessions — so Claude explains your codebase without file traversal, follows your patterns, and never re-asks what it already learned.",
13
13
  "repository": "https://github.com/codenamev/claude_memory"
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-memory",
3
- "version": "0.9.1",
3
+ "version": "0.11.0",
4
4
  "description": "Long-term memory for Claude Code. Recalls architecture, conventions, and decisions across sessions — so Claude explains your codebase without file traversal, follows your patterns, and never re-asks what it already learned.",
5
5
  "author": {
6
6
  "name": "Valentino Stoll",
data/CHANGELOG.md CHANGED
@@ -4,6 +4,136 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [0.11.0] - 2026-04-30
8
+
9
+ Theme: **Trust & Cost** — five user-visible signals that answer "is memory still worth it?" with numbers a skeptical user can read in <30 seconds.
10
+
11
+ ### Added
12
+
13
+ - **Token budget telemetry** — every successful SessionStart context injection now records an estimated `context_tokens` count on its `activity_events` row. Surfaced three ways:
14
+ - Dashboard Trust panel emits a `token_budget` block with p50/p95/avg/sample_size over the last 30 days, so the JSON dashboard endpoint and any downstream consumer answer "what does memory cost per session?"
15
+ - `claude-memory digest` includes a "Context cost" subsection between activity and new-knowledge so the weekly report shows the price tag next to the value.
16
+ - `claude-memory stats --tokens [--since DAYS]` reports total sessions, p50/p95/avg/min/max, and a histogram across <500 / 500-1k / 1-2k / 2-5k / 5k+ buckets.
17
+ - Purely additive — no schema migration. Historical events written before this release simply contribute zero samples until new injections accumulate.
18
+ - First 0.11.0 milestone item from the 1.0 punchlist (Trust & Cost). Closes the "what % of my SessionStart token budget does memory consume?" gap.
19
+ - **Hallucination rate metric** — the dashboard now quantifies how clean the fact base is, not just how full it is. `Distill::BareConclusionDetector` is the production-side mirror of the SessionStart prompt's reason-clause requirement (decision/convention facts must embed "because…" / "so that…" / "to avoid…"). Surfaced two ways:
20
+ - Dashboard Trust panel emits a `quality_score` block aggregating across project + global active facts: `suspect_count` (predicate=reference, retagged by ReferenceMaterialDetector), `bare_conclusion_count`, percentages, and an overall 0–100 score (higher = cleaner). Returns 100 on empty stores so fresh installs aren't penalized.
21
+ - `claude-memory digest` includes a "Quality" section showing the score breakdown plus the in-window rejection rate ("of facts created in the last 7 days, X% have been rejected since"), so calibration drift is visible.
22
+ - Second 0.11.0 milestone item. Pairs with token-budget telemetry to answer "is memory still worth its cost?" via two skeptic-friendly numbers.
23
+ - **`claude-memory show`** — new CLI command prints what memory would inject at the next SessionStart in plain Markdown. Runs the exact `Hook::ContextInjector` path real sessions use, so output matches what Claude actually receives. Footer reports fact count, ~token estimate, and char count so users see the SessionStart cost at a glance.
24
+ - Default suppresses the raw-transcript "Pending Knowledge Extraction" dump (intended for LLM distillation, not human reading); pass `--pending` to include it.
25
+ - `--source SOURCE` (startup/resume/clear) simulates each fresh-session entrypoint so users can preview which sections would appear.
26
+ - Third 0.11.0 milestone item. Closes the inspectability gap — trust requires being able to see what memory will inject, the same way `cat CLAUDE.md` works.
27
+ - **First-week ROI nudge** — at SessionEnd, memory now prints `memory contributed N facts this session, %used = X` for the first 10 sessions, then quiets. New users get user-visible proof memory is doing work for them without having to know about the dashboard. Once trust is established (or it isn't), the nudge gets out of the way.
28
+ - New `claude-memory hook nudge` subcommand + `Hook::Handler#nudge`. SessionEnd config now wires `[ingest, sweep, nudge]` in order.
29
+ - Silent on `CLAUDE_MEMORY_NO_NUDGE=1` opt-out, missing session_id, n=0 contributions, and after MAX_NUDGES emissions. The empty-session silent path doesn't burn a slot — quiet sessions don't count toward the 10.
30
+ - Activity event `roi_nudge` records `{n, used, pct, prior_count}` per emission so a future migration could change the threshold without re-counting from raw events.
31
+ - Fourth 0.11.0 milestone item. Cold-start trust signal that pairs with #47 (token cost) and #48 (quality) to make the first-week answer to "is this worth it?" visible without effort.
32
+ - **Harm benchmark prototype** — `spec/benchmarks/dataset/harm_scenarios.yml` + `spec/benchmarks/e2e/harm_bench_spec.rb`. Three hand-written cases spanning the riskiest harm classes (stale_tech, mismatched_scope, superseded_undetected). The first ClaudeMemory benchmark that measures whether memory can make Claude *wrong* — every other benchmark only measures whether memory helps.
33
+ - Structure validation (regex compile, fact loadability, harm-class coverage) runs in stub mode as part of `:benchmark` tag.
34
+ - Real-mode runner: `EVAL_MODE=real bundle exec rspec spec/benchmarks/e2e/harm_bench_spec.rb` — needs `claude` CLI on PATH, ~$2-8 per run. Reports harm rate; doesn't enforce a threshold yet (that's the 0.12 release gate).
35
+ - 0.11.0 de-risking item. If even one of these three surfaces a harm now, the full 10-15-case benchmark planned for 0.12 will likely reveal a fundamental issue — better to learn that at 0.11 than at 0.12. **Real-mode prototype run on 2026-04-30 reported 0/3 harm** — green light to expand to the full corpus in 0.12.
36
+
37
+ ### Changed
38
+
39
+ - **Hallucination-rate metric calibration** — `Dashboard::Trust#quality_score` now reports a windowed (last 30d) "live" score as the headline plus a "historical" block over all active facts. Production verification on 2026-04-30 (recorded in `docs/quality_review.md`) showed the unwindowed metric was technically correct but pragmatically misleading: 97% of bare-conclusion facts pre-dated the 2026-04-20 reason-clause prompt commit, and the entire 7-day rejection cluster was a single-class systemic failure (a `/study-repo` burst), not ongoing noise. The split makes the metric actionable: live score = ongoing extraction quality, historical = legacy data. The digest's "Quality" section uses the live score as the headline.
40
+
41
+ ### Fixed
42
+
43
+ - Real-eval CLI runner now passes `allowed_tools` through explicitly so the harm benchmark and other real-mode benches can pre-allow MCP memory tools without per-test wiring.
44
+
45
+ ### Upgrade Notes
46
+
47
+ - No schema migration. All new features are purely additive.
48
+ - Hooks run the installed gem from PATH, not the working tree. After upgrading, `bundle exec rake install` (or `gem install claude_memory`) is required for the new SessionEnd nudge, `claude-memory show` command, `--tokens` stats flag, and `context_tokens` activity-event field to actually fire on real hook events.
49
+ - Existing `quality_score` consumers will see additional fields (`window_days`, `historical`) in the snapshot. The original keys (`score`, `total_active`, `suspect_count`, `bare_conclusion_count`, `suspect_pct`, `bare_pct`) remain at the top level and now reflect the 30-day live window — historical numbers move to the `historical` sub-hash.
50
+
51
+ ## [0.10.0] - 2026-04-28
52
+
53
+ ### Added
54
+
55
+ **Dashboard — feed-first redesign with observability built in**
56
+
57
+ - New feed-first dashboard UI with scope-aware moments, fact detail modal, query tester, and activity drilldown. Reuse, Trust, Knowledge, Conflicts, and Moments panels each backed by a dedicated module (`Dashboard::{Reuse, Trust, Knowledge, Conflicts, Moments}`) under unit tests, replacing the prior all-in-API-class layout.
58
+ - 👍/👎 feedback on individual moments with persisted verdicts (schema v16, `moment_feedback` table). Trust panel surfaces a 30-day up/down ratio so the dashboard can answer "when memory surfaces something, are users marking it useful?".
59
+ - Utilization ratio panel — of facts extracted in the last 30 days, how many has Claude actually used in a recall or context injection? Color-coded (green ≥40%, yellow ≥15%, red below). Hidden on fresh installs to avoid misleading zeros.
60
+ - Conflict deduping at the display layer: identical (subject, predicate, object_pair) detections collapse into one row with a `×N` badge. Sidebar "Needs review" count now reflects distinct contradictions, not raw row count.
61
+ - Activity events drilldown: each moment opens a payload modal with prettified JSONL, recall trigger correlation (which user prompt motivated this lookup), and linked-fact resolution scoped per database.
62
+ - Vector index health threshold and clickable remediation hints in the health dashboard.
63
+
64
+ **CLI — observability surfaces and one-shot cleanups**
65
+
66
+ - `claude-memory digest [--since DAYS] [--output FILE]` — weekly markdown report. Sections: Activity, New knowledge by predicate, Utilization (extracted vs used), Conflicts, Feedback. No new schema; renders from existing aggregates.
67
+ - `claude-memory census [--root DIR]` — privacy-safe cross-project vocabulary scan. Aggregates per-DB predicate × status counts, novel predicates, synonym candidates. Suppresses object literals, entity names, and paths; per-DB IDs are SHA256-prefixed.
68
+ - `claude-memory dedupe-conflicts [--scope SCOPE] [--dry-run]` — one-shot cleanup for historical conflict-row duplication that predates the Resolver dedup fix (commit f571ba4). Groups by (subject, predicate, normalized object pair), keeps the earliest, migrates provenance to the keeper.
69
+ - `claude-memory reclassify-references [--scope SCOPE] [--dry-run]` — retags active convention facts that the new `Distill::ReferenceMaterialDetector` flags as reference material (LOC counts, star counts, "X is a plugin..." templates, "by Firstname Lastname" attributions).
70
+
71
+ **Memory quality**
72
+
73
+ - Access-based staleness scoring (improvements.md #35). Schema v17 adds `last_recalled_at` to facts. `Sweep::RecallTimestampRefresher` derives the field periodically from activity_events; `claude-memory stats --stale [--stale-days N]` lists facts that haven't been recalled inside the threshold. Replaces the prior "active facts minus seen-in-recalls" approximation.
74
+ - Auto-memory mirror (improvements.md #36). On fresh sessions, the SessionStart context hook scans `~/.claude/projects/<slug>/memory/*.md` and surfaces new or changed entries as extraction candidates so users can promote auto-memory observations into claude_memory without manual copy-paste.
75
+ - Reasoning requirement enforced in distillation (improvements.md #34). The SessionStart prompt and the `/distill-transcripts` skill now require a why clause for `decision` and `convention` predicates ("because…", "so that…", etc.). Audit found ~75% of facts were bare conclusions before this change.
76
+ - `Distill::ReferenceMaterialDetector` reclassifies convention facts whose object text matches reference patterns. New `reference` predicate registered in `PredicatePolicy` with its own `:references` snapshot section. Detector runs at write time in `ManagementHandlers#store_extraction` so mislabeling can't persist.
77
+ - Predicate census command (#30) for cross-project vocabulary audits — see CLI section above.
78
+
79
+ **Benchmarks and observability**
80
+
81
+ - Repeat-correction benchmark harness (improvements.md #32). `spec/benchmarks/e2e/repeat_correction_spec.rb` pre-loads a past correction as a memory fact, runs the prompt through real Claude under `EVAL_MODE=real`, and reports pass rate (no violation patterns matched). Starter set of 2 scenarios drawn from this project's recurring gotchas.
82
+ - Relevance ratio metric (improvements.md #31). `Hook::ContextInjector#emitted_subjects` exposes the subjects injected at SessionStart; `BenchmarkHelpers::RelevanceMetrics` measures whether they appear in Claude's response. Trend signal for memory-application quality, integrated into `devmemeval_spec.rb`.
83
+ - MCP server embeds the V=R/C ("Verify before Recommend / Correct") mental model in agent instructions so memory recommendations come with built-in verification cues.
84
+
85
+ **Schema v14 → v17 (additive only, automatic on first run)**
86
+
87
+ - Migration 015: adds `activity_events` table for hook/recall/context/sweep telemetry. Powers the dashboard timeline, moments feed, and efficacy reports.
88
+ - Migration 016: adds `moment_feedback` table (unique on event_id) for the dashboard 👍/👎 surface.
89
+ - Migration 017: adds nullable `facts.last_recalled_at` for access-based staleness scoring.
90
+
91
+ **1.0 readiness track**
92
+
93
+ - New `docs/1_0_punchlist.md` opens the path to 1.0: token-budget telemetry, hallucination-rate metric, negative-fact harm benchmark, CLAUDE.md baseline publication, `claude-memory show`, benchmark scoreboard. Ten entries (#47-56) added to `docs/improvements.md` with concrete file:line plumbing notes.
94
+
95
+ ### Changed
96
+
97
+ - `Resolver#apply_conflict` no longer creates a duplicate disputed fact + conflict row when the same contradicting value is re-extracted. Looks up disputed facts in the same (subject, predicate) slot and reinforces with provenance instead.
98
+ - `Resolver` no longer treats the distiller's `scope_hint` as a scope override. `scope_hint` is advisory metadata; `fact.scope` must match the DB the row lives in. Earlier behavior caused scope leakage where global-hinted distillations landed in the project DB.
99
+ - `Hook::ContextInjector` adds `emitted_fact_ids` and `emitted_subjects` accessors so benchmark harnesses can attribute injection contributions per session.
100
+ - `SQLiteStore` decomposed via module inclusion: `LLMCache` and `MetricsAggregator` extracted into `lib/claude_memory/store/`. SQLiteStore back under 600 LOC.
101
+ - `Dashboard::API` decomposed: `FactPresenter`, `Conflicts`, `Efficacy::Reporter`, `Timeline`, `Health` extracted into dedicated classes following the boundary pattern. API now routes/delegates rather than aggregating.
102
+ - Dashboard releases DB connections after each HTTP request (was holding connections open for the lifetime of the WEBrick session).
103
+ - `Sweep::Maintenance` gains `dedupe_open_conflicts` and `reclassify_references` for the one-shot CLI commands above.
104
+ - Round-trip migration specs from v12, v13, v14 → v17 (per-version migrations covered by `spec/claude_memory/store/migrations/`). Codifies the release-blocker convention: any schema bump must round-trip from each prior major-release boundary back ~3 releases.
105
+
106
+ ### Fixed
107
+
108
+ - Dashboard surfaces an actionable hint when Recall hits FTS5 corruption (run `claude-memory compact` rather than a generic error).
109
+ - Dashboard query tester unwraps the nested Recall result shape rather than printing the raw envelope.
110
+ - Dashboard health checks correctly detect the claude-memory hook installation across the two-level Claude Code hooks structure (was reporting false negatives when hooks were installed under a matcher block).
111
+ - Dashboard Efficacy "this session" correlation falls back to a time window when the recall event has no `session_id` (MCP tool calls don't thread session_id).
112
+ - Bulk-reject in the Conflicts modal now retries with an actionable message when the server-side state is stale.
113
+
114
+ ### Upgrade Notes
115
+
116
+ **Schema bump v14 → v17.** Three migrations run automatically on first launch after upgrade. All three are additive (no existing data is rewritten):
117
+
118
+ 1. Migration 015 creates `activity_events` (hook/recall telemetry).
119
+ 2. Migration 016 creates `moment_feedback` (dashboard verdicts).
120
+ 3. Migration 017 adds `facts.last_recalled_at` (NULL by default; `Sweep::RecallTimestampRefresher` populates it on the next sweep cycle from existing activity_events).
121
+
122
+ The migration delta has round-trip spec coverage in `spec/claude_memory/store/migrations/`. Forward-compatibility: 0.10.0 databases cannot be opened by 0.9.x or earlier. Downgrade is destructive — back up `~/.claude/memory.sqlite3` and `.claude/memory.sqlite3` before downgrading.
123
+
124
+ **Optional historical cleanups.** Two new admin commands address data tails left by earlier bugs that have since been fixed at the source:
125
+
126
+ ```bash
127
+ claude-memory dedupe-conflicts --dry-run # preview duplicate conflict rows
128
+ claude-memory dedupe-conflicts # consolidate them
129
+ claude-memory reclassify-references --dry-run # preview reference-material mislabels
130
+ claude-memory reclassify-references # retag them
131
+ ```
132
+
133
+ Both are opt-in. Neither runs in the regular sweep cycle. Use `--scope global` to clean the global DB.
134
+
135
+ **Telemetry footprint.** The `activity_events` table grows with hook activity. The dashboard surfaces its contents by default, and the table powers the timeline/moments/efficacy panels. Retention pruning is not yet automatic (planned for a follow-up); manual cleanup via `DELETE FROM activity_events WHERE occurred_at < ?` is safe — the dashboard tolerates missing history.
136
+
7
137
  ## [0.9.1] - 2026-04-16
8
138
 
9
139
  ### Fixed
data/CLAUDE.md CHANGED
@@ -163,7 +163,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
163
163
  - Each command is a separate class (HelpCommand, DoctorCommand, etc.)
164
164
  - All commands inherit from BaseCommand
165
165
  - Dependency injection for I/O (stdout, stderr, stdin)
166
- - 28 commands total, each focused on single responsibility
166
+ - 34 commands total, each focused on single responsibility
167
167
 
168
168
  - **`Configuration`**: Centralized ENV access (`configuration.rb`)
169
169
  - Single source of truth for paths and environment variables
@@ -208,6 +208,9 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
208
208
  - **`Distill`**: Fact extraction interface (`distill/`)
209
209
  - Pluggable distiller design (current: NullDistiller stub)
210
210
  - Extracts entities, facts, scope hints from content
211
+ - `ReferenceMaterialDetector`: classifies "X is a plugin/library/tool" templates, LOC counts, "by Firstname Lastname" attributions as reference material. Runs in `ManagementHandlers#store_extraction` so mislabeling can't persist
212
+ - `BareConclusionDetector` (0.11.0+): production-side mirror of the SessionStart prompt's reason-clause requirement. Pure function — flags `decision` / `convention` facts whose object lacks a reason-clause signal ("because", "so that", "to avoid", etc.). Powers the `quality_score` metric on the Trust panel and the digest's Quality section.
213
+ - SessionStart distillation prompt enforces reason clauses ("because…", "so that…") for `decision` and `convention` predicates — bare conclusions are explicitly disallowed
211
214
 
212
215
  - **`Resolve`**: Truth maintenance and conflict resolution (`resolve/`)
213
216
  - Determines equivalence, supersession, or conflicts
@@ -226,7 +229,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
226
229
  - Modes: shared (repo), local (uncommitted), home (user directory)
227
230
 
228
231
  - **`MCP`**: Model Context Protocol server and tools (`mcp/`)
229
- - Exposes memory tools to Claude Code (24 tools total)
232
+ - Exposes memory tools to Claude Code (25 tools total)
230
233
  - `Telemetry`: Records tool invocations to `mcp_tool_calls` table for usage stats
231
234
  - Dual content/structuredContent responses with compact mode
232
235
 
@@ -234,6 +237,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
234
237
  - Reads stdin JSON from Claude Code hooks
235
238
  - Routes to ingest/sweep/publish commands
236
239
  - `DistillationRunner`: Manages context hook injection with undistilled content for LLM extraction
240
+ - `AutoMemoryMirror` (0.10.0): On fresh sessions, scans `~/.claude/projects/<slug>/memory/*.md` for new/changed entries and surfaces them as extraction candidates in the SessionStart context. State diffed by md5 in `.claude/auto_memory_mirror.json`; bounded to 5 candidates per session, 1500 chars each.
237
241
 
238
242
  ### Database Schema
239
243
 
@@ -246,16 +250,19 @@ Key tables (defined in `sqlite_store.rb`):
246
250
  - `fact_links`: Supersession and conflict relationships
247
251
  - `conflicts`: Open contradictions
248
252
  - `mcp_tool_calls`: MCP server tool invocation telemetry (schema v13)
253
+ - `activity_events`: Hook/recall/context/sweep/nudge telemetry (schema v15) — powers the dashboard timeline, moments feed, efficacy reports. Event types: `hook_ingest`, `hook_context` (carries `context_tokens` since 0.11.0), `hook_sweep`, `hook_publish`, `recall`, `store_extraction`, `roi_nudge` (since 0.11.0).
254
+ - `moment_feedback`: Per-moment 👍/👎 verdicts with optional notes (schema v16) — unique on event_id, repeat clicks upsert
249
255
 
250
256
  Facts include:
251
257
  - `scope`: "global" or "project" (determines applicability)
252
258
  - `project_path`: Set for project-scoped facts
253
259
  - `valid_from`/`valid_to`: Temporal validity window
260
+ - `last_recalled_at` (schema v17): Set by `Sweep::RecallTimestampRefresher` from activity_events; powers `claude-memory stats --stale` and the dashboard's "stale" needs-review count
254
261
 
255
262
  ### Scope System
256
263
 
257
264
  Facts are scoped to control where they apply:
258
- - **project**: Current project only (e.g., "this app uses PostgreSQL")
265
+ - **project**: Current project only (e.g., "claude_memory uses SQLite for storage")
259
266
  - **global**: All projects (e.g., "I prefer 4-space indentation")
260
267
 
261
268
  Distiller detects signals like "always", "in all projects", "my preference" and sets `scope_hint: "global"`. Users can manually promote facts via `claude-memory promote <fact_id>` or the `memory.promote` MCP tool.
@@ -325,7 +332,7 @@ Also update `SECTION_MAP` if the predicate should appear in a specific snapshot
325
332
 
326
333
  - `lib/claude_memory.rb`: Main module, requires, database path helpers
327
334
  - `lib/claude_memory/cli.rb`: Thin command router (41 lines)
328
- - `lib/claude_memory/commands/`: Individual command classes (28 commands)
335
+ - `lib/claude_memory/commands/`: Individual command classes (34 commands)
329
336
  - `lib/claude_memory/configuration.rb`: Centralized configuration and ENV access
330
337
  - `lib/claude_memory/domain/`: Domain models (Fact, Entity, Provenance, Conflict)
331
338
  - `lib/claude_memory/core/`: Value objects and null objects
@@ -340,14 +347,14 @@ Also update `SECTION_MAP` if the predicate should appear in a specific snapshot
340
347
 
341
348
  The gem includes an MCP server (`claude-memory serve-mcp`) that exposes memory operations as tools. Configuration should be in `.mcp.json` at project root.
342
349
 
343
- Available MCP tools (24 total):
350
+ Available MCP tools (25 total):
344
351
  - **Query & Recall**: `memory.recall`, `memory.recall_index`, `memory.recall_details`, `memory.recall_semantic`, `memory.search_concepts`
345
352
  - **Provenance**: `memory.explain`, `memory.fact_graph`
346
353
  - **Shortcuts**: `memory.decisions`, `memory.conventions`, `memory.architecture`
347
354
  - **Context**: `memory.facts_by_tool`, `memory.facts_by_context`
348
355
  - **Management**: `memory.promote`, `memory.reject_fact`, `memory.store_extraction`
349
356
  - **Distillation**: `memory.undistilled`, `memory.mark_distilled`
350
- - **Monitoring**: `memory.status`, `memory.stats`, `memory.changes`, `memory.conflicts`
357
+ - **Monitoring**: `memory.status`, `memory.stats`, `memory.changes`, `memory.conflicts`, `memory.activity`
351
358
  - **Maintenance**: `memory.sweep_now`
352
359
  - **Discovery**: `memory.check_setup`, `memory.list_projects`
353
360
 
@@ -367,8 +374,25 @@ ClaudeMemory integrates with Claude Code via hooks in `.claude/settings.json`:
367
374
  - Runs time-bounded maintenance on both databases
368
375
  - Cleans up vec0 entries for superseded/expired facts
369
376
 
377
+ - **Nudge hook** (0.11.0+): Triggers on SessionEnd, fires after ingest+sweep
378
+ - Calls `claude-memory hook nudge`
379
+ - For the first 10 sessions only, prints "memory contributed N facts this session, %used = X" to stdout so new users see ROI inline before they discover the dashboard
380
+ - Records `roi_nudge` activity_events; quiets after `MAX_NUDGES` emissions
381
+ - Opt out with `CLAUDE_MEMORY_NO_NUDGE=1` (no event recorded on opt-out)
382
+ - Empty sessions (n=0) silently no-op so quiet sessions don't burn nudge slots
383
+
370
384
  Hook commands read JSON payloads from stdin for robustness. Supports `--async` flag for non-blocking execution.
371
385
 
386
+ ## Dashboard
387
+
388
+ Local web UI for inspecting memory state. Started via `claude-memory dashboard` (default port 3377). Reads from both global and project databases; no write side effects from page loads.
389
+
390
+ The dashboard is a thin web layer over the same `Recall`/`Conflicts`/`Trust`/`Moments`/`Knowledge`/`Reuse`/`Health`/`Timeline` classes the MCP server uses. Each panel is backed by a dedicated module under `lib/claude_memory/dashboard/`; `Dashboard::API` holds HTTP-shape glue and per-endpoint formatting (delegating non-trivial logic to the panel classes).
391
+
392
+ Connections are released after each request, so the dashboard never holds a WAL writer lock open across page loads.
393
+
394
+ See [docs/dashboard.md](docs/dashboard.md) for the user-facing guide (panels, common workflows, related CLI commands).
395
+
372
396
  ## Code Style
373
397
 
374
398
  This project uses [Standard Ruby](https://github.com/standardrb/standard) for linting. Run `bundle exec rake standard:fix` before committing.
data/README.md CHANGED
@@ -140,6 +140,69 @@ File-searchable questions ("what version is this?") and one-shot code generation
140
140
  - **Claude-Powered**: Uses Claude's intelligence to extract facts (no API key needed)
141
141
  - **Token Efficient**: 10x reduction in memory queries with progressive disclosure
142
142
  - **Database Maintenance**: Compact, export, and backup commands
143
+ - **Built-in Observability** (0.10.0+): `claude-memory dashboard` opens a local web UI with a moments feed, trust panel (token budget, quality score, utilization, feedback), conflicts dedup, knowledge index, and 👍/👎 feedback. See **[Dashboard guide →](docs/dashboard.md)**. `claude-memory digest` writes a weekly markdown report (Activity, Context cost, Quality, New knowledge, Utilization, Conflicts, Feedback); `claude-memory show` prints what would be injected next SessionStart; `claude-memory census` audits the predicate vocabulary across projects.
144
+
145
+ ## What's New in 0.11.0
146
+
147
+ Five user-visible signals so you can answer "is memory still worth it?" with
148
+ numbers, not vibes:
149
+
150
+ - **Token budget telemetry** — every SessionStart context injection now
151
+ records its estimated `context_tokens`. `claude-memory stats --tokens
152
+ [--since DAYS]` reports p50/p95/avg/min/max plus a histogram across
153
+ <500 / 500-1k / 1-2k / 2-5k / 5k+ buckets so you can see the per-session
154
+ cost at a glance. The dashboard's Trust panel and `claude-memory digest`
155
+ surface the same numbers.
156
+ - **Hallucination-rate metric** — the dashboard now scores how *clean* the
157
+ fact base is, not just how full it is. `Distill::BareConclusionDetector`
158
+ flags `decision` / `convention` facts that skipped the reason-clause
159
+ requirement. Trust panel shows `quality_score` (live 30-day window with
160
+ historical baseline beneath). `claude-memory digest` adds a Quality
161
+ section with rejection rate.
162
+ - **`claude-memory show`** — new command prints what memory *would* inject
163
+ at the next SessionStart in plain Markdown. Footer reports fact count,
164
+ ~token estimate, and char count so you see the cost at a glance. Default
165
+ hides the raw-transcript "Pending Knowledge" dump for readability;
166
+ `--pending` opts in. `--source startup|resume|clear` simulates each
167
+ fresh-session entrypoint.
168
+ - **First-week ROI nudge** — at SessionEnd, memory now prints
169
+ `memory contributed N facts this session, %used = X` for the first 10
170
+ sessions, then quiets. Cold-start trust signal — you don't have to know
171
+ about the dashboard. Opt out with `CLAUDE_MEMORY_NO_NUDGE=1`.
172
+ - **Harm benchmark prototype** — first ClaudeMemory benchmark that
173
+ measures whether memory can make Claude *wrong*. Three hand-written
174
+ cases (stale-tech, mismatched-scope, superseded-but-undetected) under
175
+ `spec/benchmarks/e2e/harm_bench_spec.rb`. Real-mode run on the 0.11
176
+ release reported 0/3 harm; the full 10-15-case corpus + release gate
177
+ lands in 0.12.
178
+
179
+ ## What's New in 0.10.0
180
+
181
+ Three behavior changes worth knowing about — they affect what you'll see in
182
+ extracted facts and SessionStart context, even if you don't change anything:
183
+
184
+ - **Auto-memory mirror** — On fresh sessions, the SessionStart context hook
185
+ scans `~/.claude/projects/<slug>/memory/*.md` and surfaces new or changed
186
+ entries as candidates for extraction into ClaudeMemory. You'll see a
187
+ "Pending Knowledge Extraction" section in Claude's startup context citing
188
+ files from your auto-memory directory. Claude reviews these and calls
189
+ `memory.store_extraction` for the high-signal ones; you don't need to
190
+ copy-paste manually anymore.
191
+ - **Why-clause enforcement** — When Claude distills `decision` and
192
+ `convention` facts, it's now required to embed a reason ("…because…",
193
+ "…so that…", "…to avoid…"). A bare conclusion is dead weight; a fact with
194
+ a reason stays useful when the situation changes. You'll see this
195
+ reflected in fact text being longer and more justified.
196
+ - **Reference predicate** — Active facts that look like reference material
197
+ (LOC counts, "X is a plugin/library/tool" templates, "by Firstname
198
+ Lastname" attributions) are auto-tagged `predicate=reference` instead of
199
+ `convention`. Keeps the conventions list signal-rich. Browse them in the
200
+ dashboard's Knowledge → References section, or run
201
+ `claude-memory reclassify-references --dry-run` to see candidates.
202
+
203
+ Plus: **staleness detection** (`claude-memory stats --stale`) lists active
204
+ facts that haven't been recalled in N days, so you can prune dead weight
205
+ explicitly. The dashboard's Trust → Needs review panel surfaces the count.
143
206
 
144
207
  ## Privacy Control
145
208
 
@@ -241,7 +304,8 @@ The uninstall command removes:
241
304
 
242
305
  - 📖 [Getting Started](docs/GETTING_STARTED.md) - Step-by-step onboarding
243
306
  - 💡 [Examples](docs/EXAMPLES.md) - Use cases and workflows
244
- - 🔧 [Plugin Setup](docs/PLUGIN.md) - Claude Code integration
307
+ - 📊 [Dashboard](docs/dashboard.md) - Local web UI for inspection and trust signals (0.10.0+)
308
+ - 🔧 [Plugin Setup](docs/plugin.md) - Claude Code integration
245
309
  - 🏗️ [Architecture](docs/architecture.md) - Technical deep dive
246
310
  - 📝 [Changelog](CHANGELOG.md) - Release notes
247
311
 
@@ -292,7 +356,7 @@ The benchmark dataset draws from real CLAUDE.md patterns and is designed specifi
292
356
 
293
357
  - **Language:** Ruby 3.2+
294
358
  - **Storage:** SQLite3 (no external services)
295
- - **Testing:** 1477 examples (1375 unit/integration + 102 benchmarks/evals), 100% core coverage
359
+ - **Testing:** 1964 examples (~1700 unit/integration + ~250 benchmarks/evals), 100% core coverage
296
360
  - **Code Style:** Standard Ruby
297
361
 
298
362
  ```bash
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Migration v15: Add the activity_events table for debugging and observability.
4
+ # Each row records one hook execution, memory recall, context injection, or sweep operation.
5
+ # Powers the dashboard timeline and efficacy reports.
6
+ Sequel.migration do
7
+ up do
8
+ create_table?(:activity_events) do # create_table? skips creation when the table already exists
9
+ primary_key :id
10
+ String :event_type, null: false # Examples (not exhaustive): "hook_ingest", "hook_context", "hook_sweep", "recall", "store_extraction"
11
+ String :session_id # Claude session that triggered the event (nullable)
12
+ String :status, null: false # "success", "skipped", "error"
13
+ Integer :duration_ms # How long the operation took (nullable)
14
+ String :detail_json, text: true # Event-specific details, serialized as JSON text (nullable)
15
+ String :occurred_at, null: false # ISO 8601 timestamp, stored as a string
16
+ end
17
+
18
+ run "CREATE INDEX IF NOT EXISTS idx_activity_events_type ON activity_events(event_type)" # raw SQL so IF NOT EXISTS keeps index creation re-runnable
19
+ run "CREATE INDEX IF NOT EXISTS idx_activity_events_occurred_at ON activity_events(occurred_at)" # index for lookups by occurred_at
20
+ run "CREATE INDEX IF NOT EXISTS idx_activity_events_session ON activity_events(session_id)" # index for lookups by session_id
21
+ end
22
+
23
+ down do
24
+ drop_table?(:activity_events) # drop_table? is a no-op when the table is absent
25
+ end
26
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Migration v16: Per-moment feedback (improvements.md #43).
4
+ # Tracks a single thumbs-up/down verdict (+ optional note) per activity_event
5
+ # so the dashboard can surface a trust-calibration signal. The unique index on
6
+ # event_id caps each moment at one verdict; the schema only enforces uniqueness, so repeat clicks must be upserted by the writer.
7
+ Sequel.migration do
8
+ up do
9
+ create_table?(:moment_feedback) do # create_table? skips creation when the table already exists
10
+ primary_key :id
11
+ Integer :event_id, null: false # id of the rated activity_event; no foreign-key constraint is declared here
12
+ String :verdict, null: false # "up" | "down"
13
+ String :note, text: true # optional freeform note
14
+ String :recorded_at, null: false # timestamp stored as a string; presumably ISO 8601 like activity_events.occurred_at (confirm)
15
+ index :event_id, unique: true # at most one feedback row per event
16
+ end
17
+ end
18
+
19
+ down do
20
+ drop_table?(:moment_feedback) # drop_table? is a no-op when the table is absent
21
+ end
22
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Migration v17: Access-based staleness scoring (improvements.md #35).
4
+ # Adds facts.last_recalled_at: the last time a fact was surfaced via memory.recall
5
+ # or context injection, derived periodically from activity_events. Sweep-derived rather
6
+ # than per-call so we avoid WAL write contention on the recall hot path.
7
+ Sequel.migration do
8
+ up do
9
+ add_column :facts, :last_recalled_at, String # nullable with no default, so existing facts start as NULL; timestamp format not shown here. NOTE(review): unlike the create_table? migrations this is unguarded; confirm the migrator never re-runs it against a database that already has the column
10
+ end
11
+
12
+ down do
13
+ drop_column :facts, :last_recalled_at # removes the column together with any recorded recall timestamps
14
+ end
15
+ end
+ end