RubyGems - claude_memory - Versions diffs - 0.9.1 → 0.11.0 - Mend

claude_memory 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

checksums.yaml +4 -4
data/.claude/memory.sqlite3 +0 -0
data/.claude/skills/dashboard/SKILL.md +42 -0
data/.claude-plugin/marketplace.json +1 -1
data/.claude-plugin/plugin.json +1 -1
data/CHANGELOG.md +130 -0
data/CLAUDE.md +30 -6
data/README.md +66 -2
data/db/migrations/015_add_activity_events.rb +26 -0
data/db/migrations/016_add_moment_feedback.rb +22 -0
data/db/migrations/017_add_last_recalled_at.rb +15 -0
data/docs/1_0_punchlist.md +371 -0
data/docs/EXAMPLES.md +41 -2
data/docs/GETTING_STARTED.md +33 -4
data/docs/architecture.md +22 -7
data/docs/audit-queries.md +131 -0
data/docs/dashboard.md +192 -0
data/docs/improvements.md +650 -9
data/docs/influence/cq.md +187 -0
data/docs/plugin.md +13 -6
data/docs/quality_review.md +524 -172
data/docs/reflection_memory_as_accumulating_judgment.md +67 -0
data/lib/claude_memory/activity_log.rb +86 -0
data/lib/claude_memory/commands/census_command.rb +210 -0
data/lib/claude_memory/commands/completion_command.rb +3 -0
data/lib/claude_memory/commands/dashboard_command.rb +54 -0
data/lib/claude_memory/commands/dedupe_conflicts_command.rb +55 -0
data/lib/claude_memory/commands/digest_command.rb +273 -0
data/lib/claude_memory/commands/hook_command.rb +61 -2
data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
data/lib/claude_memory/commands/reclassify_references_command.rb +56 -0
data/lib/claude_memory/commands/registry.rb +7 -1
data/lib/claude_memory/commands/show_command.rb +90 -0
data/lib/claude_memory/commands/skills/distill-transcripts.md +13 -1
data/lib/claude_memory/commands/stats_command.rb +131 -2
data/lib/claude_memory/commands/sweep_command.rb +2 -0
data/lib/claude_memory/configuration.rb +16 -0
data/lib/claude_memory/core/relative_time.rb +9 -0
data/lib/claude_memory/dashboard/api.rb +610 -0
data/lib/claude_memory/dashboard/conflicts.rb +279 -0
data/lib/claude_memory/dashboard/efficacy.rb +127 -0
data/lib/claude_memory/dashboard/fact_presenter.rb +109 -0
data/lib/claude_memory/dashboard/health.rb +175 -0
data/lib/claude_memory/dashboard/index.html +2707 -0
data/lib/claude_memory/dashboard/knowledge.rb +136 -0
data/lib/claude_memory/dashboard/moments.rb +244 -0
data/lib/claude_memory/dashboard/reuse.rb +97 -0
data/lib/claude_memory/dashboard/scoped_fact_resolver.rb +95 -0
data/lib/claude_memory/dashboard/server.rb +211 -0
data/lib/claude_memory/dashboard/timeline.rb +68 -0
data/lib/claude_memory/dashboard/trust.rb +454 -0
data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
data/lib/claude_memory/distill/reference_material_detector.rb +78 -0
data/lib/claude_memory/hook/auto_memory_mirror.rb +112 -0
data/lib/claude_memory/hook/context_injector.rb +97 -3
data/lib/claude_memory/hook/handler.rb +191 -3
data/lib/claude_memory/mcp/handlers/management_handlers.rb +8 -0
data/lib/claude_memory/mcp/query_guide.rb +11 -0
data/lib/claude_memory/mcp/text_summary.rb +29 -0
data/lib/claude_memory/mcp/tool_definitions.rb +13 -0
data/lib/claude_memory/mcp/tools.rb +148 -0
data/lib/claude_memory/publish.rb +13 -21
data/lib/claude_memory/recall/stale_detector.rb +67 -0
data/lib/claude_memory/resolve/predicate_policy.rb +2 -0
data/lib/claude_memory/resolve/resolver.rb +41 -11
data/lib/claude_memory/store/llm_cache.rb +68 -0
data/lib/claude_memory/store/metrics_aggregator.rb +96 -0
data/lib/claude_memory/store/schema_manager.rb +1 -1
data/lib/claude_memory/store/sqlite_store.rb +47 -143
data/lib/claude_memory/store/store_manager.rb +29 -0
data/lib/claude_memory/sweep/maintenance.rb +216 -0
data/lib/claude_memory/sweep/recall_timestamp_refresher.rb +83 -0
data/lib/claude_memory/sweep/sweeper.rb +2 -0
data/lib/claude_memory/templates/hooks.example.json +5 -0
data/lib/claude_memory/version.rb +1 -1
data/lib/claude_memory.rb +24 -0
metadata +51 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b6df0a3f58a88c1bbec82ec20e26789d51ad2712408d058337a196c5eac90654
-  data.tar.gz: beb9c2ef59ef6a45430eeb03466e37f6b1f741ef1745b5303a1443b02a7c84b4
+  metadata.gz: c2164011e2c50c7fdb0bcad468a25814f372384c3a49fa4c9414313ab3975e00
+  data.tar.gz: 3e2843979d9b9e0d4a21bfa3650f6cd6843ce18d2a95af884e303572259bca62
 SHA512:
-  metadata.gz: '06905bca1f77df5642caf0846cde7394ba9a1baf3c954138383ac39927fcaae2ef097ff79dd3c866e6930fa0eac0d0fb958366bded54a0616d8e356a316e616c'
-  data.tar.gz: 9a8e3c455c20ae616bc239b766e1d4e2aa4c6e5448f494294d9c6a646a8a613428e9b63218624c5cae7e30f389704dd3bee6b788e97a369cb719b115abffddd7
+  metadata.gz: 6c074b607c1e4f13743de36bb2074495d0ad24d0c826b62b49e0e827f311e3424bc881f42236db22a92dd2d5281e6ef13ca450966b1d9438ac1b36ceaa3ab2ce
+  data.tar.gz: 4e06c8fed9c323974ee4d7e5b41386ee4682ba5ba88b67797ac6864bbdf03663e5b74cb33497a371a8263eff4c24d405708a80e4862c4f578b221286bf40b236

data/.claude/memory.sqlite3 CHANGED Viewed

Binary file

data/.claude/skills/dashboard/SKILL.md ADDED Viewed

@@ -0,0 +1,42 @@
+---
+name: dashboard
+description: Launch a local web dashboard for ClaudeMemory debugging and observability
+---
+# Dashboard
+Launch the ClaudeMemory debugging dashboard to visualize memory system health, activity, and efficacy.
+## Task
+Start the dashboard web server so the user can inspect what's happening behind the scenes.
+## Steps
+1. Run the dashboard command:
+```bash
+claude-memory dashboard
+```
+This starts a local web server (default port 3377) and opens it in the browser.
+## What the Dashboard Shows
+- **Health Status**: Database health, hook configuration, vector index status
+- **Overview**: Fact/entity/content counts, top predicates, entity type distribution, 30-day activity timeline
+- **Activity**: Live event log of hook executions (ingest, context, sweep), memory recalls, and store extractions with timing and details
+- **Facts**: Searchable fact explorer with status filtering, predicate/object search
+- **Efficacy**: Recall hit rate, total results served, average results per query, top queries by result count
+## Options
+- `--port PORT` - Use a different port (default: 3377)
+- `--no-open` - Don't auto-open the browser
+## Notes
+- Dashboard auto-refreshes every 30 seconds
+- Activity events are recorded by hooks and MCP tools into the `activity_events` table
+- The dashboard reads from both global and project databases
+- Press Ctrl+C to stop the server

data/.claude-plugin/marketplace.json CHANGED Viewed

@@ -7,7 +7,7 @@
   "plugins": [
     {
       "name": "claude-memory",
-      "version": "0.9.1",
+      "version": "0.11.0",
       "source": "./",
       "description": "Long-term memory for Claude Code. Recalls architecture, conventions, and decisions across sessions — so Claude explains your codebase without file traversal, follows your patterns, and never re-asks what it already learned.",
       "repository": "https://github.com/codenamev/claude_memory"

data/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-memory",
-  "version": "0.9.1",
+  "version": "0.11.0",
   "description": "Long-term memory for Claude Code. Recalls architecture, conventions, and decisions across sessions — so Claude explains your codebase without file traversal, follows your patterns, and never re-asks what it already learned.",
   "author": {
     "name": "Valentino Stoll",

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,136 @@ All notable changes to this project will be documented in this file.
 ## [Unreleased]
+## [0.11.0] - 2026-04-30
+Theme: **Trust & Cost** — five user-visible signals that answer "is memory still worth it?" with numbers a skeptical user can read in <30 seconds.
+### Added
+- **Token budget telemetry** — every successful SessionStart context injection now records an estimated `context_tokens` count on its `activity_events` row. Surfaced three ways:
+  - Dashboard Trust panel emits a `token_budget` block with p50/p95/avg/sample_size over the last 30 days, so the JSON dashboard endpoint and any downstream consumer answer "what does memory cost per session?"
+  - `claude-memory digest` includes a "Context cost" subsection between activity and new-knowledge so the weekly report shows the price tag next to the value.
+  - `claude-memory stats --tokens [--since DAYS]` reports total sessions, p50/p95/avg/min/max, and a histogram across <500 / 500-1k / 1-2k / 2-5k / 5k+ buckets.
+- Purely additive — no schema migration. Historical events written before this release simply contribute zero samples until new injections accumulate.
+- First 0.11.0 milestone item from the 1.0 punchlist (Trust & Cost). Closes the "what % of my SessionStart token budget does memory consume?" gap.
+- **Hallucination-rate metric** — the dashboard now quantifies how clean the fact base is, not just how full it is. `Distill::BareConclusionDetector` is the production-side mirror of the SessionStart prompt's reason-clause requirement (decision/convention facts must embed "because…" / "so that…" / "to avoid…"). Surfaced two ways:
+  - Dashboard Trust panel emits a `quality_score` block aggregating across project + global active facts: `suspect_count` (predicate=reference, retagged by ReferenceMaterialDetector), `bare_conclusion_count`, percentages, and an overall 0–100 score (higher = cleaner). Returns 100 on empty stores so fresh installs aren't penalized.
+  - `claude-memory digest` includes a "Quality" section showing the score breakdown plus the in-window rejection rate ("of facts created in the last 7 days, X% have been rejected since"), so calibration drift is visible.
+- Second 0.11.0 milestone item. Pairs with token-budget telemetry to answer "is memory still worth its cost?" via two skeptic-friendly numbers.
+- **`claude-memory show`** — new CLI command prints what memory would inject at the next SessionStart in plain Markdown. Runs the exact `Hook::ContextInjector` path real sessions use, so output matches what Claude actually receives. Footer reports fact count, ~token estimate, and char count so users see the SessionStart cost at a glance.
+  - Default suppresses the raw-transcript "Pending Knowledge Extraction" dump (intended for LLM distillation, not human reading); pass `--pending` to include it.
+  - `--source SOURCE` (startup/resume/clear) simulates each fresh-session entrypoint so users can preview which sections would appear.
+- Third 0.11.0 milestone item. Closes the inspectability gap — trust requires being able to see what memory will inject, the same way `cat CLAUDE.md` works.
+- **First-week ROI nudge** — at SessionEnd, memory now prints `memory contributed N facts this session, %used = X` for the first 10 sessions, then quiets. New users get user-visible proof memory is doing work for them without having to know about the dashboard. Once trust is established (or it isn't), the nudge gets out of the way.
+  - New `claude-memory hook nudge` subcommand + `Hook::Handler#nudge`. SessionEnd config now wires `[ingest, sweep, nudge]` in order.
+  - Silent on `CLAUDE_MEMORY_NO_NUDGE=1` opt-out, missing session_id, n=0 contributions, and after MAX_NUDGES emissions. The empty-session silent path doesn't burn a slot — quiet sessions don't count toward the 10.
+  - Activity event `roi_nudge` records `{n, used, pct, prior_count}` per emission so a future migration could change the threshold without re-counting from raw events.
+- Fourth 0.11.0 milestone item. Cold-start trust signal that pairs with #47 (token cost) and #48 (quality) to make the first-week answer to "is this worth it?" visible without effort.
+- **Harm benchmark prototype** — `spec/benchmarks/dataset/harm_scenarios.yml` + `spec/benchmarks/e2e/harm_bench_spec.rb`. Three hand-written cases spanning the riskiest harm classes (stale_tech, mismatched_scope, superseded_undetected). The first ClaudeMemory benchmark that measures whether memory can make Claude *wrong* — every other benchmark only measures whether memory helps.
+  - Structure validation (regex compile, fact loadability, harm-class coverage) runs in stub mode as part of `:benchmark` tag.
+  - Real-mode runner: `EVAL_MODE=real bundle exec rspec spec/benchmarks/e2e/harm_bench_spec.rb` — needs `claude` CLI on PATH, ~$2-8 per run. Reports harm rate; doesn't enforce a threshold yet (that's the 0.12 release gate).
+- 0.11.0 de-risking item. If even one of these three surfaces a harm now, the full 10-15-case benchmark planned for 0.12 will likely reveal a fundamental issue — better to learn that at 0.11 than at 0.12. **Real-mode prototype run on 2026-04-30 reported 0/3 harm** — green light to expand to the full corpus in 0.12.
+### Changed
+- **Hallucination-rate metric calibration** — `Dashboard::Trust#quality_score` now reports a windowed (last 30d) "live" score as the headline plus a "historical" block over all active facts. Production verification on 2026-04-30 (recorded in `docs/quality_review.md`) showed the unwindowed metric was technically correct but pragmatically misleading: 97% of bare-conclusion facts pre-dated the 2026-04-20 reason-clause prompt commit, and the entire 7-day rejection cluster was a single-class systemic failure (a `/study-repo` burst), not ongoing noise. The split makes the metric actionable: live score = ongoing extraction quality, historical = legacy data. The digest's "Quality" section uses the live score as the headline.
+### Fixed
+- Real-eval CLI runner now passes `allowed_tools` through explicitly so the harm benchmark and other real-mode benches can pre-allow MCP memory tools without per-test wiring.
+### Upgrade Notes
+- No schema migration. All new features ship purely additive.
+- Hooks run the installed gem from PATH, not the working tree. After upgrading, `bundle exec rake install` (or `gem install claude_memory`) is required for the new SessionEnd nudge, `claude-memory show` command, `--tokens` stats flag, and `context_tokens` activity-event field to actually fire on real hook events.
+- Existing `quality_score` consumers will see additional fields (`window_days`, `historical`) in the snapshot. The original keys (`score`, `total_active`, `suspect_count`, `bare_conclusion_count`, `suspect_pct`, `bare_pct`) remain at the top level and now reflect the 30-day live window — historical numbers move to the `historical` sub-hash.
+## [0.10.0] - 2026-04-28
+### Added
+**Dashboard — feed-first redesign with observability built in**
+- New feed-first dashboard UI with scope-aware moments, fact detail modal, query tester, and activity drilldown. Reuse, Trust, Knowledge, Conflicts, and Moments panels each backed by a dedicated module (`Dashboard::{Reuse, Trust, Knowledge, Conflicts, Moments}`) under unit tests, replacing the prior all-in-API-class layout.
+- 👍/👎 feedback on individual moments with persisted verdicts (schema v16, `moment_feedback` table). Trust panel surfaces a 30-day up/down ratio so the dashboard can answer "when memory surfaces something, are users marking it useful?".
+- Utilization ratio panel — of facts extracted in the last 30 days, how many has Claude actually used in a recall or context injection? Color-coded (green ≥40%, yellow ≥15%, red below). Hidden on fresh installs to avoid misleading zeros.
+- Conflict deduping at the display layer: identical (subject, predicate, object_pair) detections collapse into one row with a `×N` badge. Sidebar "Needs review" count now reflects distinct contradictions, not raw row count.
+- Activity events drilldown: each moment opens a payload modal with prettified JSONL, recall trigger correlation (which user prompt motivated this lookup), and linked-fact resolution scoped per database.
+- Vector index health threshold and clickable remediation hints in the health dashboard.
+**CLI — observability surfaces and one-shot cleanups**
+- `claude-memory digest [--since DAYS] [--output FILE]` — weekly markdown report. Sections: Activity, New knowledge by predicate, Utilization (extracted vs used), Conflicts, Feedback. No new schema; renders from existing aggregates.
+- `claude-memory census [--root DIR]` — privacy-safe cross-project vocabulary scan. Aggregates per-DB predicate × status counts, novel predicates, synonym candidates. Suppresses object literals, entity names, and paths; per-DB IDs are SHA256-prefixed.
+- `claude-memory dedupe-conflicts [--scope SCOPE] [--dry-run]` — one-shot cleanup for historical conflict-row duplication that predates the Resolver dedup fix (commit f571ba4). Groups by (subject, predicate, normalized object pair), keeps the earliest, migrates provenance to the keeper.
+- `claude-memory reclassify-references [--scope SCOPE] [--dry-run]` — retags active convention facts that the new `Distill::ReferenceMaterialDetector` flags as reference material (LOC counts, star counts, "X is a plugin..." templates, "by Firstname Lastname" attributions).
+**Memory quality**
+- Access-based staleness scoring (improvements.md #35). Schema v17 adds `last_recalled_at` to facts. `Sweep::RecallTimestampRefresher` derives the field periodically from activity_events; `claude-memory stats --stale [--stale-days N]` lists facts that haven't been recalled inside the threshold. Replaces the prior "active facts minus seen-in-recalls" approximation.
+- Auto-memory mirror (improvements.md #36). On fresh sessions, the SessionStart context hook scans `~/.claude/projects/<slug>/memory/*.md` and surfaces new or changed entries as extraction candidates so users can promote auto-memory observations into claude_memory without manual copy-paste.
+- Reasoning requirement enforced in distillation (improvements.md #34). The SessionStart prompt and the `/distill-transcripts` skill now require a why clause for `decision` and `convention` predicates ("because…", "so that…", etc.). Audit found ~75% of facts were bare conclusions before this change.
+- `Distill::ReferenceMaterialDetector` reclassifies convention facts whose object text matches reference patterns. New `reference` predicate registered in `PredicatePolicy` with its own `:references` snapshot section. Detector runs at write time in `ManagementHandlers#store_extraction` so mislabeling can't persist.
+- Predicate census command (#30) for cross-project vocabulary audits — see CLI section above.
+**Benchmarks and observability**
+- Repeat-correction benchmark harness (improvements.md #32). `spec/benchmarks/e2e/repeat_correction_spec.rb` pre-loads a past correction as a memory fact, runs the prompt through real Claude under `EVAL_MODE=real`, and reports pass rate (no violation patterns matched). Starter set of 2 scenarios drawn from this project's recurring gotchas.
+- Relevance ratio metric (improvements.md #31). `Hook::ContextInjector#emitted_subjects` exposes the subjects injected at SessionStart; `BenchmarkHelpers::RelevanceMetrics` measures whether they appear in Claude's response. Trend signal for memory-application quality, integrated into `devmemeval_spec.rb`.
+- MCP server embeds the V=R/C ("Verify before Recommend / Correct") mental model in agent instructions so memory recommendations come with built-in verification cues.
+**Schema v14 → v17 (additive only, automatic on first run)**
+- Migration 015: adds `activity_events` table for hook/recall/context/sweep telemetry. Powers the dashboard timeline, moments feed, and efficacy reports.
+- Migration 016: adds `moment_feedback` table (unique on event_id) for the dashboard 👍/👎 surface.
+- Migration 017: adds nullable `facts.last_recalled_at` for access-based staleness scoring.
+**1.0 readiness track**
+- New `docs/1_0_punchlist.md` opens the path to 1.0: token-budget telemetry, hallucination-rate metric, negative-fact harm benchmark, CLAUDE.md baseline publication, `claude-memory show`, benchmark scoreboard. Ten entries (#47-56) added to `docs/improvements.md` with concrete file:line plumbing notes.
+### Changed
+- `Resolver#apply_conflict` no longer creates a duplicate disputed fact + conflict row when the same contradicting value is re-extracted. Looks up disputed facts in the same (subject, predicate) slot and reinforces with provenance instead.
+- `Resolver` no longer treats the distiller's `scope_hint` as a scope override. `scope_hint` is advisory metadata; `fact.scope` must match the DB the row lives in. Earlier behavior caused scope leakage where global-hinted distillations landed in the project DB.
+- `Hook::ContextInjector` adds `emitted_fact_ids` and `emitted_subjects` accessors so benchmark harnesses can attribute injection contributions per session.
+- `SQLiteStore` decomposed via module inclusion: `LLMCache` and `MetricsAggregator` extracted into `lib/claude_memory/store/`. SQLiteStore back under 600 LOC.
+- `Dashboard::API` decomposed: `FactPresenter`, `Conflicts`, `Efficacy::Reporter`, `Timeline`, `Health` extracted into dedicated classes following the boundary pattern. API now routes/delegates rather than aggregating.
+- Dashboard releases DB connections after each HTTP request (was holding connections open for the lifetime of the WEBrick session).
+- `Sweep::Maintenance` gains `dedupe_open_conflicts` and `reclassify_references` for the one-shot CLI commands above.
+- Round-trip migration specs from v12, v13, v14 → v17 (per-version migrations covered by `spec/claude_memory/store/migrations/`). Codifies the release-blocker convention: any schema bump must round-trip from each prior major-release boundary back ~3 releases.
+### Fixed
+- Dashboard surfaces an actionable hint (run `claude-memory compact`) instead of a generic error when Recall hits FTS5 corruption.
+- Dashboard query tester unwraps the nested Recall result shape rather than printing the raw envelope.
+- Dashboard health checks correctly detect the claude-memory hook installation across the two-level Claude Code hooks structure (was reporting false negatives when hooks were installed under a matcher block).
+- Dashboard Efficacy "this session" correlation falls back to a time window when the recall event has no `session_id` (MCP tool calls don't thread session_id).
+- Bulk-reject in the Conflicts modal now retries with an actionable message when the server-side state is stale.
+### Upgrade Notes
+**Schema bump v14 → v17.** Three migrations run automatically on first launch after upgrade. All three are additive (no existing data is rewritten):
+1. Migration 015 creates `activity_events` (hook/recall telemetry).
+2. Migration 016 creates `moment_feedback` (dashboard verdicts).
+3. Migration 017 adds `facts.last_recalled_at` (NULL by default; `Sweep::RecallTimestampRefresher` populates it on the next sweep cycle from existing activity_events).
+The migration delta has round-trip spec coverage in `spec/claude_memory/store/migrations/`. Forward-compatibility: 0.10.0 databases cannot be opened by 0.9.x or earlier. Downgrade is destructive — back up `~/.claude/memory.sqlite3` and `.claude/memory.sqlite3` before downgrading.
+**Optional historical cleanups.** Two new admin commands address data tails left by earlier bugs that have since been fixed at the source:
+```bash
+claude-memory dedupe-conflicts --dry-run   # preview duplicate conflict rows
+claude-memory dedupe-conflicts             # consolidate them
+claude-memory reclassify-references --dry-run   # preview reference-material mislabels
+claude-memory reclassify-references             # retag them
+```
+Both are opt-in. Neither runs in the regular sweep cycle. Use `--scope global` to clean the global DB.
+**Telemetry footprint.** The `activity_events` table grows with hook activity. The dashboard surfaces this by default and powers the timeline/moments/efficacy panels. Retention pruning is not yet automatic (planned for a follow-up); manual cleanup via `DELETE FROM activity_events WHERE occurred_at < ?` is safe — the dashboard tolerates missing history.
 ## [0.9.1] - 2026-04-16
 ### Fixed

data/CLAUDE.md CHANGED Viewed

@@ -163,7 +163,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
   - Each command is a separate class (HelpCommand, DoctorCommand, etc.)
   - All commands inherit from BaseCommand
   - Dependency injection for I/O (stdout, stderr, stdin)
-  - 28 commands total, each focused on single responsibility
+  - 34 commands total, each focused on single responsibility
 - **`Configuration`**: Centralized ENV access (`configuration.rb`)
   - Single source of truth for paths and environment variables
@@ -208,6 +208,9 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
 - **`Distill`**: Fact extraction interface (`distill/`)
   - Pluggable distiller design (current: NullDistiller stub)
   - Extracts entities, facts, scope hints from content
+  - `ReferenceMaterialDetector`: classifies "X is a plugin/library/tool" templates, LOC counts, "by Firstname Lastname" attributions as reference material. Runs in `ManagementHandlers#store_extraction` so mislabeling can't persist
+  - `BareConclusionDetector` (0.11.0+): production-side mirror of the SessionStart prompt's reason-clause requirement. Pure function — flags `decision` / `convention` facts whose object lacks a reason-clause signal ("because", "so that", "to avoid", etc.). Powers the `quality_score` metric on the Trust panel and the digest's Quality section.
+  - SessionStart distillation prompt enforces reason clauses ("because…", "so that…") for `decision` and `convention` predicates — bare conclusions are explicitly disallowed
 - **`Resolve`**: Truth maintenance and conflict resolution (`resolve/`)
   - Determines equivalence, supersession, or conflicts
@@ -226,7 +229,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
   - Modes: shared (repo), local (uncommitted), home (user directory)
 - **`MCP`**: Model Context Protocol server and tools (`mcp/`)
-  - Exposes memory tools to Claude Code (24 tools total)
+  - Exposes memory tools to Claude Code (25 tools total)
   - `Telemetry`: Records tool invocations to `mcp_tool_calls` table for usage stats
   - Dual content/structuredContent responses with compact mode
@@ -234,6 +237,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
   - Reads stdin JSON from Claude Code hooks
   - Routes to ingest/sweep/publish commands
   - `DistillationRunner`: Manages context hook injection with undistilled content for LLM extraction
+  - `AutoMemoryMirror` (0.10.0+): On fresh sessions, scans `~/.claude/projects/<slug>/memory/*.md` for new/changed entries and surfaces them as extraction candidates in the SessionStart context. State diffed by md5 in `.claude/auto_memory_mirror.json`; bounded to 5 candidates per session, 1500 chars each.
 ### Database Schema
@@ -246,16 +250,19 @@ Key tables (defined in `sqlite_store.rb`):
 - `fact_links`: Supersession and conflict relationships
 - `conflicts`: Open contradictions
 - `mcp_tool_calls`: MCP server tool invocation telemetry (schema v13)
+- `activity_events`: Hook/recall/context/sweep/nudge telemetry (schema v15) — powers the dashboard timeline, moments feed, efficacy reports. Event types: `hook_ingest`, `hook_context` (carries `context_tokens` since 0.11.0), `hook_sweep`, `hook_publish`, `recall`, `store_extraction`, `roi_nudge` (since 0.11.0).
+- `moment_feedback`: Per-moment 👍/👎 verdicts with optional notes (schema v16) — unique on event_id, repeat clicks upsert
 Facts include:
 - `scope`: "global" or "project" (determines applicability)
 - `project_path`: Set for project-scoped facts
 - `valid_from`/`valid_to`: Temporal validity window
+- `last_recalled_at` (schema v17): Set by `Sweep::RecallTimestampRefresher` from activity_events; powers `claude-memory stats --stale` and the dashboard's "stale" needs-review count
 ### Scope System
 Facts are scoped to control where they apply:
-- **project**: Current project only (e.g., "this app uses PostgreSQL")
+- **project**: Current project only (e.g., "claude_memory uses SQLite for storage")
 - **global**: All projects (e.g., "I prefer 4-space indentation")
 Distiller detects signals like "always", "in all projects", "my preference" and sets `scope_hint: "global"`. Users can manually promote facts via `claude-memory promote <fact_id>` or the `memory.promote` MCP tool.
@@ -325,7 +332,7 @@ Also update `SECTION_MAP` if the predicate should appear in a specific snapshot
 - `lib/claude_memory.rb`: Main module, requires, database path helpers
 - `lib/claude_memory/cli.rb`: Thin command router (41 lines)
-- `lib/claude_memory/commands/`: Individual command classes (28 commands)
+- `lib/claude_memory/commands/`: Individual command classes (34 commands)
 - `lib/claude_memory/configuration.rb`: Centralized configuration and ENV access
 - `lib/claude_memory/domain/`: Domain models (Fact, Entity, Provenance, Conflict)
 - `lib/claude_memory/core/`: Value objects and null objects
@@ -340,14 +347,14 @@ Also update `SECTION_MAP` if the predicate should appear in a specific snapshot
 The gem includes an MCP server (`claude-memory serve-mcp`) that exposes memory operations as tools. Configuration should be in `.mcp.json` at project root.
-Available MCP tools (24 total):
+Available MCP tools (25 total):
 - **Query & Recall**: `memory.recall`, `memory.recall_index`, `memory.recall_details`, `memory.recall_semantic`, `memory.search_concepts`
 - **Provenance**: `memory.explain`, `memory.fact_graph`
 - **Shortcuts**: `memory.decisions`, `memory.conventions`, `memory.architecture`
 - **Context**: `memory.facts_by_tool`, `memory.facts_by_context`
 - **Management**: `memory.promote`, `memory.reject_fact`, `memory.store_extraction`
 - **Distillation**: `memory.undistilled`, `memory.mark_distilled`
-- **Monitoring**: `memory.status`, `memory.stats`, `memory.changes`, `memory.conflicts`
+- **Monitoring**: `memory.status`, `memory.stats`, `memory.changes`, `memory.conflicts`, `memory.activity`
 - **Maintenance**: `memory.sweep_now`
 - **Discovery**: `memory.check_setup`, `memory.list_projects`
@@ -367,8 +374,25 @@ ClaudeMemory integrates with Claude Code via hooks in `.claude/settings.json`:
   - Runs time-bounded maintenance on both databases
   - Cleans up vec0 entries for superseded/expired facts
+- **Nudge hook** (0.11.0+): Triggers on SessionEnd, fires after ingest+sweep
+  - Calls `claude-memory hook nudge`
+  - For the first 10 sessions only, prints "memory contributed N facts this session, %used = X" to stdout so new users see ROI inline before they discover the dashboard
+  - Records `roi_nudge` activity_events; quiets after `MAX_NUDGES` emissions
+  - Opt out with `CLAUDE_MEMORY_NO_NUDGE=1` (no event recorded on opt-out)
+  - Empty sessions (n=0) silently no-op so quiet sessions don't burn nudge slots
 Hook commands read JSON payloads from stdin for robustness. Supports `--async` flag for non-blocking execution.
+## Dashboard
+Local web UI for inspecting memory state. Started via `claude-memory dashboard` (default port 3377). Reads from both global and project databases; no write side effects from page loads.
+The dashboard is a thin web layer over the same `Recall`/`Conflicts`/`Trust`/`Moments`/`Knowledge`/`Reuse`/`Health`/`Timeline` classes the MCP server uses. Each panel is backed by a dedicated module under `lib/claude_memory/dashboard/`; `Dashboard::API` holds HTTP-shape glue and per-endpoint formatting (delegating non-trivial logic to the panel classes).
+Connections are released after each request — the dashboard never holds a WAL writer lock open across page loads.
+See [docs/dashboard.md](docs/dashboard.md) for the user-facing guide (panels, common workflows, related CLI commands).
 ## Code Style
 This project uses [Standard Ruby](https://github.com/standardrb/standard) for linting. Run `bundle exec rake standard:fix` before committing.

data/README.md CHANGED Viewed

@@ -140,6 +140,69 @@ File-searchable questions ("what version is this?") and one-shot code generation
 - **Claude-Powered**: Uses Claude's intelligence to extract facts (no API key needed)
 - **Token Efficient**: 10x reduction in memory queries with progressive disclosure
 - **Database Maintenance**: Compact, export, and backup commands
+- **Built-in Observability** (0.10.0+): `claude-memory dashboard` opens a local web UI with a moments feed, trust panel (token budget, quality score, utilization, feedback), conflict deduplication, knowledge index, and 👍/👎 feedback. See **[Dashboard guide →](docs/dashboard.md)**. `claude-memory digest` writes a weekly markdown report (Activity, Context cost, Quality, New knowledge, Utilization, Conflicts, Feedback); `claude-memory show` prints what would be injected at the next SessionStart; `claude-memory census` audits the predicate vocabulary across projects.
+## What's New in 0.11.0
+Five user-visible signals so you can answer "is memory still worth it?" with
+numbers, not vibes:
+- **Token budget telemetry** — every SessionStart context injection now
+  records its estimated `context_tokens`. `claude-memory stats --tokens
+  [--since DAYS]` reports p50/p95/avg/min/max plus a histogram across
+  <500 / 500-1k / 1-2k / 2-5k / 5k+ buckets so you can see the per-session
+  cost at a glance. The dashboard's Trust panel and `claude-memory digest`
+  surface the same numbers.
+- **Hallucination-rate metric** — the dashboard now scores how *clean* the
+  fact base is, not just how full it is. `Distill::BareConclusionDetector`
+  flags `decision` / `convention` facts that skipped the reason-clause
+  requirement. Trust panel shows `quality_score` (live 30-day window with
+  historical baseline beneath). `claude-memory digest` adds a Quality
+  section with rejection rate.
+- **`claude-memory show`** — new command prints what memory *would* inject
+  at the next SessionStart in plain Markdown. Footer reports fact count,
+  ~token estimate, and char count so you see the cost at a glance. Default
+  hides the raw-transcript "Pending Knowledge" dump for readability;
+  `--pending` opts in. `--source startup|resume|clear` simulates each
+  fresh-session entrypoint.
+- **First-week ROI nudge** — at SessionEnd, memory now prints
+  `memory contributed N facts this session, %used = X` for the first 10
+  sessions, then quiets. Cold-start trust signal — you don't have to know
+  about the dashboard. Opt out with `CLAUDE_MEMORY_NO_NUDGE=1`.
+- **Harm benchmark prototype** — first ClaudeMemory benchmark that
+  measures whether memory can make Claude *wrong*. Three hand-written
+  cases (stale-tech, mismatched-scope, superseded-but-undetected) under
+  `spec/benchmarks/e2e/harm_bench_spec.rb`. Real-mode run on the 0.11
+  release reported 0/3 harm; the full 10-15-case corpus + release gate
+  lands in 0.12.
+## What's New in 0.10.0
+Three behavior changes worth knowing about — they affect what you'll see in
+extracted facts and SessionStart context, even if you don't change anything:
+- **Auto-memory mirror** — On fresh sessions, the SessionStart context hook
+  scans `~/.claude/projects/<slug>/memory/*.md` and surfaces new or changed
+  entries as candidates for extraction into ClaudeMemory. You'll see a
+  "Pending Knowledge Extraction" section in Claude's startup context citing
+  files from your auto-memory directory. Claude reviews these and calls
+  `memory.store_extraction` for the high-signal ones; you don't need to
+  copy-paste manually anymore.
+- **Why-clause enforcement** — When Claude distills `decision` and
+  `convention` facts, it's now required to embed a reason ("…because…",
+  "…so that…", "…to avoid…"). A bare conclusion is dead weight; a fact with
+  a reason stays useful when the situation changes. You'll see this
+  reflected in fact text being longer and more justified.
+- **Reference predicate** — Active facts that look like reference material
+  (LOC counts, "X is a plugin/library/tool" templates, "by Firstname
+  Lastname" attributions) are auto-tagged `predicate=reference` instead of
+  `convention`. Keeps the conventions list signal-rich. Browse them in the
+  dashboard's Knowledge → References section, or run
+  `claude-memory reclassify-references --dry-run` to see candidates.
+Plus: **staleness detection** (`claude-memory stats --stale`) lists active
+facts that haven't been recalled in N days, so you can prune dead weight
+explicitly. The dashboard's Trust → Needs review panel surfaces the count.
 ## Privacy Control
@@ -241,7 +304,8 @@ The uninstall command removes:
 - 📖 [Getting Started](docs/GETTING_STARTED.md) - Step-by-step onboarding
 - 💡 [Examples](docs/EXAMPLES.md) - Use cases and workflows
-- 🔧 [Plugin Setup](docs/PLUGIN.md) - Claude Code integration
+- 📊 [Dashboard](docs/dashboard.md) - Local web UI for inspection and trust signals (0.10.0+)
+- 🔧 [Plugin Setup](docs/plugin.md) - Claude Code integration
 - 🏗️ [Architecture](docs/architecture.md) - Technical deep dive
 - 📝 [Changelog](CHANGELOG.md) - Release notes
@@ -292,7 +356,7 @@ The benchmark dataset draws from real CLAUDE.md patterns and is designed specifi
 - **Language:** Ruby 3.2+
 - **Storage:** SQLite3 (no external services)
-- **Testing:** 1477 examples (1375 unit/integration + 102 benchmarks/evals), 100% core coverage
+- **Testing:** 1964 examples (~1700 unit/integration + ~250 benchmarks/evals), 100% core coverage
 - **Code Style:** Standard Ruby
 ```bash

data/db/migrations/015_add_activity_events.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+# Migration v15: Add activity_events table for debugging and observability
+# Tracks hook executions, memory recalls, context injections, and sweep operations.
+# Powers the dashboard timeline and efficacy reports.
+Sequel.migration do
+  up do # forward step: create the activity_events table, then three single-column indexes
+    create_table?(:activity_events) do # create_table? leaves an already-existing table untouched
+      primary_key :id
+      String :event_type, null: false    # e.g. "hook_ingest", "hook_context", "hook_sweep", "recall", "store_extraction" (examples; no CHECK constraint restricts the value)
+      String :session_id                 # Claude session that triggered the event; nullable
+      String :status, null: false        # "success", "skipped", "error"
+      Integer :duration_ms               # How long the operation took, in milliseconds; nullable
+      String :detail_json, text: true    # Event-specific details (JSON), stored as text; nullable
+      String :occurred_at, null: false   # ISO 8601 timestamp, stored as a string
+    end
+    run "CREATE INDEX IF NOT EXISTS idx_activity_events_type ON activity_events(event_type)" # raw SQL; IF NOT EXISTS makes a re-run a no-op
+    run "CREATE INDEX IF NOT EXISTS idx_activity_events_occurred_at ON activity_events(occurred_at)" # index on the timestamp column
+    run "CREATE INDEX IF NOT EXISTS idx_activity_events_session ON activity_events(session_id)" # index on the session column
+  end
+  down do # rollback: drop the table only if it exists
+    drop_table?(:activity_events)
+  end
+end

data/db/migrations/016_add_moment_feedback.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+# Migration v16: Per-moment feedback (improvements.md #43).
+# Tracks a single thumbs-up/down verdict (+ optional note) per activity_event
+# so the dashboard can surface a trust-calibration signal. Unique on event_id
+# so a given moment has at most one current verdict; repeat clicks upsert.
+Sequel.migration do
+  up do # forward step: create the moment_feedback table with its unique index
+    create_table?(:moment_feedback) do # create_table? leaves an already-existing table untouched
+      primary_key :id
+      Integer :event_id, null: false # id of the activity_event being rated (per the header comment); no foreign-key constraint is declared
+      String :verdict, null: false  # "up" | "down"
+      String :note, text: true      # optional freeform note
+      String :recorded_at, null: false # presumably an ISO 8601 timestamp string — TODO confirm
+      index :event_id, unique: true # at most one feedback row per event_id
+    end
+  end
+  down do # rollback: drop the table only if it exists
+    drop_table?(:moment_feedback)
+  end
+end

data/db/migrations/017_add_last_recalled_at.rb ADDED Viewed

@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+# Migration v17: Access-based staleness scoring (improvements.md #35).
+# Records the last time a fact was surfaced via memory.recall or context
+# injection, derived periodically from activity_events. Sweep-derived rather
+# than per-call so we avoid WAL write contention on the recall hot path.
+Sequel.migration do
+  up do # NOTE(review): add_column has no existence guard here — confirm a re-run against a DB that already has the column is not expected
+    add_column :facts, :last_recalled_at, String # nullable string column; no default and no backfill in this migration
+  end
+  down do # rollback: removes the column and any values stored in it
+    drop_column :facts, :last_recalled_at
+  end
+end