claude_memory 0.9.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/skills/dashboard/SKILL.md +42 -0
- data/.claude-plugin/marketplace.json +1 -1
- data/.claude-plugin/plugin.json +1 -1
- data/CHANGELOG.md +130 -0
- data/CLAUDE.md +30 -6
- data/README.md +66 -2
- data/db/migrations/015_add_activity_events.rb +26 -0
- data/db/migrations/016_add_moment_feedback.rb +22 -0
- data/db/migrations/017_add_last_recalled_at.rb +15 -0
- data/docs/1_0_punchlist.md +371 -0
- data/docs/EXAMPLES.md +41 -2
- data/docs/GETTING_STARTED.md +33 -4
- data/docs/architecture.md +22 -7
- data/docs/audit-queries.md +131 -0
- data/docs/dashboard.md +192 -0
- data/docs/improvements.md +650 -9
- data/docs/influence/cq.md +187 -0
- data/docs/plugin.md +13 -6
- data/docs/quality_review.md +524 -172
- data/docs/reflection_memory_as_accumulating_judgment.md +67 -0
- data/lib/claude_memory/activity_log.rb +86 -0
- data/lib/claude_memory/commands/census_command.rb +210 -0
- data/lib/claude_memory/commands/completion_command.rb +3 -0
- data/lib/claude_memory/commands/dashboard_command.rb +54 -0
- data/lib/claude_memory/commands/dedupe_conflicts_command.rb +55 -0
- data/lib/claude_memory/commands/digest_command.rb +273 -0
- data/lib/claude_memory/commands/hook_command.rb +61 -2
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
- data/lib/claude_memory/commands/reclassify_references_command.rb +56 -0
- data/lib/claude_memory/commands/registry.rb +7 -1
- data/lib/claude_memory/commands/show_command.rb +90 -0
- data/lib/claude_memory/commands/skills/distill-transcripts.md +13 -1
- data/lib/claude_memory/commands/stats_command.rb +131 -2
- data/lib/claude_memory/commands/sweep_command.rb +2 -0
- data/lib/claude_memory/configuration.rb +16 -0
- data/lib/claude_memory/core/relative_time.rb +9 -0
- data/lib/claude_memory/dashboard/api.rb +610 -0
- data/lib/claude_memory/dashboard/conflicts.rb +279 -0
- data/lib/claude_memory/dashboard/efficacy.rb +127 -0
- data/lib/claude_memory/dashboard/fact_presenter.rb +109 -0
- data/lib/claude_memory/dashboard/health.rb +175 -0
- data/lib/claude_memory/dashboard/index.html +2707 -0
- data/lib/claude_memory/dashboard/knowledge.rb +136 -0
- data/lib/claude_memory/dashboard/moments.rb +244 -0
- data/lib/claude_memory/dashboard/reuse.rb +97 -0
- data/lib/claude_memory/dashboard/scoped_fact_resolver.rb +95 -0
- data/lib/claude_memory/dashboard/server.rb +211 -0
- data/lib/claude_memory/dashboard/timeline.rb +68 -0
- data/lib/claude_memory/dashboard/trust.rb +454 -0
- data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
- data/lib/claude_memory/distill/reference_material_detector.rb +78 -0
- data/lib/claude_memory/hook/auto_memory_mirror.rb +112 -0
- data/lib/claude_memory/hook/context_injector.rb +97 -3
- data/lib/claude_memory/hook/handler.rb +191 -3
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +8 -0
- data/lib/claude_memory/mcp/query_guide.rb +11 -0
- data/lib/claude_memory/mcp/text_summary.rb +29 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +13 -0
- data/lib/claude_memory/mcp/tools.rb +148 -0
- data/lib/claude_memory/publish.rb +13 -21
- data/lib/claude_memory/recall/stale_detector.rb +67 -0
- data/lib/claude_memory/resolve/predicate_policy.rb +2 -0
- data/lib/claude_memory/resolve/resolver.rb +41 -11
- data/lib/claude_memory/store/llm_cache.rb +68 -0
- data/lib/claude_memory/store/metrics_aggregator.rb +96 -0
- data/lib/claude_memory/store/schema_manager.rb +1 -1
- data/lib/claude_memory/store/sqlite_store.rb +47 -143
- data/lib/claude_memory/store/store_manager.rb +29 -0
- data/lib/claude_memory/sweep/maintenance.rb +216 -0
- data/lib/claude_memory/sweep/recall_timestamp_refresher.rb +83 -0
- data/lib/claude_memory/sweep/sweeper.rb +2 -0
- data/lib/claude_memory/templates/hooks.example.json +5 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +24 -0
- metadata +51 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c2164011e2c50c7fdb0bcad468a25814f372384c3a49fa4c9414313ab3975e00
|
|
4
|
+
data.tar.gz: 3e2843979d9b9e0d4a21bfa3650f6cd6843ce18d2a95af884e303572259bca62
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6c074b607c1e4f13743de36bb2074495d0ad24d0c826b62b49e0e827f311e3424bc881f42236db22a92dd2d5281e6ef13ca450966b1d9438ac1b36ceaa3ab2ce
|
|
7
|
+
data.tar.gz: 4e06c8fed9c323974ee4d7e5b41386ee4682ba5ba88b67797ac6864bbdf03663e5b74cb33497a371a8263eff4c24d405708a80e4862c4f578b221286bf40b236
|
data/.claude/memory.sqlite3
CHANGED
|
Binary file
|
data/.claude/skills/dashboard/SKILL.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: dashboard
|
|
3
|
+
description: Launch a local web dashboard for ClaudeMemory debugging and observability
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Dashboard
|
|
7
|
+
|
|
8
|
+
Launch the ClaudeMemory debugging dashboard to visualize memory system health, activity, and efficacy.
|
|
9
|
+
|
|
10
|
+
## Task
|
|
11
|
+
|
|
12
|
+
Start the dashboard web server so the user can inspect what's happening behind the scenes.
|
|
13
|
+
|
|
14
|
+
## Steps
|
|
15
|
+
|
|
16
|
+
1. Run the dashboard command:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
claude-memory dashboard
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
This starts a local web server (default port 3377) and opens it in the browser.
|
|
23
|
+
|
|
24
|
+
## What the Dashboard Shows
|
|
25
|
+
|
|
26
|
+
- **Health Status**: Database health, hook configuration, vector index status
|
|
27
|
+
- **Overview**: Fact/entity/content counts, top predicates, entity type distribution, 30-day activity timeline
|
|
28
|
+
- **Activity**: Live event log of hook executions (ingest, context, sweep), memory recalls, and store extractions with timing and details
|
|
29
|
+
- **Facts**: Searchable fact explorer with status filtering, predicate/object search
|
|
30
|
+
- **Efficacy**: Recall hit rate, total results served, average results per query, top queries by result count
|
|
31
|
+
|
|
32
|
+
## Options
|
|
33
|
+
|
|
34
|
+
- `--port PORT` - Use a different port (default: 3377)
|
|
35
|
+
- `--no-open` - Don't auto-open the browser
|
|
36
|
+
|
|
37
|
+
## Notes
|
|
38
|
+
|
|
39
|
+
- Dashboard auto-refreshes every 30 seconds
|
|
40
|
+
- Activity events are recorded by hooks and MCP tools into the `activity_events` table
|
|
41
|
+
- The dashboard reads from both global and project databases
|
|
42
|
+
- Press Ctrl+C to stop the server
|
data/.claude-plugin/marketplace.json
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
"plugins": [
|
|
8
8
|
{
|
|
9
9
|
"name": "claude-memory",
|
|
10
|
-
"version": "0.9.1",
|
|
10
|
+
"version": "0.11.0",
|
|
11
11
|
"source": "./",
|
|
12
12
|
"description": "Long-term memory for Claude Code. Recalls architecture, conventions, and decisions across sessions — so Claude explains your codebase without file traversal, follows your patterns, and never re-asks what it already learned.",
|
|
13
13
|
"repository": "https://github.com/codenamev/claude_memory"
|
data/.claude-plugin/plugin.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-memory",
|
|
3
|
-
"version": "0.9.1",
|
|
3
|
+
"version": "0.11.0",
|
|
4
4
|
"description": "Long-term memory for Claude Code. Recalls architecture, conventions, and decisions across sessions — so Claude explains your codebase without file traversal, follows your patterns, and never re-asks what it already learned.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Valentino Stoll",
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,136 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.11.0] - 2026-04-30
|
|
8
|
+
|
|
9
|
+
Theme: **Trust & Cost** — five user-visible signals that answer "is memory still worth it?" with numbers a skeptical user can read in <30 seconds.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **Token budget telemetry** — every successful SessionStart context injection now records an estimated `context_tokens` count on its `activity_events` row. Surfaced three ways:
|
|
14
|
+
- Dashboard Trust panel emits a `token_budget` block with p50/p95/avg/sample_size over the last 30 days, so the JSON dashboard endpoint and any downstream consumer answer "what does memory cost per session?"
|
|
15
|
+
- `claude-memory digest` includes a "Context cost" subsection between activity and new-knowledge so the weekly report shows the price tag next to the value.
|
|
16
|
+
- `claude-memory stats --tokens [--since DAYS]` reports total sessions, p50/p95/avg/min/max, and a histogram across <500 / 500-1k / 1-2k / 2-5k / 5k+ buckets.
|
|
17
|
+
- Purely additive — no schema migration. Historical events written before this release simply contribute zero samples until new injections accumulate.
|
|
18
|
+
- First 0.11.0 milestone item from the 1.0 punchlist (Trust & Cost). Closes the "what % of my SessionStart token budget does memory consume?" gap.
|
|
19
|
+
- **Hallucination rate metric** — the dashboard now quantifies how clean the fact base is, not just how full it is. `Distill::BareConclusionDetector` is the production-side mirror of the SessionStart prompt's reason-clause requirement (decision/convention facts must embed "because…" / "so that…" / "to avoid…"). Surfaced two ways:
|
|
20
|
+
- Dashboard Trust panel emits a `quality_score` block aggregating across project + global active facts: `suspect_count` (predicate=reference, retagged by ReferenceMaterialDetector), `bare_conclusion_count`, percentages, and an overall 0–100 score (higher = cleaner). Returns 100 on empty stores so fresh installs aren't penalized.
|
|
21
|
+
- `claude-memory digest` includes a "Quality" section showing the score breakdown plus the in-window rejection rate ("of facts created in the last 7 days, X% have been rejected since"), so calibration drift is visible.
|
|
22
|
+
- Second 0.11.0 milestone item. Pairs with token-budget telemetry to answer "is memory still worth its cost?" via two skeptic-friendly numbers.
|
|
23
|
+
- **`claude-memory show`** — new CLI command prints what memory would inject at the next SessionStart in plain Markdown. Runs the exact `Hook::ContextInjector` path real sessions use, so output matches what Claude actually receives. Footer reports fact count, ~token estimate, and char count so users see the SessionStart cost at a glance.
|
|
24
|
+
- Default suppresses the raw-transcript "Pending Knowledge Extraction" dump (intended for LLM distillation, not human reading); pass `--pending` to include it.
|
|
25
|
+
- `--source SOURCE` (startup/resume/clear) simulates each fresh-session entrypoint so users can preview which sections would appear.
|
|
26
|
+
- Third 0.11.0 milestone item. Closes the inspectability gap — trust requires being able to see what memory will inject, the same way `cat CLAUDE.md` works.
|
|
27
|
+
- **First-week ROI nudge** — at SessionEnd, memory now prints `memory contributed N facts this session, %used = X` for the first 10 sessions, then quiets. New users get user-visible proof memory is doing work for them without having to know about the dashboard. Once trust is established (or it isn't), the nudge gets out of the way.
|
|
28
|
+
- New `claude-memory hook nudge` subcommand + `Hook::Handler#nudge`. SessionEnd config now wires `[ingest, sweep, nudge]` in order.
|
|
29
|
+
- Silent on `CLAUDE_MEMORY_NO_NUDGE=1` opt-out, missing session_id, n=0 contributions, and after MAX_NUDGES emissions. The empty-session silent path doesn't burn a slot — quiet sessions don't count toward the 10.
|
|
30
|
+
- Activity event `roi_nudge` records `{n, used, pct, prior_count}` per emission so a future migration could change the threshold without re-counting from raw events.
|
|
31
|
+
- Fourth 0.11.0 milestone item. Cold-start trust signal that pairs with #47 (token cost) and #48 (quality) to make the first-week answer to "is this worth it?" visible without effort.
|
|
32
|
+
- **Harm benchmark prototype** — `spec/benchmarks/dataset/harm_scenarios.yml` + `spec/benchmarks/e2e/harm_bench_spec.rb`. Three hand-written cases spanning the riskiest harm classes (stale_tech, mismatched_scope, superseded_undetected). The first ClaudeMemory benchmark that measures whether memory can make Claude *wrong* — every other benchmark only measures whether memory helps.
|
|
33
|
+
- Structure validation (regex compile, fact loadability, harm-class coverage) runs in stub mode as part of `:benchmark` tag.
|
|
34
|
+
- Real-mode runner: `EVAL_MODE=real bundle exec rspec spec/benchmarks/e2e/harm_bench_spec.rb` — needs `claude` CLI on PATH, ~$2-8 per run. Reports harm rate; doesn't enforce a threshold yet (that's the 0.12 release gate).
|
|
35
|
+
- 0.11.0 de-risking item. If even one of these three surfaces a harm now, the full 10-15-case benchmark planned for 0.12 will likely reveal a fundamental issue — better to learn that at 0.11 than at 0.12. **Real-mode prototype run on 2026-04-30 reported 0/3 harm** — green light to expand to the full corpus in 0.12.
|
|
36
|
+
|
|
37
|
+
### Changed
|
|
38
|
+
|
|
39
|
+
- **Hallucination-rate metric calibration** — `Dashboard::Trust#quality_score` now reports a windowed (last 30d) "live" score as the headline plus a "historical" block over all active facts. Production verification on 2026-04-30 (recorded in `docs/quality_review.md`) showed the unwindowed metric was technically correct but pragmatically misleading: 97% of bare-conclusion facts pre-dated the 2026-04-20 reason-clause prompt commit, and the entire 7-day rejection cluster was a single-class systemic failure (a `/study-repo` burst), not ongoing noise. The split makes the metric actionable: live score = ongoing extraction quality, historical = legacy data. The digest's "Quality" section uses the live score as the headline.
|
|
40
|
+
|
|
41
|
+
### Fixed
|
|
42
|
+
|
|
43
|
+
- Real-eval CLI runner now passes `allowed_tools` through explicitly so the harm benchmark and other real-mode benches can pre-allow MCP memory tools without per-test wiring.
|
|
44
|
+
|
|
45
|
+
### Upgrade Notes
|
|
46
|
+
|
|
47
|
+
- No schema migration. All new features are purely additive.
|
|
48
|
+
- Hooks run the installed gem from PATH, not the working tree. After upgrading, `bundle exec rake install` (or `gem install claude_memory`) is required for the new SessionEnd nudge, `claude-memory show` command, `--tokens` stats flag, and `context_tokens` activity-event field to actually fire on real hook events.
|
|
49
|
+
- Existing `quality_score` consumers will see additional fields (`window_days`, `historical`) in the snapshot. The original keys (`score`, `total_active`, `suspect_count`, `bare_conclusion_count`, `suspect_pct`, `bare_pct`) remain at the top level and now reflect the 30-day live window — historical numbers move to the `historical` sub-hash.
|
|
50
|
+
|
|
51
|
+
## [0.10.0] - 2026-04-28
|
|
52
|
+
|
|
53
|
+
### Added
|
|
54
|
+
|
|
55
|
+
**Dashboard — feed-first redesign with observability built in**
|
|
56
|
+
|
|
57
|
+
- New feed-first dashboard UI with scope-aware moments, fact detail modal, query tester, and activity drilldown. Reuse, Trust, Knowledge, Conflicts, and Moments panels each backed by a dedicated module (`Dashboard::{Reuse, Trust, Knowledge, Conflicts, Moments}`) under unit tests, replacing the prior all-in-API-class layout.
|
|
58
|
+
- 👍/👎 feedback on individual moments with persisted verdicts (schema v16, `moment_feedback` table). Trust panel surfaces a 30-day up/down ratio so the dashboard can answer "when memory surfaces something, are users marking it useful?".
|
|
59
|
+
- Utilization ratio panel — of facts extracted in the last 30 days, how many has Claude actually used in a recall or context injection? Color-coded (green ≥40%, yellow ≥15%, red below). Hidden on fresh installs to avoid misleading zeros.
|
|
60
|
+
- Conflict deduping at the display layer: identical (subject, predicate, object_pair) detections collapse into one row with a `×N` badge. Sidebar "Needs review" count now reflects distinct contradictions, not raw row count.
|
|
61
|
+
- Activity events drilldown: each moment opens a payload modal with prettified JSONL, recall trigger correlation (which user prompt motivated this lookup), and linked-fact resolution scoped per database.
|
|
62
|
+
- Vector index health threshold and clickable remediation hints in the health dashboard.
|
|
63
|
+
|
|
64
|
+
**CLI — observability surfaces and one-shot cleanups**
|
|
65
|
+
|
|
66
|
+
- `claude-memory digest [--since DAYS] [--output FILE]` — weekly markdown report. Sections: Activity, New knowledge by predicate, Utilization (extracted vs used), Conflicts, Feedback. No new schema; renders from existing aggregates.
|
|
67
|
+
- `claude-memory census [--root DIR]` — privacy-safe cross-project vocabulary scan. Aggregates per-DB predicate × status counts, novel predicates, synonym candidates. Suppresses object literals, entity names, and paths; per-DB IDs are SHA256-prefixed.
|
|
68
|
+
- `claude-memory dedupe-conflicts [--scope SCOPE] [--dry-run]` — one-shot cleanup for historical conflict-row duplication that predates the Resolver dedup fix (commit f571ba4). Groups by (subject, predicate, normalized object pair), keeps the earliest, migrates provenance to the keeper.
|
|
69
|
+
- `claude-memory reclassify-references [--scope SCOPE] [--dry-run]` — retags active convention facts that the new `Distill::ReferenceMaterialDetector` flags as reference material (LOC counts, star counts, "X is a plugin..." templates, "by Firstname Lastname" attributions).
|
|
70
|
+
|
|
71
|
+
**Memory quality**
|
|
72
|
+
|
|
73
|
+
- Access-based staleness scoring (improvements.md #35). Schema v17 adds `last_recalled_at` to facts. `Sweep::RecallTimestampRefresher` derives the field periodically from activity_events; `claude-memory stats --stale [--stale-days N]` lists facts that haven't been recalled inside the threshold. Replaces the prior "active facts minus seen-in-recalls" approximation.
|
|
74
|
+
- Auto-memory mirror (improvements.md #36). On fresh sessions, the SessionStart context hook scans `~/.claude/projects/<slug>/memory/*.md` and surfaces new or changed entries as extraction candidates so users can promote auto-memory observations into claude_memory without manual copy-paste.
|
|
75
|
+
- Reasoning requirement enforced in distillation (improvements.md #34). The SessionStart prompt and the `/distill-transcripts` skill now require a why clause for `decision` and `convention` predicates ("because…", "so that…", etc.). Audit found ~75% of facts were bare conclusions before this change.
|
|
76
|
+
- `Distill::ReferenceMaterialDetector` reclassifies convention facts whose object text matches reference patterns. New `reference` predicate registered in `PredicatePolicy` with its own `:references` snapshot section. Detector runs at write time in `ManagementHandlers#store_extraction` so mislabeling can't persist.
|
|
77
|
+
- Predicate census command (#30) for cross-project vocabulary audits — see CLI section above.
|
|
78
|
+
|
|
79
|
+
**Benchmarks and observability**
|
|
80
|
+
|
|
81
|
+
- Repeat-correction benchmark harness (improvements.md #32). `spec/benchmarks/e2e/repeat_correction_spec.rb` pre-loads a past correction as a memory fact, runs the prompt through real Claude under `EVAL_MODE=real`, and reports pass rate (no violation patterns matched). Starter set of 2 scenarios drawn from this project's recurring gotchas.
|
|
82
|
+
- Relevance ratio metric (improvements.md #31). `Hook::ContextInjector#emitted_subjects` exposes the subjects injected at SessionStart; `BenchmarkHelpers::RelevanceMetrics` measures whether they appear in Claude's response. Trend signal for memory-application quality, integrated into `devmemeval_spec.rb`.
|
|
83
|
+
- MCP server embeds the V=R/C ("Verify before Recommend / Correct") mental model in agent instructions so memory recommendations come with built-in verification cues.
|
|
84
|
+
|
|
85
|
+
**Schema v14 → v17 (additive only, automatic on first run)**
|
|
86
|
+
|
|
87
|
+
- Migration 015: adds `activity_events` table for hook/recall/context/sweep telemetry. Powers the dashboard timeline, moments feed, and efficacy reports.
|
|
88
|
+
- Migration 016: adds `moment_feedback` table (unique on event_id) for the dashboard 👍/👎 surface.
|
|
89
|
+
- Migration 017: adds nullable `facts.last_recalled_at` for access-based staleness scoring.
|
|
90
|
+
|
|
91
|
+
**1.0 readiness track**
|
|
92
|
+
|
|
93
|
+
- New `docs/1_0_punchlist.md` opens the path to 1.0: token-budget telemetry, hallucination-rate metric, negative-fact harm benchmark, CLAUDE.md baseline publication, `claude-memory show`, benchmark scoreboard. Ten entries (#47-56) added to `docs/improvements.md` with concrete file:line plumbing notes.
|
|
94
|
+
|
|
95
|
+
### Changed
|
|
96
|
+
|
|
97
|
+
- `Resolver#apply_conflict` no longer creates a duplicate disputed fact + conflict row when the same contradicting value is re-extracted. Looks up disputed facts in the same (subject, predicate) slot and reinforces with provenance instead.
|
|
98
|
+
- `Resolver` no longer treats the distiller's `scope_hint` as a scope override. `scope_hint` is advisory metadata; `fact.scope` must match the DB the row lives in. Earlier behavior caused scope leakage where global-hinted distillations landed in the project DB.
|
|
99
|
+
- `Hook::ContextInjector` adds `emitted_fact_ids` and `emitted_subjects` accessors so benchmark harnesses can attribute injection contributions per session.
|
|
100
|
+
- `SQLiteStore` decomposed via module inclusion: `LLMCache` and `MetricsAggregator` extracted into `lib/claude_memory/store/`. SQLiteStore back under 600 LOC.
|
|
101
|
+
- `Dashboard::API` decomposed: `FactPresenter`, `Conflicts`, `Efficacy::Reporter`, `Timeline`, `Health` extracted into dedicated classes following the boundary pattern. API now routes/delegates rather than aggregating.
|
|
102
|
+
- Dashboard releases DB connections after each HTTP request (was holding connections open for the lifetime of the WEBrick session).
|
|
103
|
+
- `Sweep::Maintenance` gains `dedupe_open_conflicts` and `reclassify_references` for the one-shot CLI commands above.
|
|
104
|
+
- Round-trip migration specs from v12, v13, v14 → v17 (per-version migrations covered by `spec/claude_memory/store/migrations/`). Codifies the release-blocker convention: any schema bump must round-trip from each prior major-release boundary back ~3 releases.
|
|
105
|
+
|
|
106
|
+
### Fixed
|
|
107
|
+
|
|
108
|
+
- Dashboard surfaces an actionable hint when Recall hits FTS5 corruption (run `claude-memory compact` rather than a generic error).
|
|
109
|
+
- Dashboard query tester unwraps the nested Recall result shape rather than printing the raw envelope.
|
|
110
|
+
- Dashboard health checks correctly detect the claude-memory hook installation across the two-level Claude Code hooks structure (was reporting false negatives when hooks were installed under a matcher block).
|
|
111
|
+
- Dashboard Efficacy "this session" correlation falls back to a time window when the recall event has no `session_id` (MCP tool calls don't thread session_id).
|
|
112
|
+
- Bulk-reject in the Conflicts modal now retries with an actionable message when the server-side state is stale.
|
|
113
|
+
|
|
114
|
+
### Upgrade Notes
|
|
115
|
+
|
|
116
|
+
**Schema bump v14 → v17.** Three migrations run automatically on first launch after upgrade. All three are additive (no existing data is rewritten):
|
|
117
|
+
|
|
118
|
+
1. Migration 015 creates `activity_events` (hook/recall telemetry).
|
|
119
|
+
2. Migration 016 creates `moment_feedback` (dashboard verdicts).
|
|
120
|
+
3. Migration 017 adds `facts.last_recalled_at` (NULL by default; `Sweep::RecallTimestampRefresher` populates it on the next sweep cycle from existing activity_events).
|
|
121
|
+
|
|
122
|
+
The migration delta has round-trip spec coverage in `spec/claude_memory/store/migrations/`. Forward-compatibility: 0.10.0 databases cannot be opened by 0.9.x or earlier. Downgrade is destructive — back up `~/.claude/memory.sqlite3` and `.claude/memory.sqlite3` before downgrading.
|
|
123
|
+
|
|
124
|
+
**Optional historical cleanups.** Two new admin commands address data tails left by earlier bugs that have since been fixed at the source:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
claude-memory dedupe-conflicts --dry-run # preview duplicate conflict rows
|
|
128
|
+
claude-memory dedupe-conflicts # consolidate them
|
|
129
|
+
claude-memory reclassify-references --dry-run # preview reference-material mislabels
|
|
130
|
+
claude-memory reclassify-references # retag them
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Both are opt-in. Neither runs in the regular sweep cycle. Use `--scope global` to clean the global DB.
|
|
134
|
+
|
|
135
|
+
**Telemetry footprint.** The `activity_events` table grows with hook activity. The dashboard surfaces this table by default, and it powers the timeline/moments/efficacy panels. Retention pruning is not yet automatic (planned for a follow-up); manual cleanup via `DELETE FROM activity_events WHERE occurred_at < ?` is safe — the dashboard tolerates missing history.
|
|
136
|
+
|
|
7
137
|
## [0.9.1] - 2026-04-16
|
|
8
138
|
|
|
9
139
|
### Fixed
|
data/CLAUDE.md
CHANGED
|
@@ -163,7 +163,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
|
|
|
163
163
|
- Each command is a separate class (HelpCommand, DoctorCommand, etc.)
|
|
164
164
|
- All commands inherit from BaseCommand
|
|
165
165
|
- Dependency injection for I/O (stdout, stderr, stdin)
|
|
166
|
-
-
|
|
166
|
+
- 34 commands total, each focused on single responsibility
|
|
167
167
|
|
|
168
168
|
- **`Configuration`**: Centralized ENV access (`configuration.rb`)
|
|
169
169
|
- Single source of truth for paths and environment variables
|
|
@@ -208,6 +208,9 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
|
|
|
208
208
|
- **`Distill`**: Fact extraction interface (`distill/`)
|
|
209
209
|
- Pluggable distiller design (current: NullDistiller stub)
|
|
210
210
|
- Extracts entities, facts, scope hints from content
|
|
211
|
+
- `ReferenceMaterialDetector`: classifies "X is a plugin/library/tool" templates, LOC counts, "by Firstname Lastname" attributions as reference material. Runs in `ManagementHandlers#store_extraction` so mislabeling can't persist
|
|
212
|
+
- `BareConclusionDetector` (0.11.0+): production-side mirror of the SessionStart prompt's reason-clause requirement. Pure function — flags `decision` / `convention` facts whose object lacks a reason-clause signal ("because", "so that", "to avoid", etc.). Powers the `quality_score` metric on the Trust panel and the digest's Quality section.
|
|
213
|
+
- SessionStart distillation prompt enforces reason clauses ("because…", "so that…") for `decision` and `convention` predicates — bare conclusions are explicitly disallowed
|
|
211
214
|
|
|
212
215
|
- **`Resolve`**: Truth maintenance and conflict resolution (`resolve/`)
|
|
213
216
|
- Determines equivalence, supersession, or conflicts
|
|
@@ -226,7 +229,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
|
|
|
226
229
|
- Modes: shared (repo), local (uncommitted), home (user directory)
|
|
227
230
|
|
|
228
231
|
- **`MCP`**: Model Context Protocol server and tools (`mcp/`)
|
|
229
|
-
- Exposes memory tools to Claude Code (
|
|
232
|
+
- Exposes memory tools to Claude Code (25 tools total)
|
|
230
233
|
- `Telemetry`: Records tool invocations to `mcp_tool_calls` table for usage stats
|
|
231
234
|
- Dual content/structuredContent responses with compact mode
|
|
232
235
|
|
|
@@ -234,6 +237,7 @@ New MCP tools `memory.undistilled` and `memory.mark_distilled` support the pipel
|
|
|
234
237
|
- Reads stdin JSON from Claude Code hooks
|
|
235
238
|
- Routes to ingest/sweep/publish commands
|
|
236
239
|
- `DistillationRunner`: Manages context hook injection with undistilled content for LLM extraction
|
|
240
|
+
- `AutoMemoryMirror` (0.10.0+): On fresh sessions, scans `~/.claude/projects/<slug>/memory/*.md` for new/changed entries and surfaces them as extraction candidates in the SessionStart context. State diffed by md5 in `.claude/auto_memory_mirror.json`; bounded to 5 candidates per session, 1500 chars each.
|
|
237
241
|
|
|
238
242
|
### Database Schema
|
|
239
243
|
|
|
@@ -246,16 +250,19 @@ Key tables (defined in `sqlite_store.rb`):
|
|
|
246
250
|
- `fact_links`: Supersession and conflict relationships
|
|
247
251
|
- `conflicts`: Open contradictions
|
|
248
252
|
- `mcp_tool_calls`: MCP server tool invocation telemetry (schema v13)
|
|
253
|
+
- `activity_events`: Hook/recall/context/sweep/nudge telemetry (schema v15) — powers the dashboard timeline, moments feed, efficacy reports. Event types: `hook_ingest`, `hook_context` (carries `context_tokens` since 0.11.0), `hook_sweep`, `hook_publish`, `recall`, `store_extraction`, `roi_nudge` (since 0.11.0).
|
|
254
|
+
- `moment_feedback`: Per-moment 👍/👎 verdicts with optional notes (schema v16) — unique on event_id, repeat clicks upsert
|
|
249
255
|
|
|
250
256
|
Facts include:
|
|
251
257
|
- `scope`: "global" or "project" (determines applicability)
|
|
252
258
|
- `project_path`: Set for project-scoped facts
|
|
253
259
|
- `valid_from`/`valid_to`: Temporal validity window
|
|
260
|
+
- `last_recalled_at` (schema v17): Set by `Sweep::RecallTimestampRefresher` from activity_events; powers `claude-memory stats --stale` and the dashboard's "stale" needs-review count
|
|
254
261
|
|
|
255
262
|
### Scope System
|
|
256
263
|
|
|
257
264
|
Facts are scoped to control where they apply:
|
|
258
|
-
- **project**: Current project only (e.g., "
|
|
265
|
+
- **project**: Current project only (e.g., "claude_memory uses SQLite for storage")
|
|
259
266
|
- **global**: All projects (e.g., "I prefer 4-space indentation")
|
|
260
267
|
|
|
261
268
|
Distiller detects signals like "always", "in all projects", "my preference" and sets `scope_hint: "global"`. Users can manually promote facts via `claude-memory promote <fact_id>` or the `memory.promote` MCP tool.
|
|
@@ -325,7 +332,7 @@ Also update `SECTION_MAP` if the predicate should appear in a specific snapshot
|
|
|
325
332
|
|
|
326
333
|
- `lib/claude_memory.rb`: Main module, requires, database path helpers
|
|
327
334
|
- `lib/claude_memory/cli.rb`: Thin command router (41 lines)
|
|
328
|
-
- `lib/claude_memory/commands/`: Individual command classes (
|
|
335
|
+
- `lib/claude_memory/commands/`: Individual command classes (34 commands)
|
|
329
336
|
- `lib/claude_memory/configuration.rb`: Centralized configuration and ENV access
|
|
330
337
|
- `lib/claude_memory/domain/`: Domain models (Fact, Entity, Provenance, Conflict)
|
|
331
338
|
- `lib/claude_memory/core/`: Value objects and null objects
|
|
@@ -340,14 +347,14 @@ Also update `SECTION_MAP` if the predicate should appear in a specific snapshot
|
|
|
340
347
|
|
|
341
348
|
The gem includes an MCP server (`claude-memory serve-mcp`) that exposes memory operations as tools. Configuration should be in `.mcp.json` at project root.
|
|
342
349
|
|
|
343
|
-
Available MCP tools (
|
|
350
|
+
Available MCP tools (25 total):
|
|
344
351
|
- **Query & Recall**: `memory.recall`, `memory.recall_index`, `memory.recall_details`, `memory.recall_semantic`, `memory.search_concepts`
|
|
345
352
|
- **Provenance**: `memory.explain`, `memory.fact_graph`
|
|
346
353
|
- **Shortcuts**: `memory.decisions`, `memory.conventions`, `memory.architecture`
|
|
347
354
|
- **Context**: `memory.facts_by_tool`, `memory.facts_by_context`
|
|
348
355
|
- **Management**: `memory.promote`, `memory.reject_fact`, `memory.store_extraction`
|
|
349
356
|
- **Distillation**: `memory.undistilled`, `memory.mark_distilled`
|
|
350
|
-
- **Monitoring**: `memory.status`, `memory.stats`, `memory.changes`, `memory.conflicts`
|
|
357
|
+
- **Monitoring**: `memory.status`, `memory.stats`, `memory.changes`, `memory.conflicts`, `memory.activity`
|
|
351
358
|
- **Maintenance**: `memory.sweep_now`
|
|
352
359
|
- **Discovery**: `memory.check_setup`, `memory.list_projects`
|
|
353
360
|
|
|
@@ -367,8 +374,25 @@ ClaudeMemory integrates with Claude Code via hooks in `.claude/settings.json`:
|
|
|
367
374
|
- Runs time-bounded maintenance on both databases
|
|
368
375
|
- Cleans up vec0 entries for superseded/expired facts
|
|
369
376
|
|
|
377
|
+
- **Nudge hook** (0.11.0+): Triggers on SessionEnd, fires after ingest+sweep
|
|
378
|
+
- Calls `claude-memory hook nudge`
|
|
379
|
+
- For the first 10 sessions only, prints "memory contributed N facts this session, %used = X" to stdout so new users see ROI inline before they discover the dashboard
|
|
380
|
+
- Records `roi_nudge` activity_events; quiets after `MAX_NUDGES` emissions
|
|
381
|
+
- Opt out with `CLAUDE_MEMORY_NO_NUDGE=1` (no event recorded on opt-out)
|
|
382
|
+
- Empty sessions (N=0) silently no-op so quiet sessions don't burn nudge slots
|
|
383
|
+
|
|
370
384
|
Hook commands read JSON payloads from stdin for robustness. Supports `--async` flag for non-blocking execution.
|
|
371
385
|
|
|
386
|
+
## Dashboard
|
|
387
|
+
|
|
388
|
+
Local web UI for inspecting memory state. Started via `claude-memory dashboard` (default port 3377). Reads from both global and project databases; no write side effects from page loads.
|
|
389
|
+
|
|
390
|
+
The dashboard is a thin web layer over the same `Recall`/`Conflicts`/`Trust`/`Moments`/`Knowledge`/`Reuse`/`Health`/`Timeline` classes the MCP server uses. Each panel is backed by a dedicated module under `lib/claude_memory/dashboard/`; `Dashboard::API` holds HTTP-shape glue and per-endpoint formatting (delegating non-trivial logic to the panel classes).
|
|
391
|
+
|
|
392
|
+
Connections are released after each request, so the dashboard never holds a WAL writer lock open across page loads.
|
|
393
|
+
|
|
394
|
+
See [docs/dashboard.md](docs/dashboard.md) for the user-facing guide (panels, common workflows, related CLI commands).
|
|
395
|
+
|
|
372
396
|
## Code Style
|
|
373
397
|
|
|
374
398
|
This project uses [Standard Ruby](https://github.com/standardrb/standard) for linting. Run `bundle exec rake standard:fix` before committing.
|
data/README.md
CHANGED
|
@@ -140,6 +140,69 @@ File-searchable questions ("what version is this?") and one-shot code generation
|
|
|
140
140
|
- **Claude-Powered**: Uses Claude's intelligence to extract facts (no API key needed)
|
|
141
141
|
- **Token Efficient**: 10x reduction in memory queries with progressive disclosure
|
|
142
142
|
- **Database Maintenance**: Compact, export, and backup commands
|
|
143
|
+
- **Built-in Observability** (0.10.0+): `claude-memory dashboard` opens a local web UI with a moments feed, trust panel (token budget, quality score, utilization, feedback), conflicts dedup, knowledge index, and 👍/👎 feedback. See **[Dashboard guide →](docs/dashboard.md)**. `claude-memory digest` writes a weekly markdown report (Activity, Context cost, Quality, New knowledge, Utilization, Conflicts, Feedback); `claude-memory show` prints what would be injected at the next SessionStart; `claude-memory census` audits the predicate vocabulary across projects.
|
|
144
|
+
|
|
145
|
+
## What's New in 0.11.0
|
|
146
|
+
|
|
147
|
+
Five user-visible signals so you can answer "is memory still worth it?" with
|
|
148
|
+
numbers, not vibes:
|
|
149
|
+
|
|
150
|
+
- **Token budget telemetry** — every SessionStart context injection now
|
|
151
|
+
records its estimated `context_tokens`. `claude-memory stats --tokens
|
|
152
|
+
[--since DAYS]` reports p50/p95/avg/min/max plus a histogram across
|
|
153
|
+
<500 / 500-1k / 1-2k / 2-5k / 5k+ buckets so you can see the per-session
|
|
154
|
+
cost at a glance. The dashboard's Trust panel and `claude-memory digest`
|
|
155
|
+
surface the same numbers.
|
|
156
|
+
- **Hallucination-rate metric** — the dashboard now scores how *clean* the
|
|
157
|
+
fact base is, not just how full it is. `Distill::BareConclusionDetector`
|
|
158
|
+
flags `decision` / `convention` facts that skipped the reason-clause
|
|
159
|
+
requirement. Trust panel shows `quality_score` (live 30-day window with
|
|
160
|
+
historical baseline beneath). `claude-memory digest` adds a Quality
|
|
161
|
+
section with rejection rate.
|
|
162
|
+
- **`claude-memory show`** — new command prints what memory *would* inject
|
|
163
|
+
at the next SessionStart in plain Markdown. Footer reports fact count,
|
|
164
|
+
~token estimate, and char count so you see the cost at a glance. Default
|
|
165
|
+
hides the raw-transcript "Pending Knowledge" dump for readability;
|
|
166
|
+
`--pending` opts in. `--source startup|resume|clear` simulates each
|
|
167
|
+
fresh-session entrypoint.
|
|
168
|
+
- **First-week ROI nudge** — at SessionEnd, memory now prints
|
|
169
|
+
`memory contributed N facts this session, %used = X` for the first 10
|
|
170
|
+
sessions, then quiets. Cold-start trust signal — you don't have to know
|
|
171
|
+
about the dashboard. Opt out with `CLAUDE_MEMORY_NO_NUDGE=1`.
|
|
172
|
+
- **Harm benchmark prototype** — first ClaudeMemory benchmark that
|
|
173
|
+
measures whether memory can make Claude *wrong*. Three hand-written
|
|
174
|
+
cases (stale-tech, mismatched-scope, superseded-but-undetected) under
|
|
175
|
+
`spec/benchmarks/e2e/harm_bench_spec.rb`. Real-mode run on the 0.11
|
|
176
|
+
release reported 0/3 harm; the full 10-15-case corpus + release gate
|
|
177
|
+
lands in 0.12.
|
|
178
|
+
|
|
179
|
+
## What's New in 0.10.0
|
|
180
|
+
|
|
181
|
+
Three behavior changes worth knowing about — they affect what you'll see in
|
|
182
|
+
extracted facts and SessionStart context, even if you don't change anything:
|
|
183
|
+
|
|
184
|
+
- **Auto-memory mirror** — On fresh sessions, the SessionStart context hook
|
|
185
|
+
scans `~/.claude/projects/<slug>/memory/*.md` and surfaces new or changed
|
|
186
|
+
entries as candidates for extraction into ClaudeMemory. You'll see a
|
|
187
|
+
"Pending Knowledge Extraction" section in Claude's startup context citing
|
|
188
|
+
files from your auto-memory directory. Claude reviews these and calls
|
|
189
|
+
`memory.store_extraction` for the high-signal ones; you don't need to
|
|
190
|
+
copy-paste manually anymore.
|
|
191
|
+
- **Why-clause enforcement** — When Claude distills `decision` and
|
|
192
|
+
`convention` facts, it's now required to embed a reason ("…because…",
|
|
193
|
+
"…so that…", "…to avoid…"). A bare conclusion is dead weight; a fact with
|
|
194
|
+
a reason stays useful when the situation changes. You'll see this
|
|
195
|
+
reflected in fact text being longer and more justified.
|
|
196
|
+
- **Reference predicate** — Active facts that look like reference material
|
|
197
|
+
(LOC counts, "X is a plugin/library/tool" templates, "by Firstname
|
|
198
|
+
Lastname" attributions) are auto-tagged `predicate=reference` instead of
|
|
199
|
+
`convention`. Keeps the conventions list signal-rich. Browse them in the
|
|
200
|
+
dashboard's Knowledge → References section, or run
|
|
201
|
+
`claude-memory reclassify-references --dry-run` to see candidates.
|
|
202
|
+
|
|
203
|
+
Plus: **staleness detection** (`claude-memory stats --stale`) lists active
|
|
204
|
+
facts that haven't been recalled in N days, so you can prune dead weight
|
|
205
|
+
explicitly. The dashboard's Trust → Needs review panel surfaces the count.
|
|
143
206
|
|
|
144
207
|
## Privacy Control
|
|
145
208
|
|
|
@@ -241,7 +304,8 @@ The uninstall command removes:
|
|
|
241
304
|
|
|
242
305
|
- 📖 [Getting Started](docs/GETTING_STARTED.md) - Step-by-step onboarding
|
|
243
306
|
- 💡 [Examples](docs/EXAMPLES.md) - Use cases and workflows
|
|
244
|
-
-
|
|
307
|
+
- 📊 [Dashboard](docs/dashboard.md) - Local web UI for inspection and trust signals (0.10.0+)
|
|
308
|
+
- 🔧 [Plugin Setup](docs/plugin.md) - Claude Code integration
|
|
245
309
|
- 🏗️ [Architecture](docs/architecture.md) - Technical deep dive
|
|
246
310
|
- 📝 [Changelog](CHANGELOG.md) - Release notes
|
|
247
311
|
|
|
@@ -292,7 +356,7 @@ The benchmark dataset draws from real CLAUDE.md patterns and is designed specifi
|
|
|
292
356
|
|
|
293
357
|
- **Language:** Ruby 3.2+
|
|
294
358
|
- **Storage:** SQLite3 (no external services)
|
|
295
|
-
- **Testing:**
|
|
359
|
+
- **Testing:** 1964 examples (~1700 unit/integration + ~250 benchmarks/evals), 100% core coverage
|
|
296
360
|
- **Code Style:** Standard Ruby
|
|
297
361
|
|
|
298
362
|
```bash
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Migration v15: Add activity_events table for debugging and observability
|
|
4
|
+
# Tracks hook executions, memory recalls, context injections, and sweep operations.
|
|
5
|
+
# Powers the dashboard timeline and efficacy reports.
|
|
6
|
+
Sequel.migration do
|
|
7
|
+
up do
|
|
8
|
+
create_table?(:activity_events) do
|
|
9
|
+
primary_key :id
|
|
10
|
+
String :event_type, null: false # "hook_ingest", "hook_context", "hook_sweep", "recall", "store_extraction"
|
|
11
|
+
String :session_id # Claude session that triggered the event
|
|
12
|
+
String :status, null: false # "success", "skipped", "error"
|
|
13
|
+
Integer :duration_ms # How long the operation took
|
|
14
|
+
String :detail_json, text: true # Event-specific details (JSON)
|
|
15
|
+
String :occurred_at, null: false # ISO 8601 timestamp
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
run "CREATE INDEX IF NOT EXISTS idx_activity_events_type ON activity_events(event_type)"
|
|
19
|
+
run "CREATE INDEX IF NOT EXISTS idx_activity_events_occurred_at ON activity_events(occurred_at)"
|
|
20
|
+
run "CREATE INDEX IF NOT EXISTS idx_activity_events_session ON activity_events(session_id)"
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
down do
|
|
24
|
+
drop_table?(:activity_events)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Migration v16: Per-moment feedback (improvements.md #43).
|
|
4
|
+
# Tracks a single thumbs-up/down verdict (+ optional note) per activity_event
|
|
5
|
+
# so the dashboard can surface a trust-calibration signal. Unique on event_id
|
|
6
|
+
# so a given moment has at most one current verdict; repeat clicks upsert.
|
|
7
|
+
Sequel.migration do
|
|
8
|
+
up do
|
|
9
|
+
create_table?(:moment_feedback) do
|
|
10
|
+
primary_key :id
|
|
11
|
+
Integer :event_id, null: false
|
|
12
|
+
String :verdict, null: false # "up" | "down"
|
|
13
|
+
String :note, text: true # optional freeform note
|
|
14
|
+
String :recorded_at, null: false
|
|
15
|
+
index :event_id, unique: true
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
down do
|
|
20
|
+
drop_table?(:moment_feedback)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Migration v17: Access-based staleness scoring (improvements.md #35).
|
|
4
|
+
# Records the last time a fact was surfaced via memory.recall or context
|
|
5
|
+
# injection, derived periodically from activity_events. Sweep-derived rather
|
|
6
|
+
# than per-call so we avoid WAL write contention on the recall hot path.
|
|
7
|
+
Sequel.migration do
|
|
8
|
+
up do
|
|
9
|
+
add_column :facts, :last_recalled_at, String
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
down do
|
|
13
|
+
drop_column :facts, :last_recalled_at
|
|
14
|
+
end
|
|
15
|
+
end
|