claude_memory 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +54 -85
- data/.claude/skills/release/SKILL.md +44 -6
- data/.claude/skills/study-repo/SKILL.md +15 -0
- data/.claude-plugin/commands/audit-memory.md +68 -0
- data/.claude-plugin/marketplace.json +1 -1
- data/.claude-plugin/plugin.json +2 -4
- data/CHANGELOG.md +50 -0
- data/CLAUDE.md +11 -4
- data/README.md +40 -1
- data/db/migrations/018_add_otel_telemetry.rb +81 -0
- data/docs/1_0_punchlist.md +318 -66
- data/docs/api_stability.md +346 -0
- data/docs/audit_runbook.md +209 -0
- data/docs/claude_monitoring.md +956 -0
- data/docs/improvements.md +148 -9
- data/docs/influence/ai-memory-systems-2026.md +403 -0
- data/docs/memory_audit_2026-05-21.md +303 -0
- data/docs/plugin.md +1 -1
- data/docs/soak/audit_2026-06-03_agent-training-program.json +53 -0
- data/docs/soak/audit_2026-06-03_agentic.json +31 -0
- data/docs/soak/audit_2026-06-03_ai-software-architect.json +19 -0
- data/docs/soak/audit_2026-06-03_chaos_to_the_rescue.json +60 -0
- data/docs/soak/audit_2026-06-03_claude_memory.json +55 -0
- data/docs/soak/audit_2026-06-03_daily-vibe.json +59 -0
- data/docs/soak/audit_2026-06-03_minerva-sky.json +19 -0
- data/docs/soak/audit_2026-06-03_nowreading.dev.json +19 -0
- data/docs/soak/audit_2026-06-03_ups.dev.json +55 -0
- data/docs/soak/baseline_2026-06-03.md +145 -0
- data/lib/claude_memory/audit/checks.rb +239 -0
- data/lib/claude_memory/audit/finding.rb +33 -0
- data/lib/claude_memory/audit/runner.rb +73 -0
- data/lib/claude_memory/commands/audit_command.rb +117 -0
- data/lib/claude_memory/commands/checks/embeddings_check.rb +97 -0
- data/lib/claude_memory/commands/dashboard_command.rb +2 -1
- data/lib/claude_memory/commands/doctor_command.rb +1 -0
- data/lib/claude_memory/commands/import_auto_memory_command.rb +180 -0
- data/lib/claude_memory/commands/otel_command.rb +240 -0
- data/lib/claude_memory/commands/registry.rb +5 -1
- data/lib/claude_memory/commands/setup_vectors_command.rb +182 -0
- data/lib/claude_memory/configuration.rb +60 -0
- data/lib/claude_memory/core/fact_query_builder.rb +1 -0
- data/lib/claude_memory/dashboard/api.rb +8 -0
- data/lib/claude_memory/dashboard/index.html +140 -1
- data/lib/claude_memory/dashboard/prompt_journey.rb +48 -0
- data/lib/claude_memory/dashboard/server.rb +86 -0
- data/lib/claude_memory/dashboard/telemetry.rb +156 -0
- data/lib/claude_memory/deprecations.rb +106 -0
- data/lib/claude_memory/distill/reference_material_detector.rb +37 -4
- data/lib/claude_memory/hook/auto_memory_mirror.rb +7 -3
- data/lib/claude_memory/hook/context_injector.rb +11 -2
- data/lib/claude_memory/mcp/tool_definitions.rb +3 -3
- data/lib/claude_memory/otel/attributes.rb +118 -0
- data/lib/claude_memory/otel/constants.rb +32 -0
- data/lib/claude_memory/otel/ingestor.rb +54 -0
- data/lib/claude_memory/otel/otlp_json_envelope.rb +254 -0
- data/lib/claude_memory/otel/prompt_scope.rb +108 -0
- data/lib/claude_memory/otel/settings_writer.rb +122 -0
- data/lib/claude_memory/otel/status.rb +58 -0
- data/lib/claude_memory/recall/staleness_annotator.rb +73 -0
- data/lib/claude_memory/resolve/predicate_policy.rb +17 -1
- data/lib/claude_memory/resolve/resolver.rb +30 -3
- data/lib/claude_memory/shortcuts.rb +61 -18
- data/lib/claude_memory/store/prompt_journey_query.rb +87 -0
- data/lib/claude_memory/store/schema_manager.rb +1 -1
- data/lib/claude_memory/store/sqlite_store.rb +136 -0
- data/lib/claude_memory/sweep/maintenance.rb +31 -1
- data/lib/claude_memory/sweep/sweeper.rb +6 -0
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +20 -0
- metadata +38 -1
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ok": false,
|
|
3
|
+
"checks_run": 10,
|
|
4
|
+
"counts": {
|
|
5
|
+
"error": 1,
|
|
6
|
+
"warn": 0,
|
|
7
|
+
"info": 1
|
|
8
|
+
},
|
|
9
|
+
"stats": {
|
|
10
|
+
"global": {
|
|
11
|
+
"active_facts": 4,
|
|
12
|
+
"predicate_counts": {
|
|
13
|
+
"convention": 4
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"project": {
|
|
17
|
+
"active_facts": 15,
|
|
18
|
+
"predicate_counts": {
|
|
19
|
+
"convention": 13,
|
|
20
|
+
"decision": 1,
|
|
21
|
+
"uses_framework": 1
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"findings": [
|
|
26
|
+
{
|
|
27
|
+
"id": "C003",
|
|
28
|
+
"severity": "error",
|
|
29
|
+
"title": "107 content items not yet deeply distilled",
|
|
30
|
+
"detail": "Backlog grows when SessionStart distillation prompts aren't acknowledged with memory.mark_distilled. A large backlog means the same text gets re-extracted across sessions, increasing hallucination rate.",
|
|
31
|
+
"suggestion": "Triage with /distill-transcripts (interactive) OR mark all distilled if you accept the backlog is noise: claude-memory sweep --mark-all-distilled",
|
|
32
|
+
"fact_ids": []
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"id": "C007",
|
|
36
|
+
"severity": "info",
|
|
37
|
+
"title": "79% of decisions/conventions lack reason clauses (11/14)",
|
|
38
|
+
"detail": "Facts without 'because/so that/to avoid/...' lose their justification once context fades. Bare conclusions are dead weight when the team grows or you revisit a year later.",
|
|
39
|
+
"suggestion": "Inspect with: claude-memory explain <fact_id>. Reject low-value bare facts or rewrite with reason clauses via memory.store_extraction.",
|
|
40
|
+
"fact_ids": [
|
|
41
|
+
2,
|
|
42
|
+
3,
|
|
43
|
+
4,
|
|
44
|
+
5,
|
|
45
|
+
6,
|
|
46
|
+
8,
|
|
47
|
+
9,
|
|
48
|
+
10,
|
|
49
|
+
12,
|
|
50
|
+
14,
|
|
51
|
+
15
|
|
52
|
+
]
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# 0.12 Soak Baseline — 2026-06-03
|
|
2
|
+
|
|
3
|
+
Day-0 audit snapshot taken across this repo plus eight other real-world projects that have used (or been candidates for) claude_memory at various points — nine projects in total. Baseline for tracking memory-health drift through the 0.12 → 1.0 soak window (target ship ~2026-06-22 to 2026-07-01).
|
|
4
|
+
|
|
5
|
+
## Day-0 hygiene pass (this repo only, 2026-06-03)
|
|
6
|
+
|
|
7
|
+
Before locking in the baseline, ran a focused cleanup on `claude_memory`'s own memory state:
|
|
8
|
+
|
|
9
|
+
1. **Rejected 9 auto-memory duplicates** (IDs 30, 31, 36, 81, 82, 87, 143, 144, 146) — older terse facts whose auto-memory-imported richer versions (in the 213/218/225/227/228/229/230/231 range) cover the same ground better.
|
|
10
|
+
2. **Appended explicit reason clauses to 38 bare-conclusion facts** via a single transactional SQL UPDATE — each fact's prose already implied a `because` / `so that` / `to avoid` / etc.; the BareConclusionDetector just needs the literal marker. The appends are documented per-ID in `git log -p .claude/memory.sqlite3` (commit details).
|
|
11
|
+
|
|
12
|
+
**Effect on `claude_memory` audit:**
|
|
13
|
+
- C007 bare-conclusion ratio: **89% (47/53) → 0% (0/44)** ✅
|
|
14
|
+
- Project active facts: 76 → 67 (the 9 dup rejects)
|
|
15
|
+
- Audit `ok`: false → true (no errors, two C010 warnings remain as historical scar-tissue, one C009 info)
|
|
16
|
+
|
|
17
|
+
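Apart from a partial C007 cleanup on `agent-training-program` (100% → 35%, detailed below), other projects were not touched — their bare-conclusion ratios (79% where flagged) remain as captured below and are the right reference points for measuring cross-project drift.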
|
|
18
|
+
|
|
19
|
+
Audit JSONs preserved at `docs/soak/audit_2026-06-03_<project>.json` so each can be diffed against future snapshots.
|
|
20
|
+
|
|
21
|
+
## Installed gem state
|
|
22
|
+
|
|
23
|
+
- `claude-memory --version` → **0.12.0**
|
|
24
|
+
- Binary path: `/Users/valentinostoll/.gem/ruby/4.0.2/bin/claude-memory`
|
|
25
|
+
- Every project's hooks resolve to the same global binary, so "upgrading" is a single `rake install` from `claude_memory/` — already done at release time.
|
|
26
|
+
|
|
27
|
+
## Cross-project comparison (9 projects)
|
|
28
|
+
|
|
29
|
+
Captured against installed gem **0.12.0**. Audit JSONs at `docs/soak/audit_2026-06-03_<project>.json`.
|
|
30
|
+
|
|
31
|
+
| Project | OK | Err | Warn | Info | Active facts | Setup state | Notable findings |
|
|
32
|
+
|---|---|---|---|---|---|---|---|
|
|
33
|
+
| `claude_memory` | ✅ | 0 | 2 | 1 | 67 | full | Post Day-0 cleanup; **0% bare conclusions** (was 89%); 2 C010 historical-churn warns |
|
|
34
|
+
| `agent-training-program` | ✅ | 0 | 0 | 2 | 27 | DB only | Post Day-0 cleanup; **35% bare** (was 100%); 24 unimported auto-memory files |
|
|
35
|
+
| `nowreading.dev` | ✅ | 0 | 0 | 0 | 0 | none | No project DB; not in use |
|
|
36
|
+
| `ups.dev` | ❌ | 1 | 0 | 1 | 15 | full | **107-item distillation backlog (C003)**; 79% bare |
|
|
37
|
+
| `agentic` | ✅ | 0 | 1 | 0 | 0 | full | **C008 starvation** — DB exists with 0 active facts (rejected or never ingested) |
|
|
38
|
+
| `ai-software-architect` | ✅ | 0 | 0 | 0 | 0 | settings only | Hooks configured, never received a session that wrote facts |
|
|
39
|
+
| `chaos_to_the_rescue` | ✅ | 0 | 1 | 1 | 17 | DB only | **46-item distillation backlog (C003 warn)**; bare-conclusion ratio not flagged (clean? — needs spot check); no settings.json |
|
|
40
|
+
| `daily-vibe` | ❌ | 1 | 0 | 1 | 17 | full | **160-item distillation backlog (C003 error)** — biggest in the set; 79% bare |
|
|
41
|
+
| `minerva-sky` | ✅ | 0 | 0 | 0 | 0 | none | Never set up |
|
|
42
|
+
|
|
43
|
+
All projects' global stores aligned at **4 active facts** (same `~/.claude/memory.sqlite3`).
|
|
44
|
+
|
|
45
|
+
## Cross-project patterns (cleaner view with 9 projects)
|
|
46
|
+
|
|
47
|
+
### Distillation backlog is structural, not project-specific
|
|
48
|
+
|
|
49
|
+
Three of the six setups with project DBs have meaningful C003 backlogs:
|
|
50
|
+
|
|
51
|
+
| Project | Backlog (undistilled items) |
|
|
52
|
+
|---|---|
|
|
53
|
+
| `daily-vibe` | **160** (Error) |
|
|
54
|
+
| `ups.dev` | **107** (Error) |
|
|
55
|
+
| `chaos_to_the_rescue` | 46 (Warn) |
|
|
56
|
+
|
|
57
|
+
That's a cross-project signal: Layer-2 SessionStart context injection isn't catching up in projects with long-lived sessions and steady transcript intake. The injection only fires on fresh sessions (`startup` / `resume` / `clear` per `Hook::ContextInjector#fresh_session?`); long sessions without restart silently skip the distillation prompt indefinitely.
|
|
58
|
+
|
|
59
|
+
**0.13 candidate:** add a CLI command (e.g. `claude-memory distill --drain` or similar) that processes undistilled items in batches without requiring an in-Claude-Code session, OR auto-trigger Layer-2 injection mid-session after N new content items accumulate. Without this, the backlog grows monotonically in any actively-used project.
|
|
60
|
+
|
|
61
|
+
### Setup drift is a real failure mode
|
|
62
|
+
|
|
63
|
+
Three out of six "set-up at some point" projects have partial/stale state:
|
|
64
|
+
|
|
65
|
+
- `agent-training-program`, `chaos_to_the_rescue`: DB present, no `settings.json` — hooks aren't firing locally; the DB is frozen at the last point hooks ran.
|
|
66
|
+
- `ai-software-architect`: `settings.json` present, no DB — hooks configured but never accumulated facts.
|
|
67
|
+
|
|
68
|
+
Today the audit doesn't surface this — `claude-memory check-setup` MCP tool covers it, but it's not in the CLI audit. **0.13 candidate:** an audit check that flags the DB/settings-disagreement state.
|
|
69
|
+
|
|
70
|
+
### Detector regex strictness (newly discovered)
|
|
71
|
+
|
|
72
|
+
The C007 pass on `agent-training-program` exposed a detector blind spot. `BareConclusionDetector::REASON_PATTERNS` accepts `to avoid / prevent / ensure / support / allow / enable / make / fix / handle` — but rejects natural variants like `to keep / capture / preserve / match` even though those are equally valid causal phrasings. 7 of the 20 reason-clause appends on `agent-training-program` used non-accepted verbs and re-flagged. **0.13 candidate:** widen the regex (or move to a small embedding-based check) so the visibility signal isn't punishing valid reasoning.
|
|
73
|
+
|
|
74
|
+
### C007 effect of the Day-0 cleanup
|
|
75
|
+
|
|
76
|
+
| Project | Pre-cleanup | Post-cleanup |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| `claude_memory` | 89% (47/53) | **0% (0/44)** |
|
|
79
|
+
| `agent-training-program` | 100% (24/24) | **35% (7/20)** |
|
|
80
|
+
|
|
81
|
+
Untouched: `ups.dev` 79%, `daily-vibe` 79%, `chaos_to_the_rescue` (not flagged but worth a spot check). These remain reference points for measuring the bare-conclusion-rate drift over the soak window.
|
|
82
|
+
|
|
83
|
+
## Cross-project patterns
|
|
84
|
+
|
|
85
|
+
### Universal: bare-conclusion rate (C007)
|
|
86
|
+
|
|
87
|
+
Before the Day-0 cleanup, every active project carried the long tail of pre-0.11 facts without reason clauses (pre-cleanup figures):
|
|
88
|
+
|
|
89
|
+
- `claude_memory`: 89% (47/53)
|
|
90
|
+
- `agent-training-program`: **100%** (24/24)
|
|
91
|
+
- `ups.dev`: 79% (11/14)
|
|
92
|
+
|
|
93
|
+
The 0.11 reason-clause distillation prompt only protects *newly extracted* facts. Existing fact bases everywhere need either incremental rewrite or selective reject. This is the most awkward signal for the 1.0 visibility pillar.
|
|
94
|
+
|
|
95
|
+
### Activation drift
|
|
96
|
+
|
|
97
|
+
Two projects have `.claude/memory.sqlite3` but no `.claude/settings.json` — they were set up at some point and then drifted. Effect: hooks don't fire locally, so memory stops accumulating but the DB lingers. Worth a one-line `claude-memory init` to reactivate (or formal removal if abandoned).
|
|
98
|
+
|
|
99
|
+
### Distillation backlog (`ups.dev`)
|
|
100
|
+
|
|
101
|
+
107 content items ingested but never deeply distilled. Layer 2 (SessionStart context injection acting as distiller) isn't catching up. Two paths:
|
|
102
|
+
|
|
103
|
+
1. Run `/distill-transcripts` in `ups.dev` to clear the backlog.
|
|
104
|
+
2. Investigate why Layer 2 isn't keeping up — context-hook injection of pending items only fires on fresh sessions (`startup`/`resume`/`clear`); if `ups.dev` sessions are long-lived without restart, the prompt never appears.
|
|
105
|
+
|
|
106
|
+
## What to watch during soak (per project)
|
|
107
|
+
|
|
108
|
+
Re-run on a weekly cadence and diff against the 2026-06-03 baseline. Five-signal watch:
|
|
109
|
+
|
|
110
|
+
| Signal | Tool | Threshold to investigate |
|
|
111
|
+
|---|---|---|
|
|
112
|
+
| Harm rate | `EVAL_MODE=real HARM_BENCH_RUNS=3 bundle exec rspec spec/benchmarks/e2e/harm_bench_spec.rb` (claude_memory repo only) | Any harm = patch before 1.0 |
|
|
113
|
+
| Open conflicts | `claude-memory audit --json` per project | Any growing trend |
|
|
114
|
+
| Bare-conclusion ratio | `audit` C007 | Worsens (the floor is "stays at current %") |
|
|
115
|
+
| Distillation backlog (C003) | `audit` C003 | Grows in any project that was zero |
|
|
116
|
+
| Active fact count drift | `audit.stats.project.active_facts` | Sudden spikes (re-contamination) or drops (mass reject) |
|
|
117
|
+
|
|
118
|
+
## Suggested 0.12.x patch candidates (optional, not 1.0 blockers)
|
|
119
|
+
|
|
120
|
+
- ✅ `claude_memory` C007 cleanup (done 2026-06-03).
|
|
121
|
+
- ✅ `agent-training-program` C007 partial cleanup (done 2026-06-03; 100% → 35%; residual 7 facts blocked by detector regex strictness — see 0.13 candidate below).
|
|
122
|
+
- `agent-training-program`: run `claude-memory import-auto-memory` once to drain the 24 pending auto-memory files. Fast win.
|
|
123
|
+
- `agent-training-program` and `chaos_to_the_rescue`: re-add `.claude/settings.json` if the projects are still active, otherwise archive the DBs.
|
|
124
|
+
- `ups.dev`, `daily-vibe`, `chaos_to_the_rescue`: distillation backlogs (107/160/46). `/distill-transcripts` is a Claude Code skill that runs *in-session* in each project — I can't invoke it from the `claude_memory` repo. Two paths: (a) open Claude Code in each affected project and run `/distill-transcripts`; (b) wait for the 0.13 CLI drain command (see 0.13 candidates below).
|
|
125
|
+
- `agentic` C008 starvation: DB has 0 active facts despite settings.json being configured. Either no Claude Code sessions have written facts there yet, or all facts have been rejected. Investigate when convenient.
|
|
126
|
+
- `nowreading.dev`, `minerva-sky`: decide — initialize with claude_memory or accept "untouched" as the steady state.
|
|
127
|
+
|
|
128
|
+
## 0.13 candidates surfaced by this cross-project audit
|
|
129
|
+
|
|
130
|
+
1. **Drainable distillation backlog.** Add a CLI command that processes undistilled content items without requiring a fresh Claude Code session, or trigger Layer-2 injection mid-session when the backlog exceeds N items. The current Layer-2 path only fires on `fresh_session?` (startup/resume/clear), so long-lived sessions silently accrue backlog forever. Most impactful 0.13 item per the cross-project signal — 3 of 6 active-setup projects are affected, one with 160 items.
|
|
131
|
+
2. **Setup-drift audit check.** New C-code check that compares `.claude/memory.sqlite3` presence against `.claude/settings.json` presence and flags the disagreement state. Three of nine projects today have one without the other.
|
|
132
|
+
3. **Widen `BareConclusionDetector::REASON_PATTERNS`.** Accept `to keep / capture / preserve / match / record / track / store / serve / protect / safeguard` (and similar) — the current allowlist is too narrow and punishes natural reasoning. Surfaced when 7 of 20 honest reason-clause appends on `agent-training-program` failed the regex even though each contained valid causal language.
|
|
133
|
+
4. **Cross-project audit aggregation.** A `claude-memory audit --multi <dir>...` mode that runs the audit across N projects and emits one comparison table. The Day-0 baseline doc had to be assembled by hand; should be automated by 1.0 if cross-project soak comparisons are going to be a recurring practice.
|
|
134
|
+
|
|
135
|
+
## Reproducing this snapshot
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
mkdir -p docs/soak
|
|
139
|
+
./exe/claude-memory audit --json --no-exit > docs/soak/audit_<DATE>_claude_memory.json
|
|
140
|
+
(cd ../agent-training-program && claude-memory audit --json --no-exit) > docs/soak/audit_<DATE>_agent-training-program.json
|
|
141
|
+
(cd ../nowreading.dev && claude-memory audit --json --no-exit) > docs/soak/audit_<DATE>_nowreading.dev.json
|
|
142
|
+
(cd ../ups.dev && claude-memory audit --json --no-exit) > docs/soak/audit_<DATE>_ups.dev.json
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Diff against this baseline with `jq` or any JSON diff tool of choice.
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
|
|
4
|
+
module Audit
|
|
5
|
+
# Individual audit checks. Each method takes a Store::StoreManager and
|
|
6
|
+
# returns an Array<Finding>. Checks must be read-only — write
|
|
7
|
+
# operations belong in dedicated commands the user opts into.
|
|
8
|
+
#
|
|
9
|
+
# Adding a new check:
|
|
10
|
+
# 1. Define a method here with an explicit C### id assignment.
|
|
11
|
+
# 2. Append the method name to Runner::CHECK_METHODS.
|
|
12
|
+
# 3. Document it in docs/audit_runbook.md.
|
|
13
|
+
module Checks
|
|
14
|
+
module_function
|
|
15
|
+
|
|
16
|
+
# C001 — Open conflicts, reported separately for the project and global DBs.
#
# @param manager [Store::StoreManager] supplies the per-scope stores
# @return [Array<Finding>] one :error finding per scope whose store reports
#   at least one open conflict; empty when neither does
def open_conflicts(manager)
  stores_by_scope = {
    project: manager.store_if_exists("project"),
    global: manager.store_if_exists("global")
  }

  stores_by_scope.filter_map do |scope, store|
    # A scope without a database simply has nothing to report.
    next unless store

    unresolved = store.open_conflicts
    next if unresolved.empty?

    # Each conflict names two facts; list every involved fact once.
    involved_ids = unresolved.flat_map { |conflict| [conflict[:fact_a_id], conflict[:fact_b_id]] }.uniq

    Finding.new(
      id: "C001",
      severity: :error,
      title: "#{unresolved.size} open conflict(s) in #{scope} DB",
      detail: "Open conflicts indicate unresolved single-cardinality disputes. Each will keep re-firing until the losing fact is rejected.",
      suggestion: "claude-memory conflicts && claude-memory reject <fact_id>",
      fact_ids: involved_ids
    )
  end
end
|
|
34
|
+
|
|
35
|
+
# Predicates that may hold at most one active fact at a time. Shared by the
# C002 (multiplicity) and C010 (churn) checks.
SINGLE_CARDINALITY_PREDICATES = %w[uses_database deployment_platform auth_method].freeze

# C002 — Single-cardinality predicates with more than one active fact.
#
# Only the project DB is inspected.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] one :error finding per offending predicate; empty
#   when there is no project DB or every predicate has at most one active fact
def single_cardinality_multiplicity(manager)
  project = manager.store_if_exists("project")
  return [] unless project

  SINGLE_CARDINALITY_PREDICATES.filter_map do |predicate|
    active = project.facts.where(status: "active", predicate: predicate).all
    next unless active.size > 1

    Finding.new(
      id: "C002",
      severity: :error,
      title: "predicate=#{predicate} has #{active.size} active facts (single-cardinality)",
      detail: "Single-cardinality predicates must have at most one active value. Multiple actives mean resolver dropped a supersession or distillation produced contradictory claims.",
      suggestion: "Inspect with: claude-memory explain <fact_id>. Reject the wrong ones: claude-memory reject <fact_id>",
      fact_ids: active.map { |fact| fact[:id] }
    )
  end
end
|
|
55
|
+
|
|
56
|
+
# C003 — Distillation backlog in the project DB (warn at 25+, error at 100+).
#
# The backlog is the number of content_items whose id does not appear as a
# content_item_id in ingestion_metrics.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single finding, or empty when there is no
#   project DB or fewer than 25 items are pending
def distillation_backlog(manager)
  project = manager.store_if_exists("project")
  return [] unless project

  # NOTE(review): this is an exclude-by-subquery; if content_item_id can be
  # NULL, a SQL NOT IN would match no rows — confirm the column is NOT NULL.
  already_distilled = project.ingestion_metrics.select(:content_item_id).distinct
  backlog = project.content_items.exclude(id: already_distilled).count
  return [] unless backlog >= 25

  level = (backlog < 100) ? :warn : :error

  [Finding.new(
    id: "C003",
    severity: level,
    title: "#{backlog} content items not yet deeply distilled",
    detail: "Backlog grows when SessionStart distillation prompts aren't acknowledged with memory.mark_distilled. A large backlog means the same text gets re-extracted across sessions, increasing hallucination rate.",
    suggestion: "Triage with /distill-transcripts (interactive) OR mark all distilled if you accept the backlog is noise: claude-memory sweep --mark-all-distilled",
    fact_ids: []
  )]
end
|
|
74
|
+
|
|
75
|
+
# C004 — memory.decisions leaking facts whose predicate is not `decision`.
#
# Samples up to 50 results from Shortcuts.decisions and reports every
# result carrying a different predicate.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single :error finding listing the leaked
#   predicates and the ids of the leaking facts, or empty when none leak
def shortcut_decision_leak(manager)
  hits = Shortcuts.decisions(manager, limit: 50)
  offenders = hits.reject { |hit| hit[:fact][:predicate] == "decision" }
  return [] if offenders.empty?

  # Distinct offending predicates, in first-seen order.
  leaked = offenders.map { |hit| hit[:fact][:predicate] }.uniq

  [Finding.new(
    id: "C004",
    severity: :error,
    title: "memory.decisions returns non-decision predicates: #{leaked.inspect}",
    detail: "memory.decisions should return only `decision`-predicate facts. Predicate leakage suggests the shortcut implementation has regressed back to text-search filtering (pre-2026-05-21 audit).",
    suggestion: "Inspect lib/claude_memory/shortcuts.rb — SHORTCUTS[:decisions][:predicates] should equal ['decision']. Run `bundle exec rspec spec/claude_memory/shortcuts_spec.rb`.",
    fact_ids: offenders.map { |hit| hit[:fact][:id] }
  )]
end
|
|
90
|
+
|
|
91
|
+
# C005 — memory.conventions surfaces no project facts even though the
# project DB holds active conventions.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single :warn finding, or empty when there is no
#   project DB, no active project convention, or at least one of the (up to
#   50) shortcut results has source "project"
def shortcut_convention_scope(manager)
  project = manager.store_if_exists("project")
  return [] unless project

  stored = project.facts.where(status: "active", predicate: "convention").count
  return [] if stored.zero?

  surfaced = Shortcuts.conventions(manager, limit: 50)
  return [] if surfaced.any? { |hit| hit[:source] == "project" }

  [Finding.new(
    id: "C005",
    severity: :warn,
    title: "memory.conventions returned 0 project facts despite #{stored} project conventions existing",
    detail: "Pre-2026-05-21 audit, memory.conventions was hardcoded to scope=global. If you're seeing 0 project facts in a project with conventions, the shortcut has regressed.",
    suggestion: "Check Shortcuts.collect_facts in lib/claude_memory/shortcuts.rb. Re-run `bundle exec rspec spec/claude_memory/shortcuts_spec.rb`.",
    fact_ids: []
  )]
end
|
|
111
|
+
|
|
112
|
+
# C006 — Near-duplicate conventions in the global DB.
#
# Active global conventions are bucketed by the key normalize_convention
# produces for their object_literal; any bucket holding more than one fact
# is treated as a group of likely duplicates.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single :info finding covering every duplicate
#   group, or empty when there is no global DB or no group has 2+ members
def duplicate_global_conventions(manager)
  global = manager.store_if_exists("global")
  return [] unless global

  conventions = global.facts.where(status: "active", predicate: "convention").select(:id, :object_literal).all
  return [] if conventions.size < 2

  clusters = conventions
    .group_by { |row| normalize_convention(row[:object_literal]) }
    .values
    .select { |members| members.size > 1 }
  return [] if clusters.empty?

  [Finding.new(
    id: "C006",
    severity: :info,
    title: "#{clusters.size} near-duplicate global convention group(s)",
    detail: "Multiple global conventions normalize to the same phrasing. Pick the cleanest and reject the rest to keep memory.conventions output tight.",
    suggestion: "Review with: claude-memory recall <concept> --scope=global. Reject duplicates: claude-memory reject <fact_id>",
    fact_ids: clusters.flatten.map { |row| row[:id] }
  )]
end
|
|
135
|
+
|
|
136
|
+
# C007 — Share of active project decisions/conventions that the
# BareConclusionDetector flags as lacking a reason clause.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single :info finding when at least 30% of the
#   candidate facts are flagged; empty otherwise (including when there is
#   no project DB or nothing is flagged)
def bare_conclusion_rate(manager)
  project = manager.store_if_exists("project")
  return [] unless project

  detector = Distill::BareConclusionDetector.new
  candidates = project.facts
    .where(status: "active", predicate: %w[decision convention])
    .select(:id, :predicate, :object_literal)
    .all
  flagged = candidates.select do |fact|
    detector.bare_conclusion?(predicate: fact[:predicate], object_literal: fact[:object_literal])
  end
  # Also guarantees candidates is non-empty before the division below.
  return [] if flagged.empty?

  share = flagged.size.to_f / candidates.size
  return [] if share < 0.3

  percent = (share * 100).round

  [Finding.new(
    id: "C007",
    severity: :info,
    title: "#{percent}% of decisions/conventions lack reason clauses (#{flagged.size}/#{candidates.size})",
    detail: "Facts without 'because/so that/to avoid/...' lose their justification once context fades. Bare conclusions are dead weight when the team grows or you revisit a year later.",
    suggestion: "Inspect with: claude-memory explain <fact_id>. Reject low-value bare facts or rewrite with reason clauses via memory.store_extraction.",
    fact_ids: flagged.map { |fact| fact[:id] }
  )]
end
|
|
157
|
+
|
|
158
|
+
# C008 — Project DB starvation: fewer than 5 active facts, which may point
# at a broken ingest (or simply a fresh install).
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single :warn finding, or empty when there is no
#   project DB or it holds 5 or more active facts
def project_starvation(manager)
  project = manager.store_if_exists("project")
  return [] unless project

  active_total = project.facts.where(status: "active").count
  return [] unless active_total < 5

  [Finding.new(
    id: "C008",
    severity: :warn,
    title: "Only #{active_total} active project fact(s)",
    detail: "A nearly-empty project DB suggests either a fresh install (ignore) OR a broken ingest pipeline / overzealous rejection. Verify hooks are firing: claude-memory doctor.",
    suggestion: "claude-memory doctor; claude-memory stats; check .claude/settings.json hook configuration.",
    fact_ids: []
  )]
end
|
|
174
|
+
|
|
175
|
+
# C009 — Auto-memory markdown files that appear not to have been imported.
#
# The estimate is a plain count difference: the number of *.md files in the
# auto-memory directory (MEMORY.md excluded) minus the number of project
# content items whose source is "auto_memory_import". Files are not matched
# to imports individually, and no timestamps are compared.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] a single :info finding when the difference is
#   positive; empty when the directory is missing, holds no candidate
#   files, there is no project DB, or the difference is zero or negative
def auto_memory_unimported(manager)
  config = Configuration.new
  memory_dir = Hook::AutoMemoryMirror.default_dir(config.project_dir, config.claude_config_dir)
  return [] unless Dir.exist?(memory_dir)

  candidates = Dir.glob(File.join(memory_dir, "*.md")).reject { |path| File.basename(path) == "MEMORY.md" }
  return [] if candidates.empty?

  project = manager.store_if_exists("project")
  return [] unless project

  already_imported = project.content_items.where(source: "auto_memory_import").count
  outstanding = candidates.size - already_imported
  return [] unless outstanding.positive?

  [Finding.new(
    id: "C009",
    severity: :info,
    title: "#{outstanding} auto-memory file(s) not yet imported",
    detail: "~/.claude/projects/<slug>/memory/*.md files contain durable knowledge that isn't reachable via memory.recall until imported. AutoMemoryMirror only surfaces them transiently at SessionStart.",
    suggestion: "Preview: claude-memory import-auto-memory --dry-run. Import: claude-memory import-auto-memory.",
    fact_ids: []
  )]
end
|
|
202
|
+
|
|
203
|
+
# Minimum number of non-active facts on one predicate before C010 fires.
CHURN_THRESHOLD = 5

# C010 — Recurring single-cardinality churn: a predicate that has piled up
# many superseded, disputed, or rejected facts in the project DB, which is
# a sign of a persistent contamination source.
#
# @param manager [Store::StoreManager]
# @return [Array<Finding>] one :warn finding per predicate in
#   SINGLE_CARDINALITY_PREDICATES with at least CHURN_THRESHOLD such facts;
#   empty when there is no project DB
def single_cardinality_churn(manager)
  project = manager.store_if_exists("project")
  return [] unless project

  retired_statuses = %w[superseded disputed rejected]

  SINGLE_CARDINALITY_PREDICATES.filter_map do |predicate|
    retired = project.facts.where(predicate: predicate, status: retired_statuses).count
    next if retired < CHURN_THRESHOLD

    Finding.new(
      id: "C010",
      severity: :warn,
      title: "predicate=#{predicate} shows churn: #{retired} historical non-active facts",
      detail: "Repeated supersession/dispute on a single-cardinality predicate usually means a contamination source (e.g., example text in CLAUDE.md or docs) keeps re-introducing the same hallucination.",
      suggestion: "Find the contamination source: claude-memory recall <bad_value> --scope=project. Wrap the trigger text in <no-memory> tags. See docs/audit_runbook.md.",
      fact_ids: []
    )
  end
end
|
|
228
|
+
|
|
229
|
+
# Reduces a convention's text to a comparison key for duplicate detection.
#
# The text is lowercased, the filler words "uses", "prefers" and "always"
# are removed wherever they occur, punctuation is dropped, and whitespace
# is collapsed and trimmed.
#
# "never" is deliberately kept: removing a negation would give "always use
# X" and "never use X" the same key, so two contradictory conventions
# would be reported as near-duplicates of each other.
#
# @param text [#to_s, nil] the convention's object literal
# @return [String] the normalized key ("" for nil or blank input)
def normalize_convention(text)
  text.to_s
    .downcase
    .gsub(/\b(?:uses|prefers|always)\b/, "")
    .gsub(/[[:punct:]]/, "")
    .gsub(/\s+/, " ")
    .strip
end
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Audit
    # One result produced by an audit check (see Audit::Checks) and
    # collected by Audit::Runner. Built with Data.define, so instances are
    # value objects with keyword construction and no setters.
    #
    # Members:
    #   id         - check identifier such as "C001"
    #   severity   - :error, :warn or :info (see below)
    #   title      - one-line summary
    #   detail     - longer explanation of why the finding matters
    #   suggestion - remediation command(s) as plain text, so the output is
    #                directly actionable; the `/audit-memory` skill reads
    #                these and offers to run them for the user
    #   fact_ids   - ids of the facts involved (may be empty)
    #
    # Severity levels:
    #   :error — a contract violation; CI/automation should fail
    #   :warn  — likely problem requiring attention but not blocking
    #   :info  — observation; suggests an optimization or cleanup
    Finding = Data.define(:id, :severity, :title, :detail, :suggestion, :fact_ids) do
      # @return [Boolean] true when severity is :error
      def error?
        severity == :error
      end

      # @return [Boolean] true when severity is :warn
      def warn?
        severity == :warn
      end

      # @return [Boolean] true when severity is :info
      def info?
        severity == :info
      end

      # @return [Hash{Symbol => Object}] every member keyed by name, in
      #   declaration order
      def to_h
        members.to_h { |name| [name, public_send(name)] }
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Audit
    # Drives a full audit: invokes every check named in CHECK_METHODS on
    # the store manager, gathers the findings, and derives an exit code.
    #
    # The runner itself is read-only. Each Finding's suggestion names the
    # commands a user (or skill) would run to remediate; the audit never
    # writes.
    class Runner
      # Names of the Checks module methods to invoke, in execution order.
      CHECK_METHODS = %i[
        open_conflicts
        single_cardinality_multiplicity
        single_cardinality_churn
        distillation_backlog
        shortcut_decision_leak
        shortcut_convention_scope
        duplicate_global_conventions
        bare_conclusion_rate
        project_starvation
        auto_memory_unimported
      ].freeze

      # Outcome of one run: the collected findings plus summary stats.
      Result = Data.define(:findings, :stats) do
        # @return [Array] findings whose severity is :error
        def errors
          findings.select(&:error?)
        end

        # @return [Array] findings whose severity is :warn
        def warnings
          findings.select(&:warn?)
        end

        # @return [Array] findings whose severity is :info
        def info
          findings.select(&:info?)
        end

        # @return [Boolean] true when there are no error-severity findings
        def ok?
          errors.empty?
        end

        # @return [Integer] 0 when ok?, otherwise 1
        def exit_code
          ok? ? 0 : 1
        end
      end

      # @param manager [Object, nil] store manager to audit; when omitted a
      #   new Store::StoreManager is created
      def initialize(manager: nil)
        @manager = manager || Store::StoreManager.new
      end

      # Runs all checks (findings first, then stats) and bundles the outcome.
      #
      # @return [Result]
      def run
        collected = CHECK_METHODS.flat_map do |check_name|
          Checks.public_send(check_name, @manager)
        end
        Result.new(findings: collected, stats: collect_stats)
      end

      private

      # Builds the stats hash: number of checks plus per-scope store stats.
      # Both stores are looked up before either is summarized.
      def collect_stats
        global_store, project_store = %w[global project].map do |scope|
          @manager.store_if_exists(scope)
        end

        {
          checks_run: CHECK_METHODS.size,
          global: store_stats(global_store),
          project: store_stats(project_store)
        }
      end

      # Summarizes one store; returns nil when the store is absent.
      def store_stats(store)
        return unless store

        active_total = store.facts.where(status: "active").count
        {active_facts: active_total, predicate_counts: predicate_distribution(store)}
      end

      # Maps each predicate to its number of active facts, most frequent first.
      def predicate_distribution(store)
        rows = store.facts.where(status: "active").group_and_count(:predicate).all
        pairs = rows.map { |row| [row[:predicate], row[:count]] }
        pairs.sort_by { |_predicate, count| -count }.to_h
      end
    end
  end
end
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
require "optparse"
require "time"
|
|
5
|
+
|
|
6
|
+
module ClaudeMemory
  module Commands
    # Runs the memory health audit and prints findings. Exits non-zero
    # when error-severity findings are present (unless --no-exit is
    # given). JSON output is the stable surface — scripts should consume
    # it rather than parse the human-oriented text output.
    class AuditCommand < BaseCommand
      # Ordering used by --severity filtering (higher is more severe).
      SEVERITY_RANK = {info: 0, warn: 1, error: 2}.freeze

      # Maximum number of fact ids printed per finding in text mode.
      MAX_FACT_IDS_SHOWN = 20

      # Entry point.
      #
      # @param args [Array<String>] command-line arguments after "audit"
      # @return [Integer] 1 on option-parsing failure; otherwise 0 when
      #   --no-exit is given, else the audit result's exit code
      def call(args)
        opts = parse_opts(args)
        return 1 if opts.nil?

        manager = Store::StoreManager.new
        begin
          result = Audit::Runner.new(manager: manager).run
          filtered = filter_by_severity(result.findings, opts[:severity])

          if opts[:json]
            stdout.puts JSON.pretty_generate(payload(result, filtered))
          else
            render_text(result, filtered)
          end
        ensure
          # Release the manager even when the audit or rendering raises.
          manager.close
        end

        opts[:no_exit] ? 0 : result.exit_code
      end

      # Keeps findings at or above the given severity.
      #
      # @param findings [Array] findings responding to #severity
      # @param threshold [Symbol, nil] :info, :warn or :error; nil or an
      #   unrecognized value disables filtering
      # @return [Array] the selected findings
      def filter_by_severity(findings, threshold)
        return findings if threshold.nil?
        floor = SEVERITY_RANK.fetch(threshold) { return findings }
        findings.select { |f| SEVERITY_RANK[f.severity] >= floor }
      end

      private

      # Parses CLI flags into an options hash.
      #
      # Any OptionParser::ParseError (unknown flag, missing or needless
      # argument, ambiguous abbreviation) is reported on stderr and yields
      # nil, so the caller can exit with status 1 instead of crashing.
      #
      # @return [Hash, nil] {json:, no_exit:, severity:} or nil on error
      def parse_opts(args)
        options = {json: false, no_exit: false, severity: nil}
        parser = OptionParser.new do |o|
          o.banner = "Usage: claude-memory audit [--json] [--no-exit] [--severity=error|warn|info]"
          o.on("--json", "Emit JSON instead of text") { options[:json] = true }
          o.on("--no-exit", "Always exit 0 even on error-severity findings") { options[:no_exit] = true }
          o.on("--severity LEVEL", "Only show findings at or above LEVEL (error|warn|info)") { |v| options[:severity] = v.to_sym }
        end
        parser.parse!(args.dup)
        options
      rescue OptionParser::ParseError => e
        stderr.puts e.message
        nil
      end

      # Builds the JSON document. Counts reflect ALL findings; only the
      # :findings list honors the severity filter.
      def payload(result, filtered)
        {
          ok: result.ok?,
          checks_run: result.stats[:checks_run],
          counts: {
            error: result.errors.size,
            warn: result.warnings.size,
            info: result.info.size
          },
          stats: result.stats.except(:checks_run),
          findings: filtered.map(&:to_h)
        }
      end

      # Prints the human-readable report: header, per-scope stats,
      # summary counts, findings, and a final OK/FAIL verdict.
      def render_text(result, filtered)
        # Time#iso8601 needs the stdlib "time" extension on Ruby < 3.4.
        stdout.puts "Memory health audit — #{Time.now.utc.iso8601}"
        stdout.puts("=" * 60)
        render_stats(result.stats)
        stdout.puts ""
        render_summary(result)
        stdout.puts ""
        render_findings(filtered)
        stdout.puts ""
        stdout.puts(result.ok? ? "OK" : "FAIL")
      end

      # Prints one line per existing scope with its active-fact total and
      # predicate breakdown; scopes without stats are skipped.
      def render_stats(stats)
        %i[global project].each do |scope|
          s = stats[scope]
          next unless s
          preds = s[:predicate_counts].map { |k, v| "#{k}=#{v}" }.join(", ")
          stdout.puts "#{scope.to_s.capitalize.ljust(7)} #{s[:active_facts]} active facts #{preds}"
        end
      end

      # Prints the number of checks run and the per-severity totals.
      def render_summary(result)
        stdout.puts "Checks run: #{result.stats[:checks_run]}"
        stdout.puts "Errors: #{result.errors.size} Warnings: #{result.warnings.size} Info: #{result.info.size}"
      end

      # Prints each finding with its marker, detail, suggestion, and (when
      # present) a truncated list of fact ids.
      def render_findings(findings)
        if findings.empty?
          stdout.puts "No findings."
          return
        end

        findings.each do |f|
          marker = case f.severity
          when :error then "[ERROR]"
          when :warn then "[WARN]"
          when :info then "[INFO]"
          end
          stdout.puts "#{marker} #{f.id} #{f.title}"
          stdout.puts " #{f.detail}"
          stdout.puts " → #{f.suggestion}"
          stdout.puts fact_ids_line(f.fact_ids) if f.fact_ids.any?
          stdout.puts ""
        end
      end

      # Formats the fact-id line, showing at most MAX_FACT_IDS_SHOWN ids
      # and noting how many more were omitted.
      def fact_ids_line(fact_ids)
        shown = fact_ids.first(MAX_FACT_IDS_SHOWN)
        hidden = fact_ids.size - shown.size
        overflow = hidden.positive? ? " (+#{hidden} more)" : ""
        " fact_ids: #{shown.inspect}#{overflow}"
      end
    end
  end
end
|