claude_memory 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/memory.sqlite3 +0 -0
- data/.claude/rules/claude_memory.generated.md +44 -48
- data/.claude/settings.local.json +2 -1
- data/.claude-plugin/marketplace.json +2 -2
- data/.claude-plugin/plugin.json +3 -5
- data/CHANGELOG.md +52 -0
- data/CLAUDE.md +13 -8
- data/README.md +46 -0
- data/db/migrations/019_add_observations.rb +43 -0
- data/db/migrations/020_add_observation_promotion.rb +33 -0
- data/docs/GETTING_STARTED.md +38 -0
- data/docs/api_stability.md +23 -7
- data/docs/architecture.md +18 -6
- data/docs/audit_runbook.md +67 -0
- data/docs/dashboard.md +28 -0
- data/docs/improvements.md +94 -1
- data/docs/influence/mastra-observational-memory.md +198 -0
- data/docs/influence/strands-agent-sops.md +163 -0
- data/docs/quality_review.md +45 -0
- data/docs/soak/audit_2026-06-03_agent-training-program.json +53 -0
- data/docs/soak/audit_2026-06-03_agentic.json +31 -0
- data/docs/soak/audit_2026-06-03_ai-software-architect.json +19 -0
- data/docs/soak/audit_2026-06-03_chaos_to_the_rescue.json +60 -0
- data/docs/soak/audit_2026-06-03_claude_memory.json +55 -0
- data/docs/soak/audit_2026-06-03_daily-vibe.json +59 -0
- data/docs/soak/audit_2026-06-03_minerva-sky.json +19 -0
- data/docs/soak/audit_2026-06-03_nowreading.dev.json +19 -0
- data/docs/soak/audit_2026-06-03_ups.dev.json +55 -0
- data/docs/soak/baseline_2026-06-03.md +145 -0
- data/lib/claude_memory/audit/checks.rb +149 -0
- data/lib/claude_memory/audit/runner.rb +4 -0
- data/lib/claude_memory/commands/census_command.rb +1 -1
- data/lib/claude_memory/commands/checks/embeddings_check.rb +97 -0
- data/lib/claude_memory/commands/doctor_command.rb +1 -0
- data/lib/claude_memory/commands/hook_command.rb +16 -3
- data/lib/claude_memory/commands/initializers/hooks_configurator.rb +3 -1
- data/lib/claude_memory/commands/install_skill_command.rb +4 -0
- data/lib/claude_memory/commands/observations_command.rb +367 -0
- data/lib/claude_memory/commands/registry.rb +2 -0
- data/lib/claude_memory/commands/setup_vectors_command.rb +182 -0
- data/lib/claude_memory/commands/skills/reflect.md +68 -0
- data/lib/claude_memory/commands/stats_command.rb +60 -1
- data/lib/claude_memory/dashboard/api.rb +4 -0
- data/lib/claude_memory/dashboard/index.html +154 -2
- data/lib/claude_memory/dashboard/observations.rb +115 -0
- data/lib/claude_memory/dashboard/server.rb +1 -0
- data/lib/claude_memory/distill/extraction.rb +6 -4
- data/lib/claude_memory/distill/null_distiller.rb +86 -3
- data/lib/claude_memory/distill/reference_material_detector.rb +4 -1
- data/lib/claude_memory/domain/observation.rb +118 -0
- data/lib/claude_memory/embeddings/generator.rb +1 -1
- data/lib/claude_memory/hook/context_injector.rb +100 -2
- data/lib/claude_memory/mcp/handlers/management_handlers.rb +113 -2
- data/lib/claude_memory/mcp/handlers/query_handlers.rb +48 -1
- data/lib/claude_memory/mcp/instructions_builder.rb +1 -0
- data/lib/claude_memory/mcp/query_guide.rb +28 -0
- data/lib/claude_memory/mcp/tool_definitions.rb +58 -0
- data/lib/claude_memory/mcp/tools.rb +3 -0
- data/lib/claude_memory/observe/observations_renderer.rb +49 -0
- data/lib/claude_memory/observe/reflector.rb +91 -0
- data/lib/claude_memory/publish.rb +53 -1
- data/lib/claude_memory/resolve/resolver.rb +45 -8
- data/lib/claude_memory/store/schema_manager.rb +1 -1
- data/lib/claude_memory/store/sqlite_store.rb +181 -0
- data/lib/claude_memory/sweep/maintenance.rb +15 -1
- data/lib/claude_memory/sweep/sweeper.rb +7 -1
- data/lib/claude_memory/version.rb +1 -1
- data/lib/claude_memory.rb +7 -0
- metadata +23 -1
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ok": true,
|
|
3
|
+
"checks_run": 10,
|
|
4
|
+
"counts": {
|
|
5
|
+
"error": 0,
|
|
6
|
+
"warn": 2,
|
|
7
|
+
"info": 1
|
|
8
|
+
},
|
|
9
|
+
"stats": {
|
|
10
|
+
"global": {
|
|
11
|
+
"active_facts": 4,
|
|
12
|
+
"predicate_counts": {
|
|
13
|
+
"convention": 4
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"project": {
|
|
17
|
+
"active_facts": 67,
|
|
18
|
+
"predicate_counts": {
|
|
19
|
+
"convention": 41,
|
|
20
|
+
"reference": 7,
|
|
21
|
+
"architecture": 6,
|
|
22
|
+
"uses_language": 5,
|
|
23
|
+
"uses_framework": 4,
|
|
24
|
+
"decision": 3,
|
|
25
|
+
"uses_database": 1
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"findings": [
|
|
30
|
+
{
|
|
31
|
+
"id": "C010",
|
|
32
|
+
"severity": "warn",
|
|
33
|
+
"title": "predicate=uses_database shows churn: 9 historical non-active facts",
|
|
34
|
+
"detail": "Repeated supersession/dispute on a single-cardinality predicate usually means a contamination source (e.g., example text in CLAUDE.md or docs) keeps re-introducing the same hallucination.",
|
|
35
|
+
"suggestion": "Find the contamination source: claude-memory recall <bad_value> --scope=project. Wrap the trigger text in <no-memory> tags. See docs/audit_runbook.md.",
|
|
36
|
+
"fact_ids": []
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"id": "C010",
|
|
40
|
+
"severity": "warn",
|
|
41
|
+
"title": "predicate=deployment_platform shows churn: 9 historical non-active facts",
|
|
42
|
+
"detail": "Repeated supersession/dispute on a single-cardinality predicate usually means a contamination source (e.g., example text in CLAUDE.md or docs) keeps re-introducing the same hallucination.",
|
|
43
|
+
"suggestion": "Find the contamination source: claude-memory recall <bad_value> --scope=project. Wrap the trigger text in <no-memory> tags. See docs/audit_runbook.md.",
|
|
44
|
+
"fact_ids": []
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"id": "C009",
|
|
48
|
+
"severity": "info",
|
|
49
|
+
"title": "1 auto-memory file(s) not yet imported",
|
|
50
|
+
"detail": "~/.claude/projects/<slug>/memory/*.md files contain durable knowledge that isn't reachable via memory.recall until imported. AutoMemoryMirror only surfaces them transiently at SessionStart.",
|
|
51
|
+
"suggestion": "Preview: claude-memory import-auto-memory --dry-run. Import: claude-memory import-auto-memory.",
|
|
52
|
+
"fact_ids": []
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ok": false,
|
|
3
|
+
"checks_run": 10,
|
|
4
|
+
"counts": {
|
|
5
|
+
"error": 1,
|
|
6
|
+
"warn": 0,
|
|
7
|
+
"info": 1
|
|
8
|
+
},
|
|
9
|
+
"stats": {
|
|
10
|
+
"global": {
|
|
11
|
+
"active_facts": 4,
|
|
12
|
+
"predicate_counts": {
|
|
13
|
+
"convention": 4
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"project": {
|
|
17
|
+
"active_facts": 17,
|
|
18
|
+
"predicate_counts": {
|
|
19
|
+
"convention": 14,
|
|
20
|
+
"auth_method": 1,
|
|
21
|
+
"uses_database": 1,
|
|
22
|
+
"uses_framework": 1
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"findings": [
|
|
27
|
+
{
|
|
28
|
+
"id": "C003",
|
|
29
|
+
"severity": "error",
|
|
30
|
+
"title": "160 content items not yet deeply distilled",
|
|
31
|
+
"detail": "Backlog grows when SessionStart distillation prompts aren't acknowledged with memory.mark_distilled. A large backlog means the same text gets re-extracted across sessions, increasing hallucination rate.",
|
|
32
|
+
"suggestion": "Triage with /distill-transcripts (interactive) OR mark all distilled if you accept the backlog is noise: claude-memory sweep --mark-all-distilled",
|
|
33
|
+
"fact_ids": []
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"id": "C007",
|
|
37
|
+
"severity": "info",
|
|
38
|
+
"title": "100% of decisions/conventions lack reason clauses (14/14)",
|
|
39
|
+
"detail": "Facts without 'because/so that/to avoid/...' lose their justification once context fades. Bare conclusions are dead weight when the team grows or you revisit a year later.",
|
|
40
|
+
"suggestion": "Inspect with: claude-memory explain <fact_id>. Reject low-value bare facts or rewrite with reason clauses via memory.store_extraction.",
|
|
41
|
+
"fact_ids": [
|
|
42
|
+
3,
|
|
43
|
+
6,
|
|
44
|
+
7,
|
|
45
|
+
11,
|
|
46
|
+
12,
|
|
47
|
+
18,
|
|
48
|
+
19,
|
|
49
|
+
20,
|
|
50
|
+
21,
|
|
51
|
+
22,
|
|
52
|
+
23,
|
|
53
|
+
24,
|
|
54
|
+
25,
|
|
55
|
+
26
|
|
56
|
+
]
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ok": true,
|
|
3
|
+
"checks_run": 10,
|
|
4
|
+
"counts": {
|
|
5
|
+
"error": 0,
|
|
6
|
+
"warn": 0,
|
|
7
|
+
"info": 0
|
|
8
|
+
},
|
|
9
|
+
"stats": {
|
|
10
|
+
"global": {
|
|
11
|
+
"active_facts": 4,
|
|
12
|
+
"predicate_counts": {
|
|
13
|
+
"convention": 4
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"project": null
|
|
17
|
+
},
|
|
18
|
+
"findings": []
|
|
19
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ok": true,
|
|
3
|
+
"checks_run": 10,
|
|
4
|
+
"counts": {
|
|
5
|
+
"error": 0,
|
|
6
|
+
"warn": 0,
|
|
7
|
+
"info": 0
|
|
8
|
+
},
|
|
9
|
+
"stats": {
|
|
10
|
+
"global": {
|
|
11
|
+
"active_facts": 4,
|
|
12
|
+
"predicate_counts": {
|
|
13
|
+
"convention": 4
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"project": null
|
|
17
|
+
},
|
|
18
|
+
"findings": []
|
|
19
|
+
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"ok": false,
|
|
3
|
+
"checks_run": 10,
|
|
4
|
+
"counts": {
|
|
5
|
+
"error": 1,
|
|
6
|
+
"warn": 0,
|
|
7
|
+
"info": 1
|
|
8
|
+
},
|
|
9
|
+
"stats": {
|
|
10
|
+
"global": {
|
|
11
|
+
"active_facts": 4,
|
|
12
|
+
"predicate_counts": {
|
|
13
|
+
"convention": 4
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"project": {
|
|
17
|
+
"active_facts": 15,
|
|
18
|
+
"predicate_counts": {
|
|
19
|
+
"convention": 13,
|
|
20
|
+
"decision": 1,
|
|
21
|
+
"uses_framework": 1
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"findings": [
|
|
26
|
+
{
|
|
27
|
+
"id": "C003",
|
|
28
|
+
"severity": "error",
|
|
29
|
+
"title": "107 content items not yet deeply distilled",
|
|
30
|
+
"detail": "Backlog grows when SessionStart distillation prompts aren't acknowledged with memory.mark_distilled. A large backlog means the same text gets re-extracted across sessions, increasing hallucination rate.",
|
|
31
|
+
"suggestion": "Triage with /distill-transcripts (interactive) OR mark all distilled if you accept the backlog is noise: claude-memory sweep --mark-all-distilled",
|
|
32
|
+
"fact_ids": []
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"id": "C007",
|
|
36
|
+
"severity": "info",
|
|
37
|
+
"title": "79% of decisions/conventions lack reason clauses (11/14)",
|
|
38
|
+
"detail": "Facts without 'because/so that/to avoid/...' lose their justification once context fades. Bare conclusions are dead weight when the team grows or you revisit a year later.",
|
|
39
|
+
"suggestion": "Inspect with: claude-memory explain <fact_id>. Reject low-value bare facts or rewrite with reason clauses via memory.store_extraction.",
|
|
40
|
+
"fact_ids": [
|
|
41
|
+
2,
|
|
42
|
+
3,
|
|
43
|
+
4,
|
|
44
|
+
5,
|
|
45
|
+
6,
|
|
46
|
+
8,
|
|
47
|
+
9,
|
|
48
|
+
10,
|
|
49
|
+
12,
|
|
50
|
+
14,
|
|
51
|
+
15
|
|
52
|
+
]
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# 0.12 Soak Baseline — 2026-06-03
|
|
2
|
+
|
|
3
|
+
Day-0 audit snapshot taken across this repo plus eight other real-world projects, most of which have used claude_memory at various points. Baseline for tracking memory-health drift through the 0.12 → 1.0 soak window (target ship ~2026-06-22 to 2026-07-01).
|
|
4
|
+
|
|
5
|
+
## Day-0 hygiene pass (this repo only, 2026-06-03)
|
|
6
|
+
|
|
7
|
+
Before locking in the baseline, ran a focused cleanup on `claude_memory`'s own memory state:
|
|
8
|
+
|
|
9
|
+
1. **Rejected 9 auto-memory duplicates** (IDs 30, 31, 36, 81, 82, 87, 143, 144, 146) — older terse facts whose auto-memory-imported richer versions (in the 213/218/225/227/228/229/230/231 range) cover the same ground better.
|
|
10
|
+
2. **Appended explicit reason clauses to 38 bare-conclusion facts** via a single transactional SQL UPDATE — each fact's prose already implied a `because` / `so that` / `to avoid` / etc.; the BareConclusionDetector just needs the literal marker. The appends are documented per-ID in `git log -p .claude/memory.sqlite3` (commit details).
|
|
11
|
+
|
|
12
|
+
**Effect on `claude_memory` audit:**
|
|
13
|
+
- C007 bare-conclusion ratio: **89% (47/53) → 0% (0/44)** ✅
|
|
14
|
+
- Project active facts: 76 → 67 (the 9 dup rejects)
|
|
15
|
+
- Audit `ok`: false → true (no errors, two C010 warnings remain as historical scar-tissue, one C009 info)
|
|
16
|
+
|
|
17
|
+
Apart from the partial C007 cleanup on `agent-training-program` (100% → 35%, see below), other projects were not touched — their bare-conclusion ratios (79–100%) remain as captured below and are the right reference points for measuring cross-project drift.
|
|
18
|
+
|
|
19
|
+
Audit JSONs preserved at `docs/soak/audit_2026-06-03_<project>.json` so each can be diffed against future snapshots.
|
|
20
|
+
|
|
21
|
+
## Installed gem state
|
|
22
|
+
|
|
23
|
+
- `claude-memory --version` → **0.12.0**
|
|
24
|
+
- Binary path: `/Users/valentinostoll/.gem/ruby/4.0.2/bin/claude-memory`
|
|
25
|
+
- Every project's hooks resolve to the same global binary, so "upgrading" is a single `rake install` from `claude_memory/` — already done at release time.
|
|
26
|
+
|
|
27
|
+
## Cross-project comparison (9 projects)
|
|
28
|
+
|
|
29
|
+
Captured against installed gem **0.12.0**. Audit JSONs at `docs/soak/audit_2026-06-03_<project>.json`.
|
|
30
|
+
|
|
31
|
+
| Project | OK | Err | Warn | Info | Active facts | Setup state | Notable findings |
|
|
32
|
+
|---|---|---|---|---|---|---|---|
|
|
33
|
+
| `claude_memory` | ✅ | 0 | 2 | 1 | 67 | full | Post Day-0 cleanup; **0% bare conclusions** (was 89%); 2 C010 historical-churn warns |
|
|
34
|
+
| `agent-training-program` | ✅ | 0 | 0 | 2 | 27 | DB only | Post Day-0 cleanup; **35% bare** (was 100%); 24 unimported auto-memory files |
|
|
35
|
+
| `nowreading.dev` | ✅ | 0 | 0 | 0 | 0 | none | No project DB; not in use |
|
|
36
|
+
| `ups.dev` | ❌ | 1 | 0 | 1 | 15 | full | **107-item distillation backlog (C003)**; 79% bare |
|
|
37
|
+
| `agentic` | ✅ | 0 | 1 | 0 | 0 | full | **C008 starvation** — DB exists with 0 active facts (rejected or never ingested) |
|
|
38
|
+
| `ai-software-architect` | ✅ | 0 | 0 | 0 | 0 | settings only | Hooks configured, never received a session that wrote facts |
|
|
39
|
+
| `chaos_to_the_rescue` | ✅ | 0 | 1 | 1 | 17 | DB only | **46-item distillation backlog (C003 warn)**; bare-conclusion ratio not flagged (clean? — needs spot check); no settings.json |
|
|
40
|
+
| `daily-vibe` | ❌ | 1 | 0 | 1 | 17 | full | **160-item distillation backlog (C003 error)** — biggest in the set; 100% bare (14/14) |
|
|
41
|
+
| `minerva-sky` | ✅ | 0 | 0 | 0 | 0 | none | Never set up |
|
|
42
|
+
|
|
43
|
+
All projects' global stores aligned at **4 active facts** (same `~/.claude/memory.sqlite3`).
|
|
44
|
+
|
|
45
|
+
## Cross-project patterns (cleaner view with 9 projects)
|
|
46
|
+
|
|
47
|
+
### Distillation backlog is structural, not project-specific
|
|
48
|
+
|
|
49
|
+
Three of the six setups with project DBs have meaningful C003 backlogs:
|
|
50
|
+
|
|
51
|
+
| Project | Backlog (undistilled items) |
|
|
52
|
+
|---|---|
|
|
53
|
+
| `daily-vibe` | **160** (Error) |
|
|
54
|
+
| `ups.dev` | **107** (Error) |
|
|
55
|
+
| `chaos_to_the_rescue` | 46 (Warn) |
|
|
56
|
+
|
|
57
|
+
That's a cross-project signal: Layer-2 SessionStart context injection isn't catching up in projects with long-lived sessions and steady transcript intake. The injection only fires on fresh sessions (`startup` / `resume` / `clear` per `Hook::ContextInjector#fresh_session?`); long sessions without restart silently skip the distillation prompt indefinitely.
|
|
58
|
+
|
|
59
|
+
**0.13 candidate:** add a CLI command (e.g. `claude-memory distill --drain` or similar) that processes undistilled items in batches without requiring an in-Claude-Code session, OR auto-trigger Layer-2 injection mid-session after N new content items accumulate. Without this, the backlog grows monotonically in any actively-used project.
|
|
60
|
+
|
|
61
|
+
### Setup drift is a real failure mode
|
|
62
|
+
|
|
63
|
+
Three out of six "set-up at some point" projects have partial/stale state:
|
|
64
|
+
|
|
65
|
+
- `agent-training-program`, `chaos_to_the_rescue`: DB present, no `settings.json` — hooks aren't firing locally; the DB is frozen at the last point hooks ran.
|
|
66
|
+
- `ai-software-architect`: `settings.json` present, no DB — hooks configured but never accumulated facts.
|
|
67
|
+
|
|
68
|
+
Today the audit doesn't surface this — `claude-memory check-setup` MCP tool covers it, but it's not in the CLI audit. **0.13 candidate:** an audit check that flags the DB/settings-disagreement state.
|
|
69
|
+
|
|
70
|
+
### Detector regex strictness (newly discovered)
|
|
71
|
+
|
|
72
|
+
The C007 pass on `agent-training-program` exposed a detector blind spot. `BareConclusionDetector::REASON_PATTERNS` accepts `to avoid / prevent / ensure / support / allow / enable / make / fix / handle` — but rejects natural variants like `to keep / capture / preserve / match` even though those are equally valid causal phrasings. 7 of the 20 reason-clause appends on `agent-training-program` used non-accepted verbs and re-flagged. **0.13 candidate:** widen the regex (or move to a small embedding-based check) so the visibility signal isn't punishing valid reasoning.
|
|
73
|
+
|
|
74
|
+
### C007 effect of the Day-0 cleanup
|
|
75
|
+
|
|
76
|
+
| Project | Pre-cleanup | Post-cleanup |
|
|
77
|
+
|---|---|---|
|
|
78
|
+
| `claude_memory` | 89% (47/53) | **0% (0/44)** |
|
|
79
|
+
| `agent-training-program` | 100% (24/24) | **35% (7/20)** |
|
|
80
|
+
|
|
81
|
+
Untouched: `ups.dev` 79% (11/14), `daily-vibe` 100% (14/14), `chaos_to_the_rescue` (not flagged but worth a spot check). These remain reference points for measuring the bare-conclusion-rate drift over the soak window.
|
|
82
|
+
|
|
83
|
+
## Cross-project patterns
|
|
84
|
+
|
|
85
|
+
### Universal: bare-conclusion rate (C007)
|
|
86
|
+
|
|
87
|
+
Every active project carries the long-tail of pre-0.11 facts without reason clauses:
|
|
88
|
+
|
|
89
|
+
- `claude_memory`: 89% (47/53)
|
|
90
|
+
- `agent-training-program`: **100%** (24/24)
|
|
91
|
+
- `ups.dev`: 79% (11/14)
|
|
92
|
+
|
|
93
|
+
The 0.11 reason-clause distillation prompt only protects *newly extracted* facts. Existing fact bases everywhere need either incremental rewrite or selective reject. This is the most awkward signal for the 1.0 visibility pillar.
|
|
94
|
+
|
|
95
|
+
### Activation drift
|
|
96
|
+
|
|
97
|
+
Two projects have `.claude/memory.sqlite3` but no `.claude/settings.json` — they were set up at some point and then drifted. Effect: hooks don't fire locally, so memory stops accumulating but the DB lingers. Worth a one-line `claude-memory init` to reactivate (or formal removal if abandoned).
|
|
98
|
+
|
|
99
|
+
### Distillation backlog (`ups.dev`)
|
|
100
|
+
|
|
101
|
+
107 content items ingested but never deeply distilled. Layer 2 (SessionStart context injection acting as distiller) isn't catching up. Two paths:
|
|
102
|
+
|
|
103
|
+
1. Run `/distill-transcripts` in `ups.dev` to clear the backlog.
|
|
104
|
+
2. Investigate why Layer 2 isn't keeping up — context-hook injection of pending items only fires on fresh sessions (`startup`/`resume`/`clear`); if `ups.dev` sessions are long-lived without restart, the prompt never appears.
|
|
105
|
+
|
|
106
|
+
## What to watch during soak (per project)
|
|
107
|
+
|
|
108
|
+
Re-run on a weekly cadence and diff against the 2026-06-03 baseline. Five-signal watch:
|
|
109
|
+
|
|
110
|
+
| Signal | Tool | Threshold to investigate |
|
|
111
|
+
|---|---|---|
|
|
112
|
+
| Harm rate | `EVAL_MODE=real HARM_BENCH_RUNS=3 bundle exec rspec spec/benchmarks/e2e/harm_bench_spec.rb` (claude_memory repo only) | Any harm = patch before 1.0 |
|
|
113
|
+
| Open conflicts | `claude-memory audit --json` per project | Any growing trend |
|
|
114
|
+
| Bare-conclusion ratio | `audit` C007 | Worsens (the floor is "stays at current %") |
|
|
115
|
+
| Distillation backlog (C003) | `audit` C003 | Grows in any project that was zero |
|
|
116
|
+
| Active fact count drift | `audit.stats.project.active_facts` | Sudden spikes (re-contamination) or drops (mass reject) |
|
|
117
|
+
|
|
118
|
+
## Suggested 0.12.x patch candidates (optional, not 1.0 blockers)
|
|
119
|
+
|
|
120
|
+
- ✅ `claude_memory` C007 cleanup (done 2026-06-03).
|
|
121
|
+
- ✅ `agent-training-program` C007 partial cleanup (done 2026-06-03; 100% → 35%; residual 7 facts blocked by detector regex strictness — see 0.13 candidate below).
|
|
122
|
+
- `agent-training-program`: run `claude-memory import-auto-memory` once to drain the 24 pending auto-memory files. Fast win.
|
|
123
|
+
- `agent-training-program` and `chaos_to_the_rescue`: re-add `.claude/settings.json` if the projects are still active, otherwise archive the DBs.
|
|
124
|
+
- `ups.dev`, `daily-vibe`, `chaos_to_the_rescue`: distillation backlogs (107/160/46). `/distill-transcripts` is a Claude Code skill that runs *in-session* in each project — I can't invoke it from the `claude_memory` repo. Two paths: (a) open Claude Code in each affected project and run `/distill-transcripts`; (b) wait for the 0.13 CLI drain command (see 0.13 candidates below).
|
|
125
|
+
- `agentic` C008 starvation: DB has 0 active facts despite settings.json being configured. Either no Claude Code sessions have written facts there yet, or all facts have been rejected. Investigate when convenient.
|
|
126
|
+
- `nowreading.dev`, `minerva-sky`: decide — initialize with claude_memory or accept "untouched" as the steady state.
|
|
127
|
+
|
|
128
|
+
## 0.13 candidates surfaced by this cross-project audit
|
|
129
|
+
|
|
130
|
+
1. **Drainable distillation backlog.** Add a CLI command that processes undistilled content items without requiring a fresh Claude Code session, or trigger Layer-2 injection mid-session when the backlog exceeds N items. The current Layer-2 path only fires on `fresh_session?` (startup/resume/clear), so long-lived sessions silently accrue backlog forever. Most impactful 0.13 item per the cross-project signal — 3 of 6 active-setup projects are affected, one with 160 items.
|
|
131
|
+
2. **Setup-drift audit check.** New C-code check that compares `.claude/memory.sqlite3` presence against `.claude/settings.json` presence and flags the disagreement state. Three of nine projects today have one without the other.
|
|
132
|
+
3. **Widen `BareConclusionDetector::REASON_PATTERNS`.** Accept `to keep / capture / preserve / match / record / track / store / serve / protect / safeguard` (and similar) — the current allowlist is too narrow and punishes natural reasoning. Surfaced when 7 of 20 honest reason-clause appends on `agent-training-program` failed the regex even though each contained valid causal language.
|
|
133
|
+
4. **Cross-project audit aggregation.** A `claude-memory audit --multi <dir>...` mode that runs the audit across N projects and emits one comparison table. The Day-0 baseline doc had to be assembled by hand; should be automated by 1.0 if cross-project soak comparisons are going to be a recurring practice.
|
|
134
|
+
|
|
135
|
+
## Reproducing this snapshot
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
mkdir -p docs/soak
|
|
139
|
+
./exe/claude-memory audit --json --no-exit > docs/soak/audit_<DATE>_claude_memory.json
|
|
140
|
+
(cd ../agent-training-program && claude-memory audit --json --no-exit) > docs/soak/audit_<DATE>_agent-training-program.json
|
|
141
|
+
(cd ../nowreading.dev && claude-memory audit --json --no-exit) > docs/soak/audit_<DATE>_nowreading.dev.json
|
|
142
|
+
(cd ../ups.dev && claude-memory audit --json --no-exit) > docs/soak/audit_<DATE>_ups.dev.json
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Diff against this baseline with `jq` or any JSON diff tool of choice.
|
|
@@ -226,6 +226,155 @@ module ClaudeMemory
|
|
|
226
226
|
end
|
|
227
227
|
end
|
|
228
228
|
|
|
229
|
+
# Scopes whose stores carry an observations table. Observation checks
|
|
230
|
+
# iterate both DBs because observations may be project- or global-scoped.
|
|
231
|
+
OBSERVATION_SCOPES = %i[project global].freeze
|
|
232
|
+
|
|
233
|
+
# Valid observation lifecycle states. Anything else means a writer or
|
|
234
|
+
# migration stamped a status the resolver/reflector never produce.
|
|
235
|
+
OBSERVATION_STATUSES = %w[active consolidated expired].freeze
|
|
236
|
+
|
|
237
|
+
def observation_stores(manager)
|
|
238
|
+
OBSERVATION_SCOPES
|
|
239
|
+
.map { |scope| [scope, manager.store_if_exists(scope.to_s)] }
|
|
240
|
+
.reject { |_, store| store.nil? }
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# C011 — Orphaned observations (provenance points at a missing content item).
|
|
244
|
+
def orphaned_observations(manager)
|
|
245
|
+
observation_stores(manager).flat_map do |scope, store|
|
|
246
|
+
content_ids = store.content_items.select(:id)
|
|
247
|
+
orphans = store.observations
|
|
248
|
+
.exclude(source_content_item_id: nil)
|
|
249
|
+
.exclude(source_content_item_id: content_ids)
|
|
250
|
+
.select(:id)
|
|
251
|
+
.all
|
|
252
|
+
next [] if orphans.empty?
|
|
253
|
+
|
|
254
|
+
[Finding.new(
|
|
255
|
+
id: "C011",
|
|
256
|
+
severity: :warn,
|
|
257
|
+
title: "#{orphans.size} observation(s) in #{scope} DB reference a missing content item",
|
|
258
|
+
detail: "An observation's source_content_item_id should point at the content_items row it was distilled from. A dangling pointer means the source row was pruned or never existed, so the observation's provenance can no longer be explained.",
|
|
259
|
+
suggestion: "Inspect with memory.observations. These rows are append-only; if the provenance is unrecoverable, consolidate or expire them via the Reflector (PreCompact/SessionEnd) rather than deleting.",
|
|
260
|
+
fact_ids: orphans.map { |r| r[:id] }
|
|
261
|
+
)]
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
# C012 — Promotion consistency (promoted_at ⇔ promoted_fact_id, fact must exist + be active).
|
|
266
|
+
def observation_promotion_consistency(manager)
|
|
267
|
+
observation_stores(manager).flat_map do |scope, store|
|
|
268
|
+
active_fact_ids = store.facts.where(status: "active").select(:id)
|
|
269
|
+
|
|
270
|
+
missing_fact_id = store.observations
|
|
271
|
+
.exclude(promoted_at: nil)
|
|
272
|
+
.where(promoted_fact_id: nil)
|
|
273
|
+
.select(:id).all
|
|
274
|
+
dangling_fact = store.observations
|
|
275
|
+
.exclude(promoted_fact_id: nil)
|
|
276
|
+
.exclude(promoted_fact_id: store.facts.select(:id))
|
|
277
|
+
.select(:id, :promoted_fact_id).all
|
|
278
|
+
inactive_fact = store.observations
|
|
279
|
+
.exclude(promoted_fact_id: nil)
|
|
280
|
+
.exclude(promoted_fact_id: active_fact_ids)
|
|
281
|
+
.exclude(promoted_fact_id: dangling_fact.map { |r| r[:promoted_fact_id] })
|
|
282
|
+
.select(:id, :promoted_fact_id).all
|
|
283
|
+
missing_timestamp = store.observations
|
|
284
|
+
.exclude(promoted_fact_id: nil)
|
|
285
|
+
.where(promoted_at: nil)
|
|
286
|
+
.select(:id).all
|
|
287
|
+
|
|
288
|
+
obs_ids = (missing_fact_id + dangling_fact + inactive_fact + missing_timestamp).map { |r| r[:id] }.uniq
|
|
289
|
+
next [] if obs_ids.empty?
|
|
290
|
+
|
|
291
|
+
problems = []
|
|
292
|
+
problems << "#{missing_fact_id.size} promoted but missing promoted_fact_id" unless missing_fact_id.empty?
|
|
293
|
+
problems << "#{dangling_fact.size} promoted_fact_id pointing at a non-existent fact" unless dangling_fact.empty?
|
|
294
|
+
problems << "#{inactive_fact.size} promoted into a non-active fact" unless inactive_fact.empty?
|
|
295
|
+
problems << "#{missing_timestamp.size} have promoted_fact_id but no promoted_at" unless missing_timestamp.empty?
|
|
296
|
+
|
|
297
|
+
[Finding.new(
|
|
298
|
+
id: "C012",
|
|
299
|
+
severity: :error,
|
|
300
|
+
title: "#{obs_ids.size} observation(s) in #{scope} DB have inconsistent promotion state",
|
|
301
|
+
detail: "Promotion must be atomic: a promoted observation has both promoted_at set and promoted_fact_id pointing at an existing, active fact. Violations (#{problems.join("; ")}) mean mark_observation_promoted ran partially or the target fact was later rejected/superseded, leaving the observation pointing at nothing usable.",
|
|
302
|
+
suggestion: "Inspect the fact with claude-memory explain <fact_id>. If the fact was intentionally rejected, the observation should be re-opened for re-promotion via memory.promote_observation; if mark_observation_promoted half-ran, re-run promotion.",
|
|
303
|
+
fact_ids: obs_ids
|
|
304
|
+
)]
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
# C013 — Tombstone-chain validity (consolidated_into must point to a real,
|
|
309
|
+
# non-self row and a consolidated observation must not stay active).
|
|
310
|
+
def observation_tombstone_chain(manager)
|
|
311
|
+
observation_stores(manager).flat_map do |scope, store|
|
|
312
|
+
obs_ids = store.observations.select(:id)
|
|
313
|
+
|
|
314
|
+
dangling = store.observations
|
|
315
|
+
.exclude(consolidated_into: nil)
|
|
316
|
+
.exclude(consolidated_into: obs_ids)
|
|
317
|
+
.select(:id, :consolidated_into).all
|
|
318
|
+
self_link = store.observations
|
|
319
|
+
.exclude(consolidated_into: nil)
|
|
320
|
+
.where(Sequel[:consolidated_into] => Sequel[:id])
|
|
321
|
+
.select(:id).all
|
|
322
|
+
active_but_tombstoned = store.observations
|
|
323
|
+
.exclude(consolidated_into: nil)
|
|
324
|
+
.where(status: "active")
|
|
325
|
+
.select(:id).all
|
|
326
|
+
consolidated_without_link = store.observations
|
|
327
|
+
.where(status: "consolidated", consolidated_into: nil)
|
|
328
|
+
.select(:id).all
|
|
329
|
+
|
|
330
|
+
flagged = (dangling + self_link + active_but_tombstoned + consolidated_without_link).map { |r| r[:id] }.uniq
|
|
331
|
+
next [] if flagged.empty?
|
|
332
|
+
|
|
333
|
+
problems = []
|
|
334
|
+
problems << "#{dangling.size} consolidated_into → missing observation" unless dangling.empty?
|
|
335
|
+
problems << "#{self_link.size} consolidated_into self-link" unless self_link.empty?
|
|
336
|
+
problems << "#{active_but_tombstoned.size} active yet have a consolidated_into target" unless active_but_tombstoned.empty?
|
|
337
|
+
problems << "#{consolidated_without_link.size} status=consolidated with no consolidated_into keeper" unless consolidated_without_link.empty?
|
|
338
|
+
|
|
339
|
+
[Finding.new(
|
|
340
|
+
id: "C013",
|
|
341
|
+
severity: :error,
|
|
342
|
+
title: "#{flagged.size} observation(s) in #{scope} DB have a broken tombstone chain",
|
|
343
|
+
detail: "Tombstoning is append-only: a superseded observation gets status=consolidated and consolidated_into pointing at the surviving keeper. Violations (#{problems.join("; ")}) corrupt the lineage — recall could surface a tombstoned row, or a consolidated row could orphan its history.",
|
|
344
|
+
suggestion: "Inspect with memory.observations. Re-run the deterministic Reflector (fires on PreCompact/SessionEnd) to re-derive consolidation; a self-link or active+tombstoned row indicates a Reflector bug — file it rather than hand-editing the append-only table.",
|
|
345
|
+
fact_ids: flagged
|
|
346
|
+
)]
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
# C014 — Status / corroboration sanity (known status set, corroboration ≥ 1).
|
|
351
|
+
def observation_status_corroboration(manager)
|
|
352
|
+
observation_stores(manager).flat_map do |scope, store|
|
|
353
|
+
bad_status = store.observations
|
|
354
|
+
.exclude(status: OBSERVATION_STATUSES)
|
|
355
|
+
.select(:id).all
|
|
356
|
+
bad_corroboration = store.observations
|
|
357
|
+
.where { corroboration_count < 1 }
|
|
358
|
+
.select(:id).all
|
|
359
|
+
|
|
360
|
+
flagged = (bad_status + bad_corroboration).map { |r| r[:id] }.uniq
|
|
361
|
+
next [] if flagged.empty?
|
|
362
|
+
|
|
363
|
+
problems = []
|
|
364
|
+
problems << "#{bad_status.size} with status outside #{OBSERVATION_STATUSES.inspect}" unless bad_status.empty?
|
|
365
|
+
problems << "#{bad_corroboration.size} with corroboration_count < 1" unless bad_corroboration.empty?
|
|
366
|
+
|
|
367
|
+
[Finding.new(
|
|
368
|
+
id: "C014",
|
|
369
|
+
severity: :warn,
|
|
370
|
+
title: "#{flagged.size} observation(s) in #{scope} DB have invalid status/corroboration",
|
|
371
|
+
detail: "Every observation should carry a known lifecycle status (#{OBSERVATION_STATUSES.join("/")}) and at least one sighting (corroboration_count ≥ 1; a fresh insert counts as 1). Violations (#{problems.join("; ")}) break the promotion gate (which keys off corroboration) and the recall filters (which key off status).",
|
|
372
|
+
suggestion: "Inspect with memory.observations. A corroboration_count < 1 means increment_corroboration math went negative; an unknown status means a migration or external writer bypassed insert_observation. Re-derive via the Reflector if possible.",
|
|
373
|
+
fact_ids: flagged
|
|
374
|
+
)]
|
|
375
|
+
end
|
|
376
|
+
end
|
|
377
|
+
|
|
229
378
|
def normalize_convention(text)
|
|
230
379
|
text.to_s
|
|
231
380
|
.downcase
|
|
@@ -20,6 +20,10 @@ module ClaudeMemory
|
|
|
20
20
|
bare_conclusion_rate
|
|
21
21
|
project_starvation
|
|
22
22
|
auto_memory_unimported
|
|
23
|
+
orphaned_observations
|
|
24
|
+
observation_promotion_consistency
|
|
25
|
+
observation_tombstone_chain
|
|
26
|
+
observation_status_corroboration
|
|
23
27
|
].freeze
|
|
24
28
|
|
|
25
29
|
Result = Data.define(:findings, :stats) do
|
|
@@ -138,7 +138,7 @@ module ClaudeMemory
|
|
|
138
138
|
|
|
139
139
|
predicates = db[:facts].select(:predicate, :status).group_and_count(:predicate, :status).all
|
|
140
140
|
.each_with_object(Hash.new { |h, k| h[k] = Hash.new(0) }) do |row, acc|
|
|
141
|
-
|
|
141
|
+
acc[row[:predicate].to_s][row[:status].to_s] += row[:count].to_i
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
entity_types = db[:entities].group_and_count(:type).all.each_with_object(Hash.new(0)) do |row, acc|
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClaudeMemory
  module Commands
    module Checks
      # Surfaces the active embedding provider, model, and dimension
      # alignment between provider and stored vectors.
      #
      # Doctor previously had VecCheck (sqlite-vec extension + index
      # coverage) but no signal about which provider was actually in use —
      # so a user could see "sqlite-vec available ✓" while silently
      # running on tfidf default when fastembed was loadable. This check
      # closes that visibility gap and points users at
      # `claude-memory setup-vectors` to opt into fastembed.
      class EmbeddingsCheck
        FASTEMBED_HINT = "Set CLAUDE_MEMORY_EMBEDDING_PROVIDER=fastembed for higher-quality semantic recall (fastembed is loadable on this system). " \
          "Run 'claude-memory setup-vectors' to configure."

        FASTEMBED_INSTALL_HINT = "fastembed is not installed; semantic recall is using tfidf (lower quality). " \
          "Run 'claude-memory setup-vectors' to install fastembed and switch."

        # Runs the check.
        #
        # Returns a Hash with :status (:ok or :warning), :label, :message,
        # :details and :warnings. Any StandardError raised while building the
        # report is converted into a :warning result carrying the error
        # message, so this method itself does not raise StandardError.
        def call
          provider = Embeddings.resolve
          provider_name = provider.name
          warnings = []

          # Hint when user is on default tfidf — different message
          # depending on whether fastembed is even loadable.
          if provider_name == "tfidf"
            warnings << (fastembed_loadable? ? FASTEMBED_HINT : FASTEMBED_INSTALL_HINT)
          end

          dim_mismatches = check_dimension_alignment(provider)
          warnings.concat(dim_mismatches)

          {
            status: warnings.any? ? :warning : :ok,
            label: "embeddings",
            message: "Embedding provider: #{provider_name}, dimensions: #{provider.dimensions}",
            details: {
              provider: provider_name,
              dimensions: provider.dimensions,
              fastembed_loadable: fastembed_loadable?
            },
            warnings: warnings
          }
        rescue => e
          {
            status: :warning,
            label: "embeddings",
            message: "Embedding provider check failed: #{e.message}",
            details: {},
            warnings: []
          }
        end

        private

        # Whether `require "fastembed"` succeeds in this process. The require
        # is deliberately lazy (only attempted when the check runs) and the
        # answer is memoized, including a false result.
        def fastembed_loadable?
          return @fastembed_loadable if defined?(@fastembed_loadable)
          @fastembed_loadable = begin
            require "fastembed"
            true
          rescue LoadError
            false
          end
        end

        # Compares the dimensions stored in each existing database against
        # the given provider and returns an Array of warning strings, one per
        # database that Embeddings::DimensionCheck reports as :mismatch.
        #
        # Databases are labelled by scope ("global" / "project") rather than
        # by their parent directory name: when both files sit in a directory
        # of the same name, a directory-derived label cannot tell the user
        # which database needs re-indexing.
        def check_dimension_alignment(provider)
          config = Configuration.new
          mismatches = []

          scoped_paths = {"global" => config.global_db_path, "project" => config.project_db_path}
          scoped_paths.each do |scope, db_path|
            next unless File.exist?(db_path)

            store = nil
            begin
              store = Store::SQLiteStore.new(db_path)
              result = Embeddings::DimensionCheck.call(store, provider)
              next unless result.status == :mismatch

              mismatches << "Dimension mismatch in #{scope} DB: " \
                "stored=#{result.stored} but current provider produces #{result.current}. " \
                "Run 'claude-memory index --force' to re-embed under the current provider."
            rescue => e
              # Best-effort: a database that cannot be inspected is logged and
              # skipped so the other database is still checked.
              ClaudeMemory.logger.debug("EmbeddingsCheck dimension check failed for #{db_path}: #{e.message}")
            ensure
              # Runs on the `next` path too, so the store is always closed.
              store&.close
            end
          end

          mismatches
        end
      end
    end
  end
end
|
|
@@ -23,6 +23,7 @@ module ClaudeMemory
|
|
|
23
23
|
Checks::DistillCheck.new(manager.global_db_path, "global"),
|
|
24
24
|
Checks::DistillCheck.new(manager.project_db_path, "project"),
|
|
25
25
|
Checks::VecCheck.new,
|
|
26
|
+
Checks::EmbeddingsCheck.new,
|
|
26
27
|
Checks::SnapshotCheck.new,
|
|
27
28
|
Checks::ClaudeMdCheck.new,
|
|
28
29
|
Checks::HooksCheck.new
|