claude_memory 0.12.1 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/rules/claude_memory.generated.md +6 -1
  4. data/.claude/settings.local.json +2 -1
  5. data/.claude-plugin/marketplace.json +2 -2
  6. data/.claude-plugin/plugin.json +2 -2
  7. data/CHANGELOG.md +38 -0
  8. data/CLAUDE.md +11 -6
  9. data/README.md +35 -0
  10. data/db/migrations/019_add_observations.rb +43 -0
  11. data/db/migrations/020_add_observation_promotion.rb +33 -0
  12. data/docs/GETTING_STARTED.md +38 -0
  13. data/docs/api_stability.md +16 -5
  14. data/docs/architecture.md +18 -6
  15. data/docs/audit_runbook.md +67 -0
  16. data/docs/dashboard.md +28 -0
  17. data/docs/improvements.md +173 -1
  18. data/docs/influence/mastra-observational-memory.md +198 -0
  19. data/docs/influence/strands-agent-sops.md +163 -0
  20. data/docs/quality_review.md +45 -0
  21. data/lib/claude_memory/audit/checks.rb +149 -0
  22. data/lib/claude_memory/audit/runner.rb +4 -0
  23. data/lib/claude_memory/commands/census_command.rb +1 -1
  24. data/lib/claude_memory/commands/hook_command.rb +16 -3
  25. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +3 -1
  26. data/lib/claude_memory/commands/install_skill_command.rb +4 -0
  27. data/lib/claude_memory/commands/observations_command.rb +367 -0
  28. data/lib/claude_memory/commands/registry.rb +1 -0
  29. data/lib/claude_memory/commands/skills/reflect.md +68 -0
  30. data/lib/claude_memory/commands/stats_command.rb +60 -1
  31. data/lib/claude_memory/dashboard/api.rb +4 -0
  32. data/lib/claude_memory/dashboard/index.html +154 -2
  33. data/lib/claude_memory/dashboard/observations.rb +115 -0
  34. data/lib/claude_memory/dashboard/server.rb +1 -0
  35. data/lib/claude_memory/distill/extraction.rb +6 -4
  36. data/lib/claude_memory/distill/null_distiller.rb +108 -3
  37. data/lib/claude_memory/distill/reference_material_detector.rb +4 -1
  38. data/lib/claude_memory/domain/observation.rb +118 -0
  39. data/lib/claude_memory/embeddings/generator.rb +1 -1
  40. data/lib/claude_memory/hook/context_injector.rb +125 -2
  41. data/lib/claude_memory/mcp/handlers/management_handlers.rb +113 -2
  42. data/lib/claude_memory/mcp/handlers/query_handlers.rb +48 -1
  43. data/lib/claude_memory/mcp/instructions_builder.rb +1 -0
  44. data/lib/claude_memory/mcp/query_guide.rb +28 -0
  45. data/lib/claude_memory/mcp/tool_definitions.rb +58 -0
  46. data/lib/claude_memory/mcp/tools.rb +3 -0
  47. data/lib/claude_memory/observe/observations_renderer.rb +49 -0
  48. data/lib/claude_memory/observe/reflector.rb +107 -0
  49. data/lib/claude_memory/observe/token_overlap_matcher.rb +55 -0
  50. data/lib/claude_memory/publish.rb +53 -1
  51. data/lib/claude_memory/resolve/resolver.rb +45 -8
  52. data/lib/claude_memory/store/schema_manager.rb +1 -1
  53. data/lib/claude_memory/store/sqlite_store.rb +181 -0
  54. data/lib/claude_memory/sweep/maintenance.rb +15 -1
  55. data/lib/claude_memory/sweep/sweeper.rb +7 -1
  56. data/lib/claude_memory/version.rb +1 -1
  57. data/lib/claude_memory.rb +6 -0
  58. metadata +12 -1
@@ -0,0 +1,163 @@
1
+ # Strands Agent SOPs Analysis
2
+
3
+ *Analysis Date: 2026-05-01*
4
+ *Source: AWS Open Source Blog — "Introducing Strands Agent SOPs: Natural Language Workflows for AI Agents"*
5
+ *URL: https://aws.amazon.com/blogs/opensource/introducing-strands-agent-sops-natural-language-workflows-for-ai-agents/*
6
+ *Type: Article (not a repo). PyPI package `strands-agents-sops`, GitHub `strands-agents/agent-sop`.*
7
+
8
+ ---
9
+
10
+ ## Executive Summary
11
+
12
+ **Agent SOPs** are markdown-based "Standard Operating Procedures" that wrap an AI agent's instructions in a parameterized, RFC-2119-keyworded, chain-able format. Amazon teams use thousands of them internally; the open-source release ships four reference SOPs (`codebase-summary`, `pdd`, `code-task-generator`, `code-assist`) and tooling to author/load them via Strands Agents, MCP prompts, Claude Skills, or raw model calls.
13
+
14
+ **Verdict for ClaudeMemory**: ClaudeMemory already implements most of what SOPs propose, under different names. Skills (`/distill-transcripts`, `/release`, `/study-repo`) *are* SOPs. The hook context injection pipeline already chains stages (ingest → distill → resolve → publish). The current distillation prompt already uses RFC-2119-ish "MUST" language for the reason-clause requirement.
15
+
16
+ The genuinely novel ideas — and the only ones worth a closer look — are:
17
+ 1. **Explicit parameter contracts** at SOP entry (Required/Optional with defaults), versus our skills that take freeform `$ARGUMENTS`.
18
+ 2. **Progress checkpoints + resumability** for long-running workflows (`✅ Step 1 complete` style markers the agent emits).
19
+ 3. **Self-describing format spec** (`strands-agents-sops rule` command) that lets Claude author new SOPs from a description.
20
+
21
+ The rest is old news for us. **Recommendation: do not adopt the Strands library or format. Borrow two narrow ideas (resumability + explicit parameters) into `/distill-transcripts` if and only if real distillation runs are large enough to fail mid-batch.**
22
+
23
+ ## What an SOP Actually Is
24
+
25
+ Per the article, an SOP is markdown with these conventions:
26
+
27
+ - **RFC 2119 keywords** (MUST / SHOULD / MAY) for behavioral control.
28
+ - **Required/Optional parameters block** with defaults — gathered from the user via natural-language dialogue at invocation time.
29
+ - **Numbered steps** the agent executes sequentially.
30
+ - **Progress annotations** the agent prints as it goes (`✅ Validated codebase path exists`).
31
+ - **Output artifacts** in a conventional `.sop/<name>/` directory, used as handoff between chained SOPs.
32
+
33
+ Example parameter declaration (the only verbatim format snippet in the article):
34
+
35
+ ```
36
+ Required Parameters:
37
+ • codebase_path: Path to the codebase to analyze
38
+ Optional Parameters:
39
+ • output_dir: Directory where documentation will be stored (default: ".sop/summary")
40
+ ```
41
+
42
+ Invocation surfaces:
43
+
44
+ - **Strands Agents (Python)**: `Agent(system_prompt=code_assist, tools=[editor, shell])` — SOP becomes the system prompt.
45
+ - **MCP**: SOPs registered as MCP *prompts* (the `prompts/list` + `prompts/get` channel), invoked with `@codebase-summary` in Kiro CLI / `/prompts` listing.
46
+ - **Claude Skills**: A CLI converts SOPs to Anthropic Skill format.
47
+ - **Direct LLM**: paste into a model's message and run.
48
+
49
+ Composition is **sequential chaining via artifact handoff** — `codebase-summary` writes docs, `pdd` reads them. No nesting/include directive is shown.
50
+
51
+ ## How This Maps to ClaudeMemory Today
52
+
53
+ | Strands concept | Our equivalent | Status |
54
+ |---|---|---|
55
+ | Markdown SOP | `lib/claude_memory/commands/skills/*.md` (Anthropic Skills) | ✅ Have it |
56
+ | MCP prompts surface | `MCP::QueryGuide` registers `memory_guide` via `prompts/list`+`prompts/get` | ✅ Have it |
57
+ | RFC-2119 "MUST" in instructions | `distill-transcripts.md:38-43` uses MUST for reason-clause embed | ✅ Have it |
58
+ | SessionStart prompt injection | `hook_command.rb:213` writes `hookSpecificOutput.additionalContext` | ✅ Have it |
59
+ | SOP chaining via artifacts | `Ingest → Distill → Resolve → Store → Publish` (CLAUDE.md L72-79) | ✅ Have it (DB rows are the artifacts) |
60
+ | AI-assisted SOP authoring | `/skill-creator` skill | ✅ Have it |
61
+ | Format spec exposable to Claude | `strands-agents-sops rule` CLI | ⚠️ Partial — our distillation prompt is in `distill-transcripts.md`, not exposed as a tool |
62
+ | Required/Optional parameter contract | We pass `$ARGUMENTS` as freeform text | ❌ Missing |
63
+ | Progress checkpoints + resumability | `/distill-transcripts` runs end-to-end; no mid-batch checkpoint | ❌ Missing |
64
+ | Reference SOPs (`codebase-summary` etc.) | N/A — wrong domain | ❌ Not applicable |
65
+
66
+ The pattern is clear: we independently arrived at the same architecture. The two rows marked "❌ Missing" are the only candidates worth considering for adoption.
67
+
68
+ ## Where SOPs Could Improve Distillation
69
+
70
+ ### 1. Resumability for `/distill-transcripts`
71
+
72
+ **Current state.** `/distill-transcripts --limit 10` calls `memory.undistilled`, processes items one by one, and calls `memory.mark_distilled` after each. If the run aborts mid-batch (rate limit, context exhaustion, user Ctrl-C), the items processed before the abort are marked; the rest are not. There is no explicit checkpoint file, but the DB itself is the checkpoint.
73
+
74
+ **SOPs angle.** SOPs add visible progress markers (`✅ Item 4/10 complete`) and a resume contract (`if .sop/distill/state.json exists, skip processed items`).
75
+
76
+ **Honest verdict.** Our DB-as-checkpoint already handles resumability. The visible-progress angle is a UX win for big runs but not a correctness improvement. **Do this only if we add a `--limit 100`+ workflow that users actually run.** Today nobody runs that.
77
+
78
+ ### 2. Required/Optional Parameter Block in Skills
79
+
80
+ **Current state.** `/distill-transcripts` accepts `--limit N` parsed implicitly inside the skill body. Other skills accept `$ARGUMENTS` as a freeform blob. Users discover parameters by reading the skill markdown.
81
+
82
+ **SOPs angle.** Declared parameter blocks let the agent prompt the user before running ("what's the codebase_path?"), and let tooling (an SOP registry, MCP prompt list) introspect the contract.
83
+
84
+ **Honest verdict.** Anthropic Skills allow YAML frontmatter that already does this (`argument-hint`, parameter docs). We are under-using that frontmatter. **Cheap, safe improvement.** Adding a `Parameters:` block to the top of `distill-transcripts.md`, `release.md`, `study-repo.md` (and friends) costs ~10 minutes per skill and makes them self-documenting to both humans and any agent reading them.
85
+
86
+ ### 3. Format-Spec-As-Tool for Authoring
87
+
88
+ **Current state.** `/skill-creator` exists. It has the format knowledge in its prompt body.
89
+
90
+ **SOPs angle.** `strands-agents-sops rule` is a CLI command that prints the SOP format spec to stdout, so any agent can `Bash` it and learn how to author one. This is a small but real ergonomic win — the spec lives in one place, not duplicated into every "make a new skill" prompt.
91
+
92
+ **Honest verdict.** Marginal. We don't have a sprawl of skill-authoring locations to consolidate. **Defer indefinitely** unless we start writing many more skills.
93
+
94
+ ## What NOT to Adopt
95
+
96
+ - **The Python package itself.** Strands is a Python agent framework; we're a Ruby gem. No reuse path.
97
+ - **The `.sop/<name>/` artifact directory convention.** We persist via DB rows + `claude_memory.generated.md`. Adding a parallel filesystem artifact tree would just add cleanup burden and an out-of-DB state to reconcile.
98
+ - **The four reference SOPs (`codebase-summary` etc.).** Wrong domain — they're for code-workflow agents, not memory pipelines. Nothing to lift.
99
+ - **Renaming "skills" to "SOPs" in our docs.** Anthropic's term is *Skills*; that's the term Claude Code users know. Adopting Amazon's term creates confusion for zero gain.
100
+ - **Sequential-only chaining as an enforced pattern.** Our pipeline already chains, but we should keep room for parallel work (e.g., NullDistiller layer 1 runs synchronously in the ingest hook regardless of layer 2/3). SOP chaining is sequential by construction.
101
+
102
+ ## Adoption Opportunities
103
+
104
+ ### Medium Priority
105
+
106
+ #### 1. Parameter blocks in skill frontmatter
107
+
108
+ - **Value**: Self-documenting skills; Claude can prompt the user for missing parameters instead of guessing from `$ARGUMENTS`. Better introspectability for any future skill registry UI.
109
+ - **Evidence**: Article's `Required Parameters / Optional Parameters` block — only structural snippet quoted verbatim; Anthropic Skills format already supports `argument-hint` and similar fields we under-use.
110
+ - **Implementation**: Add `## Parameters` section near the top of `lib/claude_memory/commands/skills/distill-transcripts.md`, `release.md`, `study-repo.md`, `quality-update.md`, `improve.md`. Format: bullet list with `name: description (default: …)`.
111
+ - **Effort**: ~30 minutes total across all skills.
112
+ - **Trade-off**: Tiny doc maintenance burden; no runtime cost.
113
+ - **Recommendation**: ADOPT (low-cost, high-clarity).
114
+
115
+ ### Low Priority
116
+
117
+ #### 2. Progress markers + explicit checkpoint file in `/distill-transcripts`
118
+
119
+ - **Value**: Better UX on long runs (users see progress); cleaner resume after mid-batch failure.
120
+ - **Evidence**: Article shows `✅ Validated codebase path exists` style output; SOPs document progress to support resumability.
121
+ - **Implementation**: Have `/distill-transcripts` print `[N/M] item <docid> → K facts` after each `memory.mark_distilled`. Optionally, write `.claude/distill_state.json` with `last_processed_content_id` so a re-run can resume.
122
+ - **Effort**: ~1 hour for stdout markers; ~3 hours including a state file with safe-resume semantics.
123
+ - **Trade-off**: State file adds another moving piece; DB already handles correctness, so this is purely UX. Not worth doing until somebody actually runs `/distill-transcripts --limit 100+` regularly.
124
+ - **Recommendation**: DEFER. Revisit if dashboard/usage data shows multi-hundred-item distillation runs.
125
+
126
+ #### 3. SOP-style format spec exposed as MCP prompt
127
+
128
+ - **Value**: Lets a future "make me a new skill" agent fetch our skill format spec via MCP `prompts/get` instead of duplicating it.
129
+ - **Evidence**: Article's `strands-agents-sops rule` command — same idea.
130
+ - **Implementation**: Add a `skill_authoring_guide` prompt to `MCP::QueryGuide` alongside `memory_guide`.
131
+ - **Effort**: ~1 hour.
132
+ - **Trade-off**: Solves a problem we don't yet have. We have one skill-authoring location (`/skill-creator`).
133
+ - **Recommendation**: DEFER until skill sprawl is a real problem.
134
+
135
+ ### Features to Avoid
136
+
137
+ - **Generic "SOP runtime" abstraction** layered over our skills: pure ceremony. Anthropic Skills already give us the runtime.
138
+ - **`.sop/<name>/` artifact filesystem** parallel to our DB: doubles state, doubles cleanup, halves the value of having a curated SQLite store.
139
+ - **Adopting the term "SOP" anywhere user-facing**: term collision with Skills.
140
+
141
+ ## Implementation Recommendations
142
+
143
+ **Phase 1 (do this in any 0.12.x release).** Add `## Parameters` blocks to the existing skill markdowns. ~30 minutes. Closes the only meaningful gap from this study.
144
+
145
+ **Phase 2 (defer).** Progress markers + `.claude/distill_state.json` checkpoint, only after we see real users running large distillation batches.
146
+
147
+ **Phase 3 (avoid unless triggered).** MCP-prompt-exposed skill format spec, only after we have ≥3 skill-authoring locations to consolidate.
148
+
149
+ ## Architecture Decisions
150
+
151
+ **Preserve.** Our DB-as-checkpoint substrate, our use of Anthropic Skills as the SOP equivalent, our `additionalContext` injection on SessionStart, our distillation prompt's explicit reason-clause requirement.
152
+
153
+ **Adopt.** Explicit parameter declarations in skill frontmatter (Phase 1).
154
+
155
+ **Reject.** Strands Python package, `.sop/` artifact tree, generic SOP runtime abstraction, terminology adoption ("SOP" → user-facing).
156
+
157
+ ## Key Takeaways
158
+
159
+ 1. **We are already doing this.** Strands describes a class of patterns — markdown instructions, MCP prompts, parameterized invocation, sequential chaining, RFC-2119 vocabulary — that ClaudeMemory arrived at independently. The existence of Strands is *validation*, not a roadmap.
160
+ 2. **Anthropic Skills ≈ Strands SOPs.** Same idea, different label, different ecosystem. Don't refactor toward Strands; we'd just be renaming Skills.
161
+ 3. **One narrow win.** Explicit parameter declarations in skill frontmatter cost ~30 minutes and make our skills self-documenting. Worth doing.
162
+ 4. **One narrow defer.** Progress markers + checkpoint files in `/distill-transcripts` are real UX improvements *if* anyone runs distillation at scale; today nobody does. Revisit when the data says to.
163
+ 5. **No deep architectural shifts.** Nothing in the article justifies a redesign of our distillation, storage, or prompting pipelines.
@@ -9,6 +9,51 @@
9
9
 
10
10
  ---
11
11
 
12
+ ## Observational Layer — Pre-Merge Review (2026-06-18)
13
+
14
+ **Review Date:** 2026-06-18
15
+ **Previous Review:** 2026-04-28 (51 days ago)
16
+ **Scope:** the observational-layer branch (`claude/observational-layer-design-7662r9`, 22 commits ahead of `origin/main`, ~57 files). Two parallel expert-lens reviews — core/data layer (migrations 019/020, `Domain::Observation`, `SQLiteStore` observation methods, `Resolver`) and pipeline layer (`NullDistiller` extraction, renderer, `Reflector`, `ContextInjector`, MCP handlers, dashboard panel).
17
+
18
+ **Verdict:** No hard merge-blockers. The append-only/tombstone discipline is consistent, `Domain::Observation` is a clean immutable value object, border validation (`coerce_observation`) is textbook, and test coverage on this surface is above the repo average. The items below are latent correctness edge-cases + cleanups; none break the system as shipped (the layer is experimental and observations are project-scoped only today).
19
+
20
+ ### High — address or consciously accept before merge
21
+
22
+ - **H1 · `consolidate_observations` read-modify-write race** — `lib/claude_memory/store/sqlite_store.rb:805-826` (Evans/Bernhardt). The source `SELECT` and `combined = sources.sum{…}` run *outside* the `@db.transaction` block, and the tombstone `UPDATE` (822) doesn't re-assert `status: "active"`. Two reflectors firing close together (PreCompact + SessionEnd) could double-count corroboration or re-tombstone an already-consolidated source. SQLite's single-writer lock narrows the window but doesn't close the gap. **Fix:** move the read inside the transaction and re-filter `status: "active"` on the update. Mechanical, ~1h incl. spec.
23
+ - **P1 · `noise_body?` over-broad — drops legit prose** — `lib/claude_memory/distill/null_distiller.rb:53,169` (Grimm/Bernhardt). `NOISE_BODY_SIGNATURE` matches `::`, `{}`, `=>` anywhere, so `"decided to adopt ClaudeMemory::Observation as the model"` is silently dropped — common in Ruby prose. Confirmed at runtime. **This is a precision-tuning *design* change to extraction behavior, not a mechanical fix** — per the project's data-driven-design convention it should be surveyed against real corpus data before retuning, not changed blind. Candidate: narrow to strong structural markers (`def `/`class `/`module `, JSON `","`/`":\s*"`, `$(`, `&&`, `||`), drop bare `::`/`{}`/`=>`, add a false-negative spec corpus.
24
+ - **P2 · cross-scope promote nudge — latent wrong-DB landmine** — `lib/claude_memory/hook/context_injector.rb:159-194` + `lib/claude_memory/mcp/handlers/management_handlers.rb:88` (Evans/Beck). `fetch_promotion_candidates` flat-maps project+global stores; the reflection block emits `[obs #<id>]` (a *per-DB* autoincrement id) with no scope; `promote_observation`/`consolidate_observations` default `scope: "project"`. A global-store candidate would route the promote call to the wrong DB. **Dead today** (nothing writes observations to the global DB), but a genuine landmine if global observations ever appear. **Fix:** restrict reflection candidates to the project store + document, or scope-tag the nudge line (mirror `emitted_facts_by_scope`). ~1-2h.
25
+ - **M1 · `consolidate_observations` has zero test coverage** — the most complex method in `sqlite_store.rb` is the only observation method with no specs (the `< 2 → nil` guard, summed-corroboration-tips-threshold, multi-row tombstone are all load-bearing and untested). Add specs alongside the H1 fix. ~1h.
26
+
27
+ ### Medium
28
+
29
+ - **M2/P7 · token-estimate `/4.0` heuristic duplicated 3×** — `sqlite_store.rb:714,819` + `dashboard/observations.rb:98` (Metz DRY). The compression-ratio correctness depends on both halves using the same divisor. Extract `Core::TokenEstimate.from_chars`/`.from_bytes`. ~1h.
30
+ - **M3/P10 · `recent_observations` `min_priority` name inverted** — `sqlite_store.rb:731` (Beck revealing-names). Filters `priority <= min_priority`, but priority is inverted (1=important), so a higher "minimum" returns *more* rows. Rename `max_priority_value`/`importance_floor`. ~30m + callers.
31
+ - **M4 · `Resolver#apply` `@return` rdoc stale** — `resolver.rb:29` omits the `:observations_created` and `:fact_ids` keys this PR adds. ~10m.
32
+ - **M5 · `fact_ids` array silently contains `nil`s** — `resolver.rb:45,61` (Grimm meaningful-returns). Comment promises positional alignment with `extraction.facts`, but `:discard` contributes `nil`; the sole consumer already `.compact.first`s. Pick one contract and document it (or compact at source). ~20m.
33
+ - **P3 · `clean_observation_body` is 6 chained gsubs, brittle** — `null_distiller.rb:178` (Bernhardt). Pure text logic buried as a private method; extract to a tested `Observe::BodyCleaner` with an input→output spec table. ~2h.
34
+ - **P4 · `extract_decisions`/`extract_observations` double-scan `DECISION_PATTERNS`** — `null_distiller.rb:105,138` (Metz DRY). Two full regex passes per chunk on the P95<5ms hot path; titles and bodies also diverge, complicating later corroboration. ~2-3h.
35
+ - **P5 · `consolidate_observations` reuses `coerce_observation(args)` on the whole tool-args hash** — `management_handlers.rb:151` (Grimm border). Couples the consolidation tool's param names to the observation schema and pulls in `kind`/`priority` defaults the caller may not intend. Pass a narrowed hash. ~1h.
36
+ - **P6 · dashboard N+1 across stores** — `dashboard/observations.rb:48-99` (Evans, bounded). 8-12 small aggregate queries per load; acceptable at store-count 2 but the `.where(status: "active")` predicate repeats ~6×. One `group_and_count(:status)` per store. ~2h.
37
+
38
+ ### Low (fast-follow cleanups)
39
+
40
+ - **L1** `persist_observations` reaches into raw hashes (`obs[:body]`…) — coerce through `Domain::Observation` at the border; defaults are triplicated. `resolver.rb:81`. ~1-2h.
41
+ - **L2** `respond_to?(:observations)` guard is dead defensiveness — `Extraction` always defines it. `resolver.rb:82`. ~5m.
42
+ - **L3/P8** status strings (`"active"`/`"consolidated"`/`"expired"`) and `2`/`3` literals scattered — add `STATUSES`/reuse `PROMOTION_THRESHOLD`/`INFO` from `Domain::Observation`. ~30m.
43
+ - **L4** `increment_corroboration` returns void while sibling mutators return `updated > 0` — make symmetric. `sqlite_store.rb:772`. ~10m.
44
+ - **L5** migration index DDL uses raw `CREATE INDEX` rather than Sequel's `index` DSL — idiomatic-only. ~30m, optional.
45
+ - **L6** `consolidate_observations` doesn't thread `session_id` (synthesized rows get NULL) — document the intent or thread it. ~5m.
46
+
47
+ ### What's done well
48
+
49
+ Append-only/tombstone discipline honored end-to-end with "row preserved, not deleted" specs; `Domain::Observation` immutable/frozen/self-validating with intention-revealing predicates; `corroborated?(threshold)` kept a total function (threshold injected, not hard-coded); resolver change genuinely additive (observations persist *inside* the extraction transaction, "no observations → fact behavior unchanged" tested); pure Sequel datasets throughout (no raw SQL except index DDL); promotion-gate tests pin the anti-hallucination invariant; `coerce_observation` border validation with `filter_map` drops invalids without aborting the batch; the Go-language case-sensitivity fix is clean and well-specced.
50
+
51
+ ### Recommended pre-merge action
52
+
53
+ Fix the mechanical items — **H1** (read-inside-transaction), **P2** (defuse cross-scope), **M1** (the missing consolidation spec), **M4/M5** (document this PR's own new `apply` surface). Flag **P1** (regex retune) for a data-driven decision — do not change blind. Everything else is tracked here as fast-follow.
54
+
55
+ ---
56
+
12
57
  ## Post-0.11 Investigation: Hallucination Rate Metric Calibration (2026-04-30)
13
58
 
14
59
  When #48 (hallucination-rate metric) was first run against this project's real DB, it surfaced numbers that *looked* alarming:
@@ -226,6 +226,155 @@ module ClaudeMemory
226
226
  end
227
227
  end
228
228
 
229
+ # Scopes whose stores carry an observations table. Observation checks
230
+ # iterate both DBs because observations may be project- or global-scoped.
231
+ OBSERVATION_SCOPES = %i[project global].freeze
232
+
233
+ # Valid observation lifecycle states. Anything else means a writer or
234
+ # migration stamped a status the resolver/reflector never produce.
235
+ OBSERVATION_STATUSES = %w[active consolidated expired].freeze
236
+
237
+ def observation_stores(manager)
238
+ OBSERVATION_SCOPES
239
+ .map { |scope| [scope, manager.store_if_exists(scope.to_s)] }
240
+ .reject { |_, store| store.nil? }
241
+ end
242
+
243
+ # C011 — Orphaned observations (provenance points at a missing content item).
244
+ def orphaned_observations(manager)
245
+ observation_stores(manager).flat_map do |scope, store|
246
+ content_ids = store.content_items.select(:id)
247
+ orphans = store.observations
248
+ .exclude(source_content_item_id: nil)
249
+ .exclude(source_content_item_id: content_ids)
250
+ .select(:id)
251
+ .all
252
+ next [] if orphans.empty?
253
+
254
+ [Finding.new(
255
+ id: "C011",
256
+ severity: :warn,
257
+ title: "#{orphans.size} observation(s) in #{scope} DB reference a missing content item",
258
+ detail: "An observation's source_content_item_id should point at the content_items row it was distilled from. A dangling pointer means the source row was pruned or never existed, so the observation's provenance can no longer be explained.",
259
+ suggestion: "Inspect with memory.observations. These rows are append-only; if the provenance is unrecoverable, consolidate or expire them via the Reflector (PreCompact/SessionEnd) rather than deleting.",
260
+ fact_ids: orphans.map { |r| r[:id] }
261
+ )]
262
+ end
263
+ end
264
+
265
+ # C012 — Promotion consistency (promoted_at ⇔ promoted_fact_id, fact must exist + be active).
266
+ def observation_promotion_consistency(manager)
267
+ observation_stores(manager).flat_map do |scope, store|
268
+ active_fact_ids = store.facts.where(status: "active").select(:id)
269
+
270
+ missing_fact_id = store.observations
271
+ .exclude(promoted_at: nil)
272
+ .where(promoted_fact_id: nil)
273
+ .select(:id).all
274
+ dangling_fact = store.observations
275
+ .exclude(promoted_fact_id: nil)
276
+ .exclude(promoted_fact_id: store.facts.select(:id))
277
+ .select(:id, :promoted_fact_id).all
278
+ inactive_fact = store.observations
279
+ .exclude(promoted_fact_id: nil)
280
+ .exclude(promoted_fact_id: active_fact_ids)
281
+ .exclude(promoted_fact_id: dangling_fact.map { |r| r[:promoted_fact_id] })
282
+ .select(:id, :promoted_fact_id).all
283
+ missing_timestamp = store.observations
284
+ .exclude(promoted_fact_id: nil)
285
+ .where(promoted_at: nil)
286
+ .select(:id).all
287
+
288
+ obs_ids = (missing_fact_id + dangling_fact + inactive_fact + missing_timestamp).map { |r| r[:id] }.uniq
289
+ next [] if obs_ids.empty?
290
+
291
+ problems = []
292
+ problems << "#{missing_fact_id.size} promoted but missing promoted_fact_id" unless missing_fact_id.empty?
293
+ problems << "#{dangling_fact.size} promoted_fact_id pointing at a non-existent fact" unless dangling_fact.empty?
294
+ problems << "#{inactive_fact.size} promoted into a non-active fact" unless inactive_fact.empty?
295
+ problems << "#{missing_timestamp.size} have promoted_fact_id but no promoted_at" unless missing_timestamp.empty?
296
+
297
+ [Finding.new(
298
+ id: "C012",
299
+ severity: :error,
300
+ title: "#{obs_ids.size} observation(s) in #{scope} DB have inconsistent promotion state",
301
+ detail: "Promotion must be atomic: a promoted observation has both promoted_at set and promoted_fact_id pointing at an existing, active fact. Violations (#{problems.join("; ")}) mean mark_observation_promoted ran partially or the target fact was later rejected/superseded, leaving the observation pointing at nothing usable.",
302
+ suggestion: "Inspect the fact with claude-memory explain <fact_id>. If the fact was intentionally rejected, the observation should be re-opened for re-promotion via memory.promote_observation; if mark_observation_promoted half-ran, re-run promotion.",
303
+ fact_ids: obs_ids
304
+ )]
305
+ end
306
+ end
307
+
308
+ # C013 — Tombstone-chain validity (consolidated_into must point to a real,
309
+ # non-self row and a consolidated observation must not stay active).
310
+ def observation_tombstone_chain(manager)
311
+ observation_stores(manager).flat_map do |scope, store|
312
+ obs_ids = store.observations.select(:id)
313
+
314
+ dangling = store.observations
315
+ .exclude(consolidated_into: nil)
316
+ .exclude(consolidated_into: obs_ids)
317
+ .select(:id, :consolidated_into).all
318
+ self_link = store.observations
319
+ .exclude(consolidated_into: nil)
320
+ .where(Sequel[:consolidated_into] => Sequel[:id])
321
+ .select(:id).all
322
+ active_but_tombstoned = store.observations
323
+ .exclude(consolidated_into: nil)
324
+ .where(status: "active")
325
+ .select(:id).all
326
+ consolidated_without_link = store.observations
327
+ .where(status: "consolidated", consolidated_into: nil)
328
+ .select(:id).all
329
+
330
+ flagged = (dangling + self_link + active_but_tombstoned + consolidated_without_link).map { |r| r[:id] }.uniq
331
+ next [] if flagged.empty?
332
+
333
+ problems = []
334
+ problems << "#{dangling.size} consolidated_into → missing observation" unless dangling.empty?
335
+ problems << "#{self_link.size} consolidated_into self-link" unless self_link.empty?
336
+ problems << "#{active_but_tombstoned.size} active yet have a consolidated_into target" unless active_but_tombstoned.empty?
337
+ problems << "#{consolidated_without_link.size} status=consolidated with no consolidated_into keeper" unless consolidated_without_link.empty?
338
+
339
+ [Finding.new(
340
+ id: "C013",
341
+ severity: :error,
342
+ title: "#{flagged.size} observation(s) in #{scope} DB have a broken tombstone chain",
343
+ detail: "Tombstoning is append-only: a superseded observation gets status=consolidated and consolidated_into pointing at the surviving keeper. Violations (#{problems.join("; ")}) corrupt the lineage — recall could surface a tombstoned row, or a consolidated row could orphan its history.",
344
+ suggestion: "Inspect with memory.observations. Re-run the deterministic Reflector (fires on PreCompact/SessionEnd) to re-derive consolidation; a self-link or active+tombstoned row indicates a Reflector bug — file it rather than hand-editing the append-only table.",
345
+ fact_ids: flagged
346
+ )]
347
+ end
348
+ end
349
+
350
+ # C014 — Status / corroboration sanity (known status set, corroboration ≥ 1).
351
+ def observation_status_corroboration(manager)
352
+ observation_stores(manager).flat_map do |scope, store|
353
+ bad_status = store.observations
354
+ .exclude(status: OBSERVATION_STATUSES)
355
+ .select(:id).all
356
+ bad_corroboration = store.observations
357
+ .where { corroboration_count < 1 }
358
+ .select(:id).all
359
+
360
+ flagged = (bad_status + bad_corroboration).map { |r| r[:id] }.uniq
361
+ next [] if flagged.empty?
362
+
363
+ problems = []
364
+ problems << "#{bad_status.size} with status outside #{OBSERVATION_STATUSES.inspect}" unless bad_status.empty?
365
+ problems << "#{bad_corroboration.size} with corroboration_count < 1" unless bad_corroboration.empty?
366
+
367
+ [Finding.new(
368
+ id: "C014",
369
+ severity: :warn,
370
+ title: "#{flagged.size} observation(s) in #{scope} DB have invalid status/corroboration",
371
+ detail: "Every observation should carry a known lifecycle status (#{OBSERVATION_STATUSES.join("/")}) and at least one sighting (corroboration_count ≥ 1; a fresh insert counts as 1). Violations (#{problems.join("; ")}) break the promotion gate (which keys off corroboration) and the recall filters (which key off status).",
372
+ suggestion: "Inspect with memory.observations. A corroboration_count < 1 means increment_corroboration math went negative; an unknown status means a migration or external writer bypassed insert_observation. Re-derive via the Reflector if possible.",
373
+ fact_ids: flagged
374
+ )]
375
+ end
376
+ end
377
+
229
378
  def normalize_convention(text)
230
379
  text.to_s
231
380
  .downcase
@@ -20,6 +20,10 @@ module ClaudeMemory
20
20
  bare_conclusion_rate
21
21
  project_starvation
22
22
  auto_memory_unimported
23
+ orphaned_observations
24
+ observation_promotion_consistency
25
+ observation_tombstone_chain
26
+ observation_status_corroboration
23
27
  ].freeze
24
28
 
25
29
  Result = Data.define(:findings, :stats) do
@@ -138,7 +138,7 @@ module ClaudeMemory
138
138
 
139
139
  predicates = db[:facts].select(:predicate, :status).group_and_count(:predicate, :status).all
140
140
  .each_with_object(Hash.new { |h, k| h[k] = Hash.new(0) }) do |row, acc|
141
- acc[row[:predicate].to_s][row[:status].to_s] += row[:count].to_i
141
+ acc[row[:predicate].to_s][row[:status].to_s] += row[:count].to_i
142
142
  end
143
143
 
144
144
  entity_types = db[:entities].group_and_count(:type).all.each_with_object(Hash.new(0)) do |row, acc|
@@ -205,14 +205,26 @@ module ClaudeMemory
205
205
 
206
206
  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
207
207
  injector = ClaudeMemory::Hook::ContextInjector.new(manager, source: source)
208
- context_text = injector.generate_context
208
+ # On PreCompact (context pressure) inject only the reflection nudge, not
209
+ # the full snapshot; everywhere else inject the full SessionStart context.
210
+ context_text = if payload["hook_event_name"] == "PreCompact"
211
+ injector.reflection_context
212
+ else
213
+ injector.generate_context
214
+ end
209
215
  duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000).round
210
216
 
211
217
  if context_text
212
218
  response = {
213
219
  hookSpecificOutput: {
214
220
  hookEventName: "SessionStart",
215
- additionalContext: context_text
221
+ # Wrap in <claude-memory-context> so a later ingest strips our own
222
+ # injected snapshot back out (ContentSanitizer lists this tag in
223
+ # SYSTEM_TAGS). Without the wrapper, memory's injected facts and
224
+ # observation log leak into the transcript and get re-distilled —
225
+ # a self-ingestion feedback loop. Claude still reads the content;
226
+ # only the re-ingestion path treats it as strippable.
227
+ additionalContext: "<claude-memory-context>\n#{context_text}\n</claude-memory-context>"
216
228
  }
217
229
  }
218
230
  stdout.puts JSON.generate(response)
@@ -243,7 +255,8 @@ module ClaudeMemory
243
255
  top_fact_ids: injector.emitted_fact_ids.first(10),
244
256
  top_facts_by_scope: (by_scope if by_scope.any?),
245
257
  top_subjects: injector.emitted_subjects.uniq.first(10),
246
- fact_count: injector.emitted_fact_ids.size
258
+ fact_count: injector.emitted_fact_ids.size,
259
+ observation_count: injector.emitted_observation_count
247
260
  }.compact
248
261
 
249
262
  ClaudeMemory::ActivityLog.record(store,
@@ -126,7 +126,9 @@ module ClaudeMemory
126
126
  {"type" => "command", "command" => ingest_cmd, "timeout" => 30,
127
127
  "statusMessage" => "Saving memory..."},
128
128
  {"type" => "command", "command" => sweep_cmd, "timeout" => 30,
129
- "statusMessage" => "Sweeping memory..."}
129
+ "statusMessage" => "Sweeping memory..."},
130
+ {"type" => "command", "command" => context_cmd, "timeout" => 5,
131
+ "statusMessage" => "Reflecting on memory..."}
130
132
  ]
131
133
  }],
132
134
  "SessionEnd" => [{
@@ -15,6 +15,10 @@ module ClaudeMemory
15
15
  "distill-transcripts" => {
16
16
  file: "distill-transcripts.md",
17
17
  description: "Distill transcripts — extract facts/entities/decisions from undistilled content"
18
+ },
19
+ "reflect" => {
20
+ file: "reflect.md",
21
+ description: "Reflect on observations — consolidate the episodic log and promote corroborated observations to facts"
18
22
  }
19
23
  }.freeze
20
24