claude_memory 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/memory.sqlite3 +0 -0
  3. data/.claude/skills/dashboard/SKILL.md +42 -0
  4. data/.claude-plugin/marketplace.json +1 -1
  5. data/.claude-plugin/plugin.json +1 -1
  6. data/CHANGELOG.md +130 -0
  7. data/CLAUDE.md +30 -6
  8. data/README.md +66 -2
  9. data/db/migrations/015_add_activity_events.rb +26 -0
  10. data/db/migrations/016_add_moment_feedback.rb +22 -0
  11. data/db/migrations/017_add_last_recalled_at.rb +15 -0
  12. data/docs/1_0_punchlist.md +371 -0
  13. data/docs/EXAMPLES.md +41 -2
  14. data/docs/GETTING_STARTED.md +33 -4
  15. data/docs/architecture.md +22 -7
  16. data/docs/audit-queries.md +131 -0
  17. data/docs/dashboard.md +192 -0
  18. data/docs/improvements.md +650 -9
  19. data/docs/influence/cq.md +187 -0
  20. data/docs/plugin.md +13 -6
  21. data/docs/quality_review.md +524 -172
  22. data/docs/reflection_memory_as_accumulating_judgment.md +67 -0
  23. data/lib/claude_memory/activity_log.rb +86 -0
  24. data/lib/claude_memory/commands/census_command.rb +210 -0
  25. data/lib/claude_memory/commands/completion_command.rb +3 -0
  26. data/lib/claude_memory/commands/dashboard_command.rb +54 -0
  27. data/lib/claude_memory/commands/dedupe_conflicts_command.rb +55 -0
  28. data/lib/claude_memory/commands/digest_command.rb +273 -0
  29. data/lib/claude_memory/commands/hook_command.rb +61 -2
  30. data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
  31. data/lib/claude_memory/commands/reclassify_references_command.rb +56 -0
  32. data/lib/claude_memory/commands/registry.rb +7 -1
  33. data/lib/claude_memory/commands/show_command.rb +90 -0
  34. data/lib/claude_memory/commands/skills/distill-transcripts.md +13 -1
  35. data/lib/claude_memory/commands/stats_command.rb +131 -2
  36. data/lib/claude_memory/commands/sweep_command.rb +2 -0
  37. data/lib/claude_memory/configuration.rb +16 -0
  38. data/lib/claude_memory/core/relative_time.rb +9 -0
  39. data/lib/claude_memory/dashboard/api.rb +610 -0
  40. data/lib/claude_memory/dashboard/conflicts.rb +279 -0
  41. data/lib/claude_memory/dashboard/efficacy.rb +127 -0
  42. data/lib/claude_memory/dashboard/fact_presenter.rb +109 -0
  43. data/lib/claude_memory/dashboard/health.rb +175 -0
  44. data/lib/claude_memory/dashboard/index.html +2707 -0
  45. data/lib/claude_memory/dashboard/knowledge.rb +136 -0
  46. data/lib/claude_memory/dashboard/moments.rb +244 -0
  47. data/lib/claude_memory/dashboard/reuse.rb +97 -0
  48. data/lib/claude_memory/dashboard/scoped_fact_resolver.rb +95 -0
  49. data/lib/claude_memory/dashboard/server.rb +211 -0
  50. data/lib/claude_memory/dashboard/timeline.rb +68 -0
  51. data/lib/claude_memory/dashboard/trust.rb +454 -0
  52. data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
  53. data/lib/claude_memory/distill/reference_material_detector.rb +78 -0
  54. data/lib/claude_memory/hook/auto_memory_mirror.rb +112 -0
  55. data/lib/claude_memory/hook/context_injector.rb +97 -3
  56. data/lib/claude_memory/hook/handler.rb +191 -3
  57. data/lib/claude_memory/mcp/handlers/management_handlers.rb +8 -0
  58. data/lib/claude_memory/mcp/query_guide.rb +11 -0
  59. data/lib/claude_memory/mcp/text_summary.rb +29 -0
  60. data/lib/claude_memory/mcp/tool_definitions.rb +13 -0
  61. data/lib/claude_memory/mcp/tools.rb +148 -0
  62. data/lib/claude_memory/publish.rb +13 -21
  63. data/lib/claude_memory/recall/stale_detector.rb +67 -0
  64. data/lib/claude_memory/resolve/predicate_policy.rb +2 -0
  65. data/lib/claude_memory/resolve/resolver.rb +41 -11
  66. data/lib/claude_memory/store/llm_cache.rb +68 -0
  67. data/lib/claude_memory/store/metrics_aggregator.rb +96 -0
  68. data/lib/claude_memory/store/schema_manager.rb +1 -1
  69. data/lib/claude_memory/store/sqlite_store.rb +47 -143
  70. data/lib/claude_memory/store/store_manager.rb +29 -0
  71. data/lib/claude_memory/sweep/maintenance.rb +216 -0
  72. data/lib/claude_memory/sweep/recall_timestamp_refresher.rb +83 -0
  73. data/lib/claude_memory/sweep/sweeper.rb +2 -0
  74. data/lib/claude_memory/templates/hooks.example.json +5 -0
  75. data/lib/claude_memory/version.rb +1 -1
  76. data/lib/claude_memory.rb +24 -0
  77. metadata +51 -1
@@ -0,0 +1,454 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Dashboard
5
+ # Sidebar data for the feed-first dashboard. Six surfaces, each
6
+ # answering a different "is memory helping/costing/clean?" question:
7
+ #
8
+ # 1. Moments this week + week-over-week delta — the headline value
9
+ # number. A moment is any meaningful activity event (recall hit,
10
+ # extraction, context injection, conflict detected). Ingest-only
11
+ # events don't count because they're not directly user-visible value.
12
+ #
13
+ # 2. "What memory knows about you" — up to 5 global facts rendered as
14
+ # plain English. The trust panel's most compelling surface: users
15
+ # can sanity-check what's being injected into their sessions.
16
+ #
17
+ # 3. Needs review — open conflicts plus stale facts (active but never
18
+ # recalled in the last N days) plus empty recalls (queries that
19
+ # returned nothing). A single actionable count; the feed surfaces
20
+ # the individual items.
21
+ #
22
+ # 4. Utilization (30d) — of facts extracted in the last 30 days, how
23
+ # many has Claude actually surfaced via recall or context injection.
24
+ # Low ratios are a signal too: memory accumulating knowledge that
25
+ # Claude isn't reaching for.
26
+ #
27
+ # 5. Token budget (30d, 0.11.0+) — p50/p95/avg `context_tokens`
28
+ # injected per SessionStart. Answers "what does memory cost per
29
+ # session?" via numbers a skeptical user can read.
30
+ #
31
+ # 6. Quality score (live + historical, 0.11.0+) — hallucination-rate
32
+ # proxy: 100 - (suspect_pct + bare_pct), clamped 0..100. Live is
33
+ # over the last UTILIZATION_DAYS; historical mirrors the same
34
+ # calculation across all active facts as a supplementary baseline.
35
+ # See `quality_review.md` 2026-04-30 note for why the split exists.
36
+ class Trust
37
+ WEEK_SECONDS = 7 * 86_400
38
+ UTILIZATION_DAYS = 30
39
+ VALUE_EVENT_TYPES = %w[hook_context recall store_extraction].freeze
40
+
41
# Wraps the store manager that every panel query in this class reads from.
#
# @param manager [Object] must answer the `store_if_exists(scope)` and
#   `default_store(prefer:)` calls made by the methods below
def initialize(manager)
  @manager = manager
end
44
+
45
# Assembles every sidebar section into a single hash.
#
# Keys are the section names exposed to the caller; values are the names
# of the builder methods on this class. Sections are computed in the
# order listed, and the resulting hash keeps that key order.
#
# @return [Hash{Symbol=>Object}] one entry per sidebar section
def snapshot
  builders = {
    weekly_moments: :weekly_moments,
    fingerprint: :fingerprint,
    needs_review: :needs_review,
    utilization: :utilization,
    feedback: :feedback_summary,
    token_budget: :token_budget,
    quality_score: :quality_score
  }

  # `send` because several builders sit below the `private` marker.
  builders.transform_values { |builder| send(builder) }
end
56
+
57
+ # The trust panel's hallucination-rate proxy. Counts two pollution
58
+ # signals:
59
+ #
60
+ # - suspect: facts that ReferenceMaterialDetector retagged from
61
+ # `convention` to `reference` predicate (descriptions of external
62
+ # projects mislabeled as user conventions).
63
+ # - bare_conclusion: `decision` / `convention` facts whose object
64
+ # skipped the prompt-mandated reason clause and so are dead
65
+ # weight once the originating context is gone.
66
+ #
67
+ # Reports two windows so users can distinguish historical noise from
68
+ # live extraction quality (per `quality_review.md` 2026-04-30
69
+ # investigation): the headline `score` is computed over facts
70
+ # created within the last UTILIZATION_DAYS — that's the actionable
71
+ # signal. The `historical` block reports the same counts over all
72
+ # active facts so legacy data is visible without dominating.
73
+ #
74
+ # Score = 100 - (suspect_pct + bare_pct), clamped 0..100. Lower is
75
+ # worse. Returns 100 (perfect) when there are no facts in the
76
+ # window so a quiet week isn't penalized.
77
def quality_score
  # Live window: facts created within the last UTILIZATION_DAYS days.
  window_start = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
  recent = compute_quality(cutoff: window_start)

  # Historical baseline: same calculation with no creation-date filter.
  all_time = compute_quality(cutoff: nil)

  recent.merge(window_days: UTILIZATION_DAYS, historical: all_time)
rescue Sequel::DatabaseError => e
  # A database failure degrades to the all-zero / score-100 payload
  # rather than propagating to the caller.
  ClaudeMemory.logger.debug("Trust#quality_score failed: #{e.message}")
  quality_score_zero
end
90
+ public :quality_score
91
+
92
# Fallback payload for `quality_score`: zero counts and a score of 100
# for both the live window and the nested `historical` block.
#
# @return [Hash] same key layout as a successful `quality_score` result
def quality_score_zero
  # `zero_breakdown` returns a fresh hash on each call, so the outer and
  # nested hashes are distinct objects with identical contents.
  zero_breakdown.merge(
    window_days: UTILIZATION_DAYS,
    historical: zero_breakdown
  )
end
111
+
112
# Turns raw pollution counts into percentages and a 0..100 score.
#
# @param cutoff [String, nil] passed through to `aggregate_quality_counts`;
#   nil means no creation-date filter
# @return [Hash] the counts plus :suspect_pct, :bare_pct (one decimal
#   place) and :score (Integer); `zero_breakdown` when no facts matched
def compute_quality(cutoff:)
  counts = aggregate_quality_counts(cutoff: cutoff)
  population = counts[:total_active]
  return zero_breakdown if population.zero?

  share_of_population = ->(count) { (count * 100.0 / population).round(1) }
  suspect_share = share_of_population.call(counts[:suspect_count])
  bare_share = share_of_population.call(counts[:bare_conclusion_count])

  # Both shares are subtracted from 100; the clamp keeps the result
  # inside 0..100 before rounding to an Integer.
  penalty = suspect_share + bare_share
  final_score = (100 - penalty).clamp(0, 100).round

  counts.merge(suspect_pct: suspect_share, bare_pct: bare_share, score: final_score)
end
128
+
129
# Quality breakdown for an empty window: all counts zero, both
# percentages 0.0, and a score of 100.
#
# @return [Hash] a new hash on every call
def zero_breakdown
  counts = %i[total_active suspect_count bare_conclusion_count].to_h { |key| [key, 0] }
  counts.merge(suspect_pct: 0.0, bare_pct: 0.0, score: 100)
end
133
+
134
# Counts active facts, `reference`-predicate facts, and bare-conclusion
# facts across the project and global stores.
#
# @param cutoff [String, nil] when given, only facts with
#   `created_at >= cutoff` are counted
# @return [Hash] {total_active:, suspect_count:, bare_conclusion_count:}
def aggregate_quality_counts(cutoff: nil)
  bare_detector = Distill::BareConclusionDetector.new
  tally = {total_active: 0, suspect_count: 0, bare_conclusion_count: 0}

  %w[project global].each do |scope|
    store = @manager.store_if_exists(scope)
    next unless store

    active = store.facts.where(status: "active")
    active = active.where { created_at >= cutoff } if cutoff

    tally[:total_active] += active.count
    tally[:suspect_count] += active.where(predicate: "reference").count

    # Bare-conclusion detection inspects the object text, so these rows
    # are loaded and checked in Ruby rather than counted in SQL.
    guarded_rows = active
      .where(predicate: %w[decision convention])
      .select(:predicate, :object_literal)
      .all
    tally[:bare_conclusion_count] += guarded_rows.count { |row| bare_detector.bare_conclusion?(row) }
  end

  tally
end
155
+
156
+ # What does memory cost? Aggregates `context_tokens` from successful
157
+ # `hook_context` activity events over the last UTILIZATION_DAYS so a
158
+ # skeptical user can see the per-session token cost in p50/p95.
159
+ #
160
+ # Shape: {p50:, p95:, avg:, sample_size:, window_days:}
161
+ # All ints. Returns zeros when there are no events in the window.
162
def token_budget
  store = @manager.default_store(prefer: :project)
  return token_budget_zero unless store

  cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
  rows = store.activity_events
    .where(event_type: "hook_context", status: "success")
    .where { occurred_at >= cutoff }
    .select(:detail_json)
    .all

  tokens = rows.filter_map do |row|
    raw = row[:detail_json]
    next unless raw

    # Parse per row: one corrupt payload drops that sample only, instead
    # of zeroing the whole metric through the method-level rescue.
    details = begin
      JSON.parse(raw)
    rescue JSON::ParserError, TypeError
      nil
    end

    # Valid JSON that is not an object ("null", "[]", a bare number)
    # cannot carry context_tokens and must not be indexed by string key.
    next unless details.is_a?(Hash)

    value = details["context_tokens"]
    value if value.is_a?(Integer) && value > 0
  end

  return token_budget_zero if tokens.empty?

  sorted = tokens.sort
  {
    p50: percentile(sorted, 0.50),
    p95: percentile(sorted, 0.95),
    avg: (sorted.sum.to_f / sorted.size).round,
    sample_size: sorted.size,
    window_days: UTILIZATION_DAYS
  }
rescue Sequel::DatabaseError => e
  # Database failures still degrade to the all-zero payload.
  ClaudeMemory.logger.debug("Trust#token_budget failed: #{e.message}")
  token_budget_zero
end
193
+ public :token_budget
194
+
195
# All-zero `token_budget` payload, used when there is no store, no
# usable sample, or the query failed.
#
# @return [Hash] {p50:, p95:, avg:, sample_size:, window_days:}
def token_budget_zero
  zeroed = %i[p50 p95 avg sample_size].to_h { |key| [key, 0] }
  zeroed.merge(window_days: UTILIZATION_DAYS)
end
198
+
199
# Picks the element at rank ceil(size * pct) from an ascending array,
# with the resulting index clamped into the array's bounds.
#
# @param sorted [Array] values already sorted ascending
# @param pct [Numeric] fraction of the distribution, e.g. 0.95
# @return [Object] the selected element, or 0 for an empty array
def percentile(sorted, pct)
  return 0 if sorted.empty?

  last_index = sorted.size - 1
  rank_index = (sorted.size * pct).ceil - 1
  sorted[rank_index.clamp(0, last_index)]
end
206
+
207
+ private
208
+
209
# Counts value events for the last seven days and the seven days before
# that, plus a per-event-type breakdown of the current week.
#
# @return [Hash] {this_week:, last_week:, delta:, by_kind:}; all zero
#   (with an empty by_kind) when no store exists or the query fails
def weekly_moments
  store = @manager.default_store(prefer: :project)
  return {this_week: 0, last_week: 0, delta: 0, by_kind: {}} unless store

  now = Time.now.utc
  week_ago = (now - WEEK_SECONDS).iso8601
  two_weeks_ago = (now - 2 * WEEK_SECONDS).iso8601

  current = valuable_events(store, week_ago)
  # The previous week is bounded above by the start of the current one.
  previous = valuable_events(store, two_weeks_ago, before: week_ago)

  current_total = current.size
  previous_total = previous.size

  {
    this_week: current_total,
    last_week: previous_total,
    delta: current_total - previous_total,
    by_kind: current.map { |row| row[:event_type] }.tally
  }
rescue Sequel::DatabaseError => e
  ClaudeMemory.logger.debug("Trust#weekly_moments failed: #{e.message}")
  {this_week: 0, last_week: 0, delta: 0, by_kind: {}}
end
232
+
233
# Loads successful activity events of the VALUE_EVENT_TYPES kinds.
#
# @param store [Object] store exposing an `activity_events` dataset
# @param since [String] inclusive lower bound compared to `occurred_at`
# @param before [String, nil] optional exclusive upper bound
# @return [Array<Hash>] matching rows
def valuable_events(store, since, before: nil)
  recent = store.activity_events
    .where(event_type: VALUE_EVENT_TYPES, status: "success")
    .where { occurred_at >= since }

  bounded = before ? recent.where { occurred_at < before } : recent
  bounded.all
end
241
+
242
+ # Up to 5 global facts rendered as plain-English sentences so a skeptical
243
+ # user can verify at-a-glance what's being injected into their Claude
244
+ # sessions. Prefers high-signal predicates (convention, decision,
245
+ # uses_framework, uses_database) and falls back to most-recent active.
246
def fingerprint
  store = @manager.store_if_exists("global")
  return [] unless store

  wanted = 5
  active_global = store.facts.where(status: "active", scope: "global")

  # First pass: preferred predicates, highest confidence first, newest
  # first among equal confidence.
  picks = active_global
    .where(predicate: %w[convention decision uses_framework uses_database uses_language])
    .order(Sequel.desc(:confidence), Sequel.desc(:created_at))
    .limit(wanted)
    .all

  # Second pass: top up with the newest remaining active facts.
  if picks.size < wanted
    already_picked = picks.map { |row| row[:id] }
    picks += active_global
      .exclude(id: already_picked)
      .order(Sequel.desc(:created_at))
      .limit(wanted - picks.size)
      .all
  end

  FactPresenter.new(store).list_summary(picks).map { |fact| render_sentence(fact) }
rescue Sequel::DatabaseError => e
  ClaudeMemory.logger.debug("Trust#fingerprint failed: #{e.message}")
  []
end
274
+
275
# Renders one presented fact as a short plain-English sentence.
#
# @param fact [Hash] reads :id, :docid, :subject, :predicate, :object
#   and :confidence
# @return [Hash] {id:, docid:, sentence:, predicate:, confidence:};
#   :sentence is always a stripped String
def render_sentence(fact)
  predicate = fact[:predicate]
  object = fact[:object]

  sentence =
    case predicate
    when "convention", "decision"
      # These objects are already written as full statements.
      object
    when "uses_framework", "uses_language"
      "Uses #{object}"
    when "uses_database"
      "Uses #{object} for storage"
    when "deployment_platform"
      "Deploys to #{object}"
    when "auth_method"
      "Auth via #{object}"
    else
      # `to_s` keeps the fallback from raising when the predicate is nil
      # or a Symbol; neither responds to `tr`.
      "#{fact[:subject]} #{predicate.to_s.tr("_", " ")} #{object}"
    end

  {
    id: fact[:id],
    docid: fact[:docid],
    sentence: sentence.to_s.strip,
    predicate: predicate,
    confidence: fact[:confidence]
  }
end
305
+
306
# Bundles the three "needs review" figures into one hash.
#
# @return [Hash] {open_conflicts:, stale_facts:, empty_recalls:}
def needs_review
  conflicts = count_open_conflicts
  stale = count_stale_facts
  empty = count_empty_recalls

  {open_conflicts: conflicts, stale_facts: stale, empty_recalls: empty}
end
313
+
314
# Delegates to `Conflicts#distinct_open_counts`.
#
# @return [Object] whatever `distinct_open_counts` returns, or
#   {project: 0, global: 0, total: 0} when the database query fails
def count_open_conflicts
  conflicts = Conflicts.new(@manager)
  conflicts.distinct_open_counts
rescue Sequel::DatabaseError
  {project: 0, global: 0, total: 0}
end
319
+
320
+ # User-supplied thumbs on feed moments. The ratio answers "when Claude
321
+ # surfaces something from memory, is the user signaling it was helpful?"
322
+ # Only moments recorded in the last UTILIZATION_DAYS count toward the
323
+ # ratio so old clicks don't distort an active week's signal.
324
+ #
325
+ # Shape: {up: Int, down: Int, net: Int, ratio_pct: Int, window_days: Int}
326
+ # ratio_pct = up / (up + down) × 100, or nil when there's no feedback.
327
def feedback_summary
  store = @manager.default_store(prefer: :project)
  return feedback_zero unless store

  cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
  verdicts = store.moment_feedback
    .where { recorded_at >= cutoff }
    .all
    .map { |row| row[:verdict] }
    .tally

  # Only "up" and "down" verdicts are counted; any other value is ignored.
  up = verdicts.fetch("up", 0)
  down = verdicts.fetch("down", 0)
  voted = up + down

  # Stays nil when nobody voted, so an empty window is distinguishable
  # from a 0% ratio.
  ratio_pct = ((up.to_f / voted) * 100).round unless voted.zero?

  {up: up, down: down, net: up - down, ratio_pct: ratio_pct, window_days: UTILIZATION_DAYS}
rescue Sequel::DatabaseError
  feedback_zero
end
342
+
343
# Empty `feedback_summary` payload: zero votes and a nil ratio.
#
# @return [Hash] {up:, down:, net:, ratio_pct:, window_days:}
def feedback_zero
  {
    up: 0,
    down: 0,
    net: 0,
    ratio_pct: nil,
    window_days: UTILIZATION_DAYS
  }
end
346
+
347
+ # "Stale" = active facts whose last_recalled_at is older than the
348
+ # configured threshold (or never set, with a grace window so freshly
349
+ # extracted facts don't show up as stale on day one).
350
+ #
351
+ # Backed by Recall::StaleDetector, which reads the column populated by
352
+ # Sweep::RecallTimestampRefresher. Replaces the older "active facts
353
+ # minus seen-in-recalls" approximation, which couldn't distinguish a
354
+ # never-touched 6-month-old fact from a freshly stored one.
355
def count_stale_facts
  # The threshold is read from a fresh Configuration on every call.
  Recall::StaleDetector.stale_count(
    @manager,
    threshold_days: Configuration.new.stale_days
  )
rescue Sequel::DatabaseError, JSON::ParserError => e
  ClaudeMemory.logger.debug("Trust#count_stale_facts failed: #{e.message}")
  0
end
362
+
363
+ # The ROI signal: of the facts Claude has extracted into memory over the
364
+ # last UTILIZATION_DAYS, how many has Claude actually *used* (appeared
365
+ # in any recall or context injection's top_fact_ids)? Low ratios are
366
+ # themselves a signal — it means memory is accumulating knowledge but
367
+ # Claude isn't reaching for it. Anomalies worth surfacing honestly.
368
+ #
369
+ # Shape: {extracted: Int, used: Int, used_from_extracted: Int, ratio_pct: Int, window_days: Int}
370
+ # Both counts are scope-union (project + global) so the headline number
371
+ # reflects everything memory did, not just one store.
372
def utilization
  cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
  extracted_pairs = extracted_fact_pairs(cutoff)
  used_pairs = used_fact_pairs(cutoff)

  extracted_count = extracted_pairs.size
  # The ratio only credits facts that were both extracted and used in
  # the window; `used` reports every used fact regardless of when it
  # was extracted.
  overlap_count = (used_pairs & extracted_pairs).size

  ratio_pct =
    if extracted_count.zero?
      0
    else
      ((overlap_count.to_f / extracted_count) * 100).round
    end

  {
    extracted: extracted_count,
    used: used_pairs.size,
    used_from_extracted: overlap_count,
    ratio_pct: ratio_pct,
    window_days: UTILIZATION_DAYS
  }
rescue Sequel::DatabaseError, JSON::ParserError => e
  ClaudeMemory.logger.debug("Trust#utilization failed: #{e.message}")
  {extracted: 0, used: 0, used_from_extracted: 0, ratio_pct: 0, window_days: UTILIZATION_DAYS}
end
397
+ public :utilization
398
+
399
+ # Facts that were extracted (distilled + stored) within the window.
400
+ # Returns (scope, id) pairs across both stores.
401
def extracted_fact_pairs(cutoff)
  %w[project global].each_with_object(Set.new) do |scope, pairs|
    store = @manager.store_if_exists(scope)
    next unless store

    recent_rows = store.facts
      .where(status: "active")
      .where { created_at >= cutoff }
      .select(:id)
      .all

    # Ids are tagged with their scope name so equal ids from different
    # stores stay distinct in the set.
    recent_rows.each { |row| pairs << [scope, row[:id]] }
  end
end
415
+
416
+ # Facts that appeared as top_fact_ids in any recall or context injection
417
+ # within the window. Returns (scope, id) pairs.
418
def used_fact_pairs(cutoff)
  store = @manager.default_store(prefer: :project)
  return Set.new unless store

  events = store.activity_events
    .where(event_type: %w[recall hook_context], status: "success")
    .where { occurred_at >= cutoff }
    .select(:detail_json)
    .all

  events.each_with_object(Set.new) do |event, pairs|
    raw = event[:detail_json]
    # Events without a payload are treated as an empty detail hash.
    details = raw ? JSON.parse(raw) : {}
    scoped = ScopedFactResolver.scoped_ids_from_details(details)
    ScopedFactResolver.flat_pairs(scoped).each { |pair| pairs << pair }
  end
end
434
+
435
# Counts successful recall events from the last WEEK_SECONDS whose
# recorded `result_count` is zero or absent.
#
# Rows whose `detail_json` cannot be parsed, or does not decode to a JSON
# object, are skipped rather than failing the whole count.
#
# @return [Integer] number of empty recalls; 0 when no store exists or
#   the database query fails
def count_empty_recalls
  store = @manager.default_store(prefer: :project)
  return 0 unless store

  cutoff = (Time.now.utc - WEEK_SECONDS).iso8601
  rows = store.activity_events
    .where(event_type: "recall")
    .where(status: "success")
    .where { occurred_at >= cutoff }
    .select(:detail_json) # only the payload column is read below
    .all

  rows.count do |row|
    raw = row[:detail_json]
    details =
      if raw
        begin
          JSON.parse(raw)
        rescue JSON::ParserError, TypeError
          nil
        end
      else
        {}
      end

    # A payload that is not a JSON object cannot be classified.
    next false unless details.is_a?(Hash)

    found = details["result_count"]
    # A missing count is treated as empty; a non-numeric count is not
    # counted, since it does not respond to `zero?`.
    found.nil? || (found.is_a?(Numeric) && found.zero?)
  end
rescue Sequel::DatabaseError
  0
end
452
+ end
453
+ end
454
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Distill
5
+ # Catches facts that survived distillation without a reason clause.
6
+ # The SessionStart distillation prompt explicitly requires `decision`
7
+ # and `convention` facts to embed the reason ("— because …", "so that
8
+ # …", "to avoid …", "caused by …", "breaks when …"); facts that ship
9
+ # without one are dead weight once they go stale because nobody can
10
+ # recover the original justification by re-reading the row.
11
+ #
12
+ # This detector is the production-side mirror of that prompt
13
+ # constraint. It exists so the dashboard can quantify how many facts
14
+ # are slipping through the prompt's reason-clause requirement —
15
+ # higher bare-conclusion ratio means the LLM is producing low-quality
16
+ # extractions, which is a hallucination-rate proxy worth surfacing.
17
+ #
18
+ # Pure function, no side effects, safe to call in tight loops.
19
+ class BareConclusionDetector
20
+ # Predicates the prompt requires reasons for. Other predicates
21
+ # (uses_framework, uses_database, etc.) carry their meaning in the
22
+ # subject-predicate-object shape itself, so a bare object is fine.
23
+ GUARDED_PREDICATES = %w[decision convention].freeze
24
+
25
+ # Reason-clause signals lifted from the distill-transcripts skill
26
+ # prompt plus a small set of common natural-language variants. The
27
+ # match is case-insensitive and substring-anchored — any one signal
28
+ # qualifies the fact as "explained" even without an em dash.
29
+ REASON_PATTERNS = [
30
+ /\bbecause\b/i,
31
+ /\bso\s+that\b/i,
32
+ /\bso\s+the\b/i,
33
+ /\bso\s+we\b/i,
34
+ /\bin\s+order\s+to\b/i,
35
+ /\bto\s+avoid\b/i,
36
+ /\bto\s+prevent\b/i,
37
+ /\bto\s+ensure\b/i,
38
+ /\bto\s+support\b/i,
39
+ /\bto\s+allow\b/i,
40
+ /\bto\s+enable\b/i,
41
+ /\bto\s+make\b/i,
42
+ /\bto\s+fix\b/i,
43
+ /\bto\s+handle\b/i,
44
+ /\bcaused\s+by\b/i,
45
+ /\bbreaks\s+when\b/i,
46
+ /\bdue\s+to\b/i,
47
+ /\botherwise\b/i,
48
+ /\bwithout\s+(?:which|this|it)\b/i
49
+ ].freeze
50
+
51
+ # Returns true when the fact has a guarded predicate AND its object
52
+ # text shows no reason-clause signal. Returns false for any fact
53
+ # outside the guarded predicates so the metric isn't polluted by
54
+ # legitimately-bare facts (uses_database "sqlite" doesn't need a
55
+ # rationale embedded in its object).
56
+ #
57
+ # @param fact [Hash] with :predicate and :object_literal keys (or
58
+ # :predicate / :object — accepts both shapes used in the codebase)
59
+ # @return [Boolean]
60
+ def bare_conclusion?(fact)
61
+ predicate = fact[:predicate].to_s
62
+ return false unless GUARDED_PREDICATES.include?(predicate)
63
+
64
+ object = (fact[:object_literal] || fact[:object]).to_s
65
+ return false if object.empty?
66
+
67
+ REASON_PATTERNS.none? { |re| object.match?(re) }
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClaudeMemory
4
+ module Distill
5
+ # Guards against the LLM distiller mislabeling reference material as
6
+ # `convention`. Audited in production data on 2026-04-24: project facts
7
+ # labeled `predicate=convention` with objects like "Cloud-backed Claude
8
+ # Code plugin (~1,195 LOC JavaScript) using Supermemory API…" and
9
+ # "Claude Code plugin with marketplace.json, 5,700+ stars, by Tobi Lütke."
10
+ # These are descriptions of external projects, not conventions the user
11
+ # applies. Leaving them under `convention` pollutes the Knowledge-base
12
+ # sidebar and the `memory.conventions` MCP tool.
13
+ #
14
+ # Heuristic: only conventions are re-examined (decisions and architecture
15
+ # notes about external projects are legitimately those predicates). A
16
+ # convention is retagged to `reference` when its object text matches any
17
+ # of the descriptive patterns below. Kept deliberately conservative —
18
+ # false-positive retagging is worse than occasionally missing a case, so
19
+ # the patterns target telltale numeric/attribution phrases that rarely
20
+ # appear in real conventions.
21
+ class ReferenceMaterialDetector
22
+ # Strong signals — any one of these on its own justifies reclassification.
23
+ # Kept tight to avoid false positives on real conventions that happen
24
+ # to quote external project names.
25
+ STRONG_PATTERNS = [
26
+ # Line-of-code counts: "~1,195 LOC", "1200 lines of code"
27
+ /~?\d+[,.]?\d*\s*(?:LOC|lines of code)/i,
28
+ # Star counts: "5,700+ stars", "3.2k stars"
29
+ /\d[\d,.]*\+?\s*(?:k\s+)?stars?\b/i,
30
+ # "X is a (plugin|library|tool|gem|service|framework|extension) …"
31
+ /\b(?:is\s+an?|are)\s+(?:cloud-backed\s+)?(?:plugin|library|tool|gem|service|framework|extension|cli|mcp\s+server)\b/i,
32
+ # Leading descriptor: "Plugin that…", "Library for…"
33
+ /\A(?:cloud-backed\s+)?(?:plugin|library|tool|gem|service|framework|extension|cli|mcp\s+server)(?:\s+(?:with|using|for|that))/i
34
+ ].freeze
35
+
36
+ # Weak signals — only fire in combination with a strong signal.
37
+ # Author attribution ("by Jane Doe") was originally a standalone
38
+ # trigger, but production text like "MCP launched by Claude Code run
39
+ # from PATH" contains the same surface pattern inside a legitimate
40
+ # convention. Requiring a co-occurring strong signal keeps the guard
41
+ # conservative.
42
+ WEAK_PATTERNS = [
43
+ /\bby\s+[[:upper:]][[:alpha:]'-]+\s+[[:upper:]][[:alpha:]'-]+/
44
+ ].freeze
45
+
46
+ # Predicates we inspect. Decisions stay decisions even when they cite
47
+ # external projects ("From QMD restudy: adopt X"); the guard targets
48
+ # only `convention`, where misclassification is most common.
49
+ GUARDED_PREDICATES = %w[convention].freeze
50
+
51
+ def reclassify(extraction)
52
+ return extraction if extraction.facts.nil? || extraction.facts.empty?
53
+
54
+ new_facts = extraction.facts.map do |fact|
55
+ if reference_material?(fact)
56
+ fact.merge(predicate: "reference")
57
+ else
58
+ fact
59
+ end
60
+ end
61
+
62
+ Distill::Extraction.new(
63
+ entities: extraction.entities,
64
+ facts: new_facts,
65
+ decisions: extraction.decisions,
66
+ signals: extraction.signals
67
+ )
68
+ end
69
+
70
+ def reference_material?(fact)
71
+ return false unless GUARDED_PREDICATES.include?(fact[:predicate].to_s)
72
+ object = fact[:object].to_s
73
+ return false if object.empty?
74
+ STRONG_PATTERNS.any? { |re| object.match?(re) }
75
+ end
76
+ end
77
+ end
78
+ end