RubyGems - claude_memory - Versions diffs - 0.9.1 → 0.11.0 - Mend

claude_memory 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

checksums.yaml +4 -4
data/.claude/memory.sqlite3 +0 -0
data/.claude/skills/dashboard/SKILL.md +42 -0
data/.claude-plugin/marketplace.json +1 -1
data/.claude-plugin/plugin.json +1 -1
data/CHANGELOG.md +130 -0
data/CLAUDE.md +30 -6
data/README.md +66 -2
data/db/migrations/015_add_activity_events.rb +26 -0
data/db/migrations/016_add_moment_feedback.rb +22 -0
data/db/migrations/017_add_last_recalled_at.rb +15 -0
data/docs/1_0_punchlist.md +371 -0
data/docs/EXAMPLES.md +41 -2
data/docs/GETTING_STARTED.md +33 -4
data/docs/architecture.md +22 -7
data/docs/audit-queries.md +131 -0
data/docs/dashboard.md +192 -0
data/docs/improvements.md +650 -9
data/docs/influence/cq.md +187 -0
data/docs/plugin.md +13 -6
data/docs/quality_review.md +524 -172
data/docs/reflection_memory_as_accumulating_judgment.md +67 -0
data/lib/claude_memory/activity_log.rb +86 -0
data/lib/claude_memory/commands/census_command.rb +210 -0
data/lib/claude_memory/commands/completion_command.rb +3 -0
data/lib/claude_memory/commands/dashboard_command.rb +54 -0
data/lib/claude_memory/commands/dedupe_conflicts_command.rb +55 -0
data/lib/claude_memory/commands/digest_command.rb +273 -0
data/lib/claude_memory/commands/hook_command.rb +61 -2
data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
data/lib/claude_memory/commands/reclassify_references_command.rb +56 -0
data/lib/claude_memory/commands/registry.rb +7 -1
data/lib/claude_memory/commands/show_command.rb +90 -0
data/lib/claude_memory/commands/skills/distill-transcripts.md +13 -1
data/lib/claude_memory/commands/stats_command.rb +131 -2
data/lib/claude_memory/commands/sweep_command.rb +2 -0
data/lib/claude_memory/configuration.rb +16 -0
data/lib/claude_memory/core/relative_time.rb +9 -0
data/lib/claude_memory/dashboard/api.rb +610 -0
data/lib/claude_memory/dashboard/conflicts.rb +279 -0
data/lib/claude_memory/dashboard/efficacy.rb +127 -0
data/lib/claude_memory/dashboard/fact_presenter.rb +109 -0
data/lib/claude_memory/dashboard/health.rb +175 -0
data/lib/claude_memory/dashboard/index.html +2707 -0
data/lib/claude_memory/dashboard/knowledge.rb +136 -0
data/lib/claude_memory/dashboard/moments.rb +244 -0
data/lib/claude_memory/dashboard/reuse.rb +97 -0
data/lib/claude_memory/dashboard/scoped_fact_resolver.rb +95 -0
data/lib/claude_memory/dashboard/server.rb +211 -0
data/lib/claude_memory/dashboard/timeline.rb +68 -0
data/lib/claude_memory/dashboard/trust.rb +454 -0
data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
data/lib/claude_memory/distill/reference_material_detector.rb +78 -0
data/lib/claude_memory/hook/auto_memory_mirror.rb +112 -0
data/lib/claude_memory/hook/context_injector.rb +97 -3
data/lib/claude_memory/hook/handler.rb +191 -3
data/lib/claude_memory/mcp/handlers/management_handlers.rb +8 -0
data/lib/claude_memory/mcp/query_guide.rb +11 -0
data/lib/claude_memory/mcp/text_summary.rb +29 -0
data/lib/claude_memory/mcp/tool_definitions.rb +13 -0
data/lib/claude_memory/mcp/tools.rb +148 -0
data/lib/claude_memory/publish.rb +13 -21
data/lib/claude_memory/recall/stale_detector.rb +67 -0
data/lib/claude_memory/resolve/predicate_policy.rb +2 -0
data/lib/claude_memory/resolve/resolver.rb +41 -11
data/lib/claude_memory/store/llm_cache.rb +68 -0
data/lib/claude_memory/store/metrics_aggregator.rb +96 -0
data/lib/claude_memory/store/schema_manager.rb +1 -1
data/lib/claude_memory/store/sqlite_store.rb +47 -143
data/lib/claude_memory/store/store_manager.rb +29 -0
data/lib/claude_memory/sweep/maintenance.rb +216 -0
data/lib/claude_memory/sweep/recall_timestamp_refresher.rb +83 -0
data/lib/claude_memory/sweep/sweeper.rb +2 -0
data/lib/claude_memory/templates/hooks.example.json +5 -0
data/lib/claude_memory/version.rb +1 -1
data/lib/claude_memory.rb +24 -0
metadata +51 -1

data/lib/claude_memory/dashboard/trust.rb ADDED Viewed

@@ -0,0 +1,454 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  module Dashboard
+    # Sidebar data for the feed-first dashboard. Six surfaces, each
+    # answering a different "is memory helping/costing/clean?" question:
+    #
+    # 1. Moments this week + week-over-week delta — the headline value
+    #    number. A moment is a successful activity event of one of the
+    #    value-bearing types listed in VALUE_EVENT_TYPES (context
+    #    injection, recall, extraction). Ingest-only events don't count
+    #    because they're not directly user-visible value.
+    #
+    # 2. "What memory knows about you" — up to 5 global facts rendered as
+    #    plain English. The trust panel's most compelling surface: users
+    #    can sanity-check what's being injected into their sessions.
+    #
+    # 3. Needs review — open conflicts plus stale facts (active but never
+    #    recalled in the last N days) plus empty recalls (queries that
+    #    returned nothing). A single actionable count; the feed surfaces
+    #    the individual items.
+    #
+    # 4. Utilization (30d) — of facts extracted in the last 30 days, how
+    #    many has Claude actually surfaced via recall or context injection.
+    #    Low ratios are a signal too: memory accumulating knowledge that
+    #    Claude isn't reaching for.
+    #
+    # 5. Token budget (30d, 0.11.0+) — p50/p95/avg `context_tokens`
+    #    injected per SessionStart. Answers "what does memory cost per
+    #    session?" via numbers a skeptical user can read.
+    #
+    # 6. Quality score (live + historical, 0.11.0+) — hallucination-rate
+    #    proxy: 100 - (suspect_pct + bare_pct), clamped 0..100. Live is
+    #    over the last UTILIZATION_DAYS; historical mirrors the same
+    #    calculation across all active facts as a supplementary baseline.
+    #    See `quality_review.md` 2026-04-30 note for why the split exists.
+    class Trust
+      WEEK_SECONDS = 7 * 86_400
+      UTILIZATION_DAYS = 30
+      VALUE_EVENT_TYPES = %w[hook_context recall store_extraction].freeze
+      # @param manager [Object] store manager; methods in this class call
+      #   `default_store(prefer: :project)` and `store_if_exists(scope)` on it.
+      def initialize(manager)
+        @manager = manager
+      end
+      def snapshot
+        {
+          weekly_moments: weekly_moments,
+          fingerprint: fingerprint,
+          needs_review: needs_review,
+          utilization: utilization,
+          feedback: feedback_summary,
+          token_budget: token_budget,
+          quality_score: quality_score
+        }
+      end
+      # The trust panel's hallucination-rate proxy. Counts two pollution
+      # signals:
+      #
+      #   - suspect: facts that ReferenceMaterialDetector retagged from
+      #     `convention` to `reference` predicate (descriptions of external
+      #     projects mislabeled as user conventions).
+      #   - bare_conclusion: `decision` / `convention` facts whose object
+      #     skipped the prompt-mandated reason clause and so are dead
+      #     weight once the originating context is gone.
+      #
+      # Reports two windows so users can distinguish historical noise from
+      # live extraction quality (per `quality_review.md` 2026-04-30
+      # investigation): the headline `score` is computed over facts
+      # created within the last UTILIZATION_DAYS — that's the actionable
+      # signal. The `historical` block reports the same counts over all
+      # active facts so legacy data is visible without dominating.
+      #
+      # Score = 100 - (suspect_pct + bare_pct), clamped 0..100. Lower is
+      # worse. Returns 100 (perfect) when there are no facts in the
+      # window so a quiet week isn't penalized.
+      def quality_score
+        cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
+        live = compute_quality(cutoff: cutoff)
+        historical = compute_quality(cutoff: nil)
+        live.merge(
+          window_days: UTILIZATION_DAYS,
+          historical: historical
+        )
+      rescue Sequel::DatabaseError => e
+        ClaudeMemory.logger.debug("Trust#quality_score failed: #{e.message}")
+        quality_score_zero
+      end
+      public :quality_score
+      def quality_score_zero
+        {
+          total_active: 0,
+          suspect_count: 0,
+          bare_conclusion_count: 0,
+          suspect_pct: 0.0,
+          bare_pct: 0.0,
+          score: 100,
+          window_days: UTILIZATION_DAYS,
+          historical: {
+            total_active: 0,
+            suspect_count: 0,
+            bare_conclusion_count: 0,
+            suspect_pct: 0.0,
+            bare_pct: 0.0,
+            score: 100
+          }
+        }
+      end
+      def compute_quality(cutoff:)
+        breakdown = aggregate_quality_counts(cutoff: cutoff)
+        total = breakdown[:total_active]
+        return zero_breakdown if total.zero?
+        suspect_pct = (breakdown[:suspect_count] * 100.0 / total).round(1)
+        bare_pct = (breakdown[:bare_conclusion_count] * 100.0 / total).round(1)
+        score = (100 - (suspect_pct + bare_pct)).clamp(0, 100).round
+        breakdown.merge(
+          suspect_pct: suspect_pct,
+          bare_pct: bare_pct,
+          score: score
+        )
+      end
+      def zero_breakdown
+        {total_active: 0, suspect_count: 0, bare_conclusion_count: 0,
+         suspect_pct: 0.0, bare_pct: 0.0, score: 100}
+      end
+      def aggregate_quality_counts(cutoff: nil)
+        detector = Distill::BareConclusionDetector.new
+        suspect = 0
+        bare = 0
+        total = 0
+        %w[project global].each do |scope|
+          store = @manager.store_if_exists(scope)
+          next unless store
+          dataset = store.facts.where(status: "active")
+          dataset = dataset.where { created_at >= cutoff } if cutoff
+          total += dataset.count
+          suspect += dataset.where(predicate: "reference").count
+          dataset.where(predicate: %w[decision convention])
+            .select(:predicate, :object_literal)
+            .all
+            .each { |row| bare += 1 if detector.bare_conclusion?(row) }
+        end
+        {total_active: total, suspect_count: suspect, bare_conclusion_count: bare}
+      end
+      # What does memory cost? Aggregates `context_tokens` from successful
+      # `hook_context` activity events over the last UTILIZATION_DAYS so a
+      # skeptical user can see the per-session token cost in p50/p95.
+      #
+      # Shape: {p50:, p95:, avg:, sample_size:, window_days:}
+      # All ints. Returns zeros when there are no events in the window.
+      def token_budget
+        store = @manager.default_store(prefer: :project)
+        return token_budget_zero unless store
+        cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
+        rows = store.activity_events
+          .where(event_type: "hook_context", status: "success")
+          .where { occurred_at >= cutoff }
+          .select(:detail_json)
+          .all
+        tokens = rows.filter_map do |row|
+          details = row[:detail_json] ? JSON.parse(row[:detail_json]) : {}
+          value = details["context_tokens"]
+          value if value.is_a?(Integer) && value > 0
+        end
+        return token_budget_zero if tokens.empty?
+        sorted = tokens.sort
+        {
+          p50: percentile(sorted, 0.50),
+          p95: percentile(sorted, 0.95),
+          avg: (sorted.sum.to_f / sorted.size).round,
+          sample_size: sorted.size,
+          window_days: UTILIZATION_DAYS
+        }
+      rescue Sequel::DatabaseError, JSON::ParserError => e
+        ClaudeMemory.logger.debug("Trust#token_budget failed: #{e.message}")
+        token_budget_zero
+      end
+      public :token_budget
+      def token_budget_zero
+        {p50: 0, p95: 0, avg: 0, sample_size: 0, window_days: UTILIZATION_DAYS}
+      end
+      def percentile(sorted, pct)
+        return 0 if sorted.empty?
+        idx = (sorted.size * pct).ceil - 1
+        idx = 0 if idx < 0
+        idx = sorted.size - 1 if idx >= sorted.size
+        sorted[idx]
+      end
+      private
+      def weekly_moments
+        store = @manager.default_store(prefer: :project)
+        return {this_week: 0, last_week: 0, delta: 0, by_kind: {}} unless store
+        now = Time.now.utc
+        this_week_since = (now - WEEK_SECONDS).iso8601
+        last_week_since = (now - 2 * WEEK_SECONDS).iso8601
+        this_rows = valuable_events(store, this_week_since)
+        last_rows = valuable_events(store, last_week_since, before: this_week_since)
+        by_kind = this_rows.group_by { |r| r[:event_type] }.transform_values(&:size)
+        {
+          this_week: this_rows.size,
+          last_week: last_rows.size,
+          delta: this_rows.size - last_rows.size,
+          by_kind: by_kind
+        }
+      rescue Sequel::DatabaseError => e
+        ClaudeMemory.logger.debug("Trust#weekly_moments failed: #{e.message}")
+        {this_week: 0, last_week: 0, delta: 0, by_kind: {}}
+      end
+      def valuable_events(store, since, before: nil)
+        dataset = store.activity_events
+          .where(event_type: VALUE_EVENT_TYPES)
+          .where(status: "success")
+          .where { occurred_at >= since }
+        dataset = dataset.where { occurred_at < before } if before
+        dataset.all
+      end
+      # Up to 5 global facts rendered as plain-English sentences so a skeptical
+      # user can verify at-a-glance what's being injected into their Claude
+      # sessions. Prefers high-signal predicates (convention, decision,
+      # uses_framework, uses_database) and falls back to most-recent active.
+      def fingerprint
+        store = @manager.store_if_exists("global")
+        return [] unless store
+        preferred_predicates = %w[convention decision uses_framework uses_database uses_language]
+        rows = store.facts
+          .where(status: "active", scope: "global")
+          .where(predicate: preferred_predicates)
+          .order(Sequel.desc(:confidence), Sequel.desc(:created_at))
+          .limit(5)
+          .all
+        if rows.size < 5
+          extra = store.facts
+            .where(status: "active", scope: "global")
+            .exclude(id: rows.map { |r| r[:id] })
+            .order(Sequel.desc(:created_at))
+            .limit(5 - rows.size)
+            .all
+          rows += extra
+        end
+        presenter = FactPresenter.new(store)
+        presenter.list_summary(rows).map { |f| render_sentence(f) }
+      rescue Sequel::DatabaseError => e
+        ClaudeMemory.logger.debug("Trust#fingerprint failed: #{e.message}")
+        []
+      end
+      def render_sentence(fact)
+        predicate = fact[:predicate]
+        object = fact[:object]
+        subject = fact[:subject]
+        sentence = case predicate
+        when "convention"
+          object
+        when "decision"
+          object
+        when "uses_framework", "uses_language"
+          "Uses #{object}"
+        when "uses_database"
+          "Uses #{object} for storage"
+        when "deployment_platform"
+          "Deploys to #{object}"
+        when "auth_method"
+          "Auth via #{object}"
+        else
+          "#{subject} #{predicate.tr("_", " ")} #{object}"
+        end
+        {
+          id: fact[:id],
+          docid: fact[:docid],
+          sentence: sentence.to_s.strip,
+          predicate: predicate,
+          confidence: fact[:confidence]
+        }
+      end
+      def needs_review
+        {
+          open_conflicts: count_open_conflicts,
+          stale_facts: count_stale_facts,
+          empty_recalls: count_empty_recalls
+        }
+      end
+      def count_open_conflicts
+        Conflicts.new(@manager).distinct_open_counts
+      rescue Sequel::DatabaseError
+        {project: 0, global: 0, total: 0}
+      end
+      # User-supplied thumbs on feed moments. The ratio answers "when Claude
+      # surfaces something from memory, is the user signaling it was helpful?"
+      # Only moments recorded in the last UTILIZATION_DAYS count toward the
+      # ratio so old clicks don't distort an active week's signal.
+      #
+      # Shape: {up: Int, down: Int, net: Int, ratio_pct: Int, window_days: Int}
+      # ratio_pct = up / (up + down) × 100, or nil when there's no feedback.
+      def feedback_summary
+        store = @manager.default_store(prefer: :project)
+        return feedback_zero unless store
+        cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
+        rows = store.moment_feedback.where { recorded_at >= cutoff }.all
+        up = rows.count { |r| r[:verdict] == "up" }
+        down = rows.count { |r| r[:verdict] == "down" }
+        total = up + down
+        ratio_pct = total.zero? ? nil : ((up.to_f / total) * 100).round
+        {up: up, down: down, net: up - down, ratio_pct: ratio_pct, window_days: UTILIZATION_DAYS}
+      rescue Sequel::DatabaseError
+        feedback_zero
+      end
+      def feedback_zero
+        {up: 0, down: 0, net: 0, ratio_pct: nil, window_days: UTILIZATION_DAYS}
+      end
+      # "Stale" = active facts whose last_recalled_at is older than the
+      # configured threshold (or never set, with a grace window so freshly
+      # extracted facts don't show up as stale on day one).
+      #
+      # Backed by Recall::StaleDetector, which reads the column populated by
+      # Sweep::RecallTimestampRefresher. Replaces the older "active facts
+      # minus seen-in-recalls" approximation, which couldn't distinguish a
+      # never-touched 6-month-old fact from a freshly stored one.
+      def count_stale_facts
+        threshold = Configuration.new.stale_days
+        Recall::StaleDetector.stale_count(@manager, threshold_days: threshold)
+      rescue Sequel::DatabaseError, JSON::ParserError => e
+        ClaudeMemory.logger.debug("Trust#count_stale_facts failed: #{e.message}")
+        0
+      end
+      # The ROI signal: of the facts Claude has extracted into memory over the
+      # last UTILIZATION_DAYS, how many has Claude actually *used* (appeared
+      # in any recall or context injection's top_fact_ids)? Low ratios are
+      # themselves a signal — it means memory is accumulating knowledge but
+      # Claude isn't reaching for it. Anomalies worth surfacing honestly.
+      #
+      # Shape: {extracted: Int, used: Int, ratio_pct: Int, window_days: Int}
+      # Both counts are scope-union (project + global) so the headline number
+      # reflects everything memory did, not just one store.
+      def utilization
+        cutoff = (Time.now.utc - UTILIZATION_DAYS * 86_400).iso8601
+        extracted_pairs = extracted_fact_pairs(cutoff)
+        used_pairs = used_fact_pairs(cutoff)
+        extracted = extracted_pairs.size
+        # "Used" counted against the extracted set — a fact used but not
+        # extracted in this window (taught earlier, used now) is still
+        # re-use worth recognizing; count it too.
+        used_from_extracted = (used_pairs & extracted_pairs).size
+        used_total = used_pairs.size
+        ratio_pct = extracted.zero? ? 0 : ((used_from_extracted.to_f / extracted) * 100).round
+        {
+          extracted: extracted,
+          used: used_total,
+          used_from_extracted: used_from_extracted,
+          ratio_pct: ratio_pct,
+          window_days: UTILIZATION_DAYS
+        }
+      rescue Sequel::DatabaseError, JSON::ParserError => e
+        ClaudeMemory.logger.debug("Trust#utilization failed: #{e.message}")
+        {extracted: 0, used: 0, used_from_extracted: 0, ratio_pct: 0, window_days: UTILIZATION_DAYS}
+      end
+      public :utilization
+      # Facts that were extracted (distilled + stored) within the window.
+      # Returns (scope, id) pairs across both stores.
+      def extracted_fact_pairs(cutoff)
+        pairs = Set.new
+        %w[project global].each do |scope|
+          store = @manager.store_if_exists(scope)
+          next unless store
+          store.facts
+            .where(status: "active")
+            .where { created_at >= cutoff }
+            .select(:id)
+            .all
+            .each { |r| pairs << [scope, r[:id]] }
+        end
+        pairs
+      end
+      # Facts that appeared as top_fact_ids in any recall or context injection
+      # within the window. Returns (scope, id) pairs.
+      def used_fact_pairs(cutoff)
+        store = @manager.default_store(prefer: :project)
+        return Set.new unless store
+        pairs = Set.new
+        store.activity_events
+          .where(event_type: %w[recall hook_context], status: "success")
+          .where { occurred_at >= cutoff }
+          .select(:detail_json)
+          .all
+          .each do |row|
+            details = row[:detail_json] ? JSON.parse(row[:detail_json]) : {}
+            scoped = ScopedFactResolver.scoped_ids_from_details(details)
+            ScopedFactResolver.flat_pairs(scoped).each { |pair| pairs << pair }
+          end
+        pairs
+      end
+      def count_empty_recalls
+        store = @manager.default_store(prefer: :project)
+        return 0 unless store
+        cutoff = (Time.now.utc - WEEK_SECONDS).iso8601
+        store.activity_events
+          .where(event_type: "recall")
+          .where(status: "success")
+          .where { occurred_at >= cutoff }
+          .all
+          .count do |row|
+            details = row[:detail_json] ? JSON.parse(row[:detail_json]) : {}
+            (details["result_count"] || 0).zero?
+          end
+      rescue Sequel::DatabaseError, JSON::ParserError
+        0
+      end
+    end
+  end
+end

data/lib/claude_memory/distill/bare_conclusion_detector.rb ADDED Viewed

@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  module Distill
+    # Catches facts that survived distillation without a reason clause.
+    # The SessionStart distillation prompt explicitly requires `decision`
+    # and `convention` facts to embed the reason ("— because …", "so that
+    # …", "to avoid …", "caused by …", "breaks when …"); facts that ship
+    # without one are dead weight once they go stale because nobody can
+    # recover the original justification by re-reading the row.
+    #
+    # This detector is the production-side mirror of that prompt
+    # constraint. It exists so the dashboard can quantify how many facts
+    # are slipping through the prompt's reason-clause requirement —
+    # higher bare-conclusion ratio means the LLM is producing low-quality
+    # extractions, which is a hallucination-rate proxy worth surfacing.
+    #
+    # Pure function, no side effects, safe to call in tight loops.
+    class BareConclusionDetector
+      # Predicates the prompt requires reasons for. Other predicates
+      # (uses_framework, uses_database, etc.) carry their meaning in the
+      # subject-predicate-object shape itself, so a bare object is fine.
+      GUARDED_PREDICATES = %w[decision convention].freeze
+      # Reason-clause signals lifted from the distill-transcripts skill
+      # prompt plus a small set of common natural-language variants. The
+      # match is case-insensitive and substring-anchored — any one signal
+      # qualifies the fact as "explained" even without an em dash.
+      REASON_PATTERNS = [
+        /\bbecause\b/i,
+        /\bso\s+that\b/i,
+        /\bso\s+the\b/i,
+        /\bso\s+we\b/i,
+        /\bin\s+order\s+to\b/i,
+        /\bto\s+avoid\b/i,
+        /\bto\s+prevent\b/i,
+        /\bto\s+ensure\b/i,
+        /\bto\s+support\b/i,
+        /\bto\s+allow\b/i,
+        /\bto\s+enable\b/i,
+        /\bto\s+make\b/i,
+        /\bto\s+fix\b/i,
+        /\bto\s+handle\b/i,
+        /\bcaused\s+by\b/i,
+        /\bbreaks\s+when\b/i,
+        /\bdue\s+to\b/i,
+        /\botherwise\b/i,
+        /\bwithout\s+(?:which|this|it)\b/i
+      ].freeze
+      # Returns true when the fact has a guarded predicate AND its object
+      # text shows no reason-clause signal. Returns false for any fact
+      # outside the guarded predicates so the metric isn't polluted by
+      # legitimately-bare facts (uses_database "sqlite" doesn't need a
+      # rationale embedded in its object).
+      #
+      # @param fact [Hash] with :predicate and :object_literal keys (or
+      #   :predicate / :object — accepts both shapes used in the codebase)
+      # @return [Boolean]
+      def bare_conclusion?(fact)
+        predicate = fact[:predicate].to_s
+        return false unless GUARDED_PREDICATES.include?(predicate)
+        object = (fact[:object_literal] || fact[:object]).to_s
+        return false if object.empty?
+        REASON_PATTERNS.none? { |re| object.match?(re) }
+      end
+    end
+  end
+end

data/lib/claude_memory/distill/reference_material_detector.rb ADDED Viewed

@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  module Distill
+    # Guards against the LLM distiller mislabeling reference material as
+    # `convention`. Audited in production data on 2026-04-24: project facts
+    # labeled `predicate=convention` with objects like "Cloud-backed Claude
+    # Code plugin (~1,195 LOC JavaScript) using Supermemory API…" and
+    # "Claude Code plugin with marketplace.json, 5,700+ stars, by Tobi Lütke."
+    # These are descriptions of external projects, not conventions the user
+    # applies. Leaving them under `convention` pollutes the Knowledge-base
+    # sidebar and the `memory.conventions` MCP tool.
+    #
+    # Heuristic: only conventions are re-examined (decisions and architecture
+    # notes about external projects are legitimately those predicates). A
+    # convention is retagged to `reference` when its object text matches any
+    # of the descriptive patterns below. Kept deliberately conservative —
+    # false-positive retagging is worse than occasionally missing a case, so
+    # the patterns target telltale numeric/attribution phrases that rarely
+    # appear in real conventions.
+    class ReferenceMaterialDetector
+      # Strong signals — any one of these on its own justifies reclassification.
+      # Kept tight to avoid false positives on real conventions that happen
+      # to quote external project names.
+      STRONG_PATTERNS = [
+        # Line-of-code counts: "~1,195 LOC", "1200 lines of code"
+        /~?\d+[,.]?\d*\s*(?:LOC|lines of code)/i,
+        # Star counts: "5,700+ stars", "3.2k stars"
+        /\d[\d,.]*\+?\s*(?:k\s+)?stars?\b/i,
+        # "X is a (plugin|library|tool|gem|service|framework|extension) …"
+        /\b(?:is\s+an?|are)\s+(?:cloud-backed\s+)?(?:plugin|library|tool|gem|service|framework|extension|cli|mcp\s+server)\b/i,
+        # Leading descriptor: "Plugin that…", "Library for…"
+        /\A(?:cloud-backed\s+)?(?:plugin|library|tool|gem|service|framework|extension|cli|mcp\s+server)(?:\s+(?:with|using|for|that))/i
+      ].freeze
+      # Weak signals — only fire in combination with a strong signal.
+      # Author attribution ("by Jane Doe") was originally a standalone
+      # trigger, but production text like "MCP launched by Claude Code run
+      # from PATH" contains the same surface pattern inside a legitimate
+      # convention. Requiring a co-occurring strong signal keeps the guard
+      # conservative.
+      WEAK_PATTERNS = [
+        /\bby\s+[[:upper:]][[:alpha:]'-]+\s+[[:upper:]][[:alpha:]'-]+/
+      ].freeze
+      # Predicates we inspect. Decisions stay decisions even when they cite
+      # external projects ("From QMD restudy: adopt X"); the guard targets
+      # only `convention`, where misclassification is most common.
+      GUARDED_PREDICATES = %w[convention].freeze
+      def reclassify(extraction)
+        return extraction if extraction.facts.nil? || extraction.facts.empty?
+        new_facts = extraction.facts.map do |fact|
+          if reference_material?(fact)
+            fact.merge(predicate: "reference")
+          else
+            fact
+          end
+        end
+        Distill::Extraction.new(
+          entities: extraction.entities,
+          facts: new_facts,
+          decisions: extraction.decisions,
+          signals: extraction.signals
+        )
+      end
+      def reference_material?(fact)
+        return false unless GUARDED_PREDICATES.include?(fact[:predicate].to_s)
+        object = fact[:object].to_s
+        return false if object.empty?
+        STRONG_PATTERNS.any? { |re| object.match?(re) }
+      end
+    end
+  end
+end