RubyGems - claude_memory - Versions diffs - 0.9.1 → 0.11.0 - Mend

claude_memory 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

checksums.yaml +4 -4
data/.claude/memory.sqlite3 +0 -0
data/.claude/skills/dashboard/SKILL.md +42 -0
data/.claude-plugin/marketplace.json +1 -1
data/.claude-plugin/plugin.json +1 -1
data/CHANGELOG.md +130 -0
data/CLAUDE.md +30 -6
data/README.md +66 -2
data/db/migrations/015_add_activity_events.rb +26 -0
data/db/migrations/016_add_moment_feedback.rb +22 -0
data/db/migrations/017_add_last_recalled_at.rb +15 -0
data/docs/1_0_punchlist.md +371 -0
data/docs/EXAMPLES.md +41 -2
data/docs/GETTING_STARTED.md +33 -4
data/docs/architecture.md +22 -7
data/docs/audit-queries.md +131 -0
data/docs/dashboard.md +192 -0
data/docs/improvements.md +650 -9
data/docs/influence/cq.md +187 -0
data/docs/plugin.md +13 -6
data/docs/quality_review.md +524 -172
data/docs/reflection_memory_as_accumulating_judgment.md +67 -0
data/lib/claude_memory/activity_log.rb +86 -0
data/lib/claude_memory/commands/census_command.rb +210 -0
data/lib/claude_memory/commands/completion_command.rb +3 -0
data/lib/claude_memory/commands/dashboard_command.rb +54 -0
data/lib/claude_memory/commands/dedupe_conflicts_command.rb +55 -0
data/lib/claude_memory/commands/digest_command.rb +273 -0
data/lib/claude_memory/commands/hook_command.rb +61 -2
data/lib/claude_memory/commands/initializers/hooks_configurator.rb +7 -4
data/lib/claude_memory/commands/reclassify_references_command.rb +56 -0
data/lib/claude_memory/commands/registry.rb +7 -1
data/lib/claude_memory/commands/show_command.rb +90 -0
data/lib/claude_memory/commands/skills/distill-transcripts.md +13 -1
data/lib/claude_memory/commands/stats_command.rb +131 -2
data/lib/claude_memory/commands/sweep_command.rb +2 -0
data/lib/claude_memory/configuration.rb +16 -0
data/lib/claude_memory/core/relative_time.rb +9 -0
data/lib/claude_memory/dashboard/api.rb +610 -0
data/lib/claude_memory/dashboard/conflicts.rb +279 -0
data/lib/claude_memory/dashboard/efficacy.rb +127 -0
data/lib/claude_memory/dashboard/fact_presenter.rb +109 -0
data/lib/claude_memory/dashboard/health.rb +175 -0
data/lib/claude_memory/dashboard/index.html +2707 -0
data/lib/claude_memory/dashboard/knowledge.rb +136 -0
data/lib/claude_memory/dashboard/moments.rb +244 -0
data/lib/claude_memory/dashboard/reuse.rb +97 -0
data/lib/claude_memory/dashboard/scoped_fact_resolver.rb +95 -0
data/lib/claude_memory/dashboard/server.rb +211 -0
data/lib/claude_memory/dashboard/timeline.rb +68 -0
data/lib/claude_memory/dashboard/trust.rb +454 -0
data/lib/claude_memory/distill/bare_conclusion_detector.rb +71 -0
data/lib/claude_memory/distill/reference_material_detector.rb +78 -0
data/lib/claude_memory/hook/auto_memory_mirror.rb +112 -0
data/lib/claude_memory/hook/context_injector.rb +97 -3
data/lib/claude_memory/hook/handler.rb +191 -3
data/lib/claude_memory/mcp/handlers/management_handlers.rb +8 -0
data/lib/claude_memory/mcp/query_guide.rb +11 -0
data/lib/claude_memory/mcp/text_summary.rb +29 -0
data/lib/claude_memory/mcp/tool_definitions.rb +13 -0
data/lib/claude_memory/mcp/tools.rb +148 -0
data/lib/claude_memory/publish.rb +13 -21
data/lib/claude_memory/recall/stale_detector.rb +67 -0
data/lib/claude_memory/resolve/predicate_policy.rb +2 -0
data/lib/claude_memory/resolve/resolver.rb +41 -11
data/lib/claude_memory/store/llm_cache.rb +68 -0
data/lib/claude_memory/store/metrics_aggregator.rb +96 -0
data/lib/claude_memory/store/schema_manager.rb +1 -1
data/lib/claude_memory/store/sqlite_store.rb +47 -143
data/lib/claude_memory/store/store_manager.rb +29 -0
data/lib/claude_memory/sweep/maintenance.rb +216 -0
data/lib/claude_memory/sweep/recall_timestamp_refresher.rb +83 -0
data/lib/claude_memory/sweep/sweeper.rb +2 -0
data/lib/claude_memory/templates/hooks.example.json +5 -0
data/lib/claude_memory/version.rb +1 -1
data/lib/claude_memory.rb +24 -0
metadata +51 -1

data/lib/claude_memory/mcp/tools.rb CHANGED Viewed

@@ -44,11 +44,34 @@ module ClaudeMemory
         ToolDefinitions.all
       end
+      # Tools that represent recall/query usage - tracked for efficacy
+      RECALL_TOOLS = %w[
+        memory.recall memory.recall_index memory.recall_semantic
+        memory.search_concepts memory.decisions memory.conventions memory.architecture
+      ].freeze
+      # Write tools worth tracking
+      WRITE_TOOLS = %w[memory.store_extraction].freeze
+      TRACKED_TOOLS = (RECALL_TOOLS + WRITE_TOOLS).freeze
       # Dispatch a tool call to the appropriate handler method.
       # @param name [String] fully-qualified tool name (e.g. "memory.recall")
       # @param arguments [Hash] tool arguments from the MCP request
       # @return [Hash] structured result hash for the tool response
       def call(name, arguments)
+        t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+        result = dispatch(name, arguments)
+        log_tool_activity(name, arguments, result, t0) if TRACKED_TOOLS.include?(name)
+        result
+      end
+      private
+      def dispatch(name, arguments)
         case name
         when "memory.recall" then recall(arguments)
         when "memory.recall_index" then recall_index(arguments)
@@ -74,6 +97,7 @@ module ClaudeMemory
         when "memory.mark_distilled" then mark_distilled(arguments)
         when "memory.check_setup" then check_setup
         when "memory.list_projects" then list_projects
+        when "memory.activity" then activity(arguments)
         else {error: "Unknown tool: #{name}"}
         end
       end
@@ -111,6 +135,130 @@ module ClaudeMemory
           @legacy_store
         end
       end
+      def log_tool_activity(name, arguments, result, t0)
+        store = default_store
+        return unless store
+        duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000).round
+        event_type = WRITE_TOOLS.include?(name) ? "store_extraction" : "recall"
+        status = result[:error] ? "error" : "success"
+        session_id = extract_session_id(arguments)
+        details = {tool: name}
+        if event_type == "recall"
+          details[:query] = arguments["query"] || arguments["concepts"]&.join(", ")
+          details[:scope] = arguments["scope"]
+          details[:result_count] = extract_result_count(result)
+          # top_fact_ids is a flat list of the first 5 IDs; top_facts_by_scope
+          # groups the same IDs by source so dashboard readers can resolve
+          # each ID from the DB it actually came from. Fact IDs autoincrement
+          # per-DB, so a bare ID without scope is ambiguous.
+          scoped = extract_top_facts_scoped(result)
+          details[:top_fact_ids] = scoped.values.flatten.first(5)
+          details[:top_facts_by_scope] = scoped if scoped.any?
+          details[:results_by_scope] = extract_scope_breakdown(result)
+        else
+          details[:facts_created] = result[:facts_created]
+          details[:entities_created] = result[:entities_created]
+          details[:content_item_id] = result[:content_item_id]
+        end
+        ActivityLog.record(store, event_type: event_type, status: status,
+          session_id: session_id, duration_ms: duration_ms, details: details.compact)
+      end
+      # Probe a recall result for a count of returned items. Prefers an explicit
+      # Integer under :fact_count, :count, or :results_count; otherwise falls back
+      # to the size of the first Array under :facts, :results, :items, :concepts, or :conflicts (else 0).
+      def extract_result_count(result)
+        return 0 unless result.is_a?(Hash)
+        [:fact_count, :count, :results_count].each do |key|
+          val = result[key]
+          return val if val.is_a?(Integer)
+        end
+        [:facts, :results, :items, :concepts, :conflicts].each do |key|
+          val = result[key]
+          return val.size if val.is_a?(Array)
+        end
+        0
+      end
+      # Capture up to `limit` (default 5) fact ids from a recall result, grouped
+      # by source scope. Fact IDs autoincrement per-DB, so without scope a bare ID
+      # is ambiguous (project fact #1 and global fact #1 are different facts).
+      # Grouping uses the row's :source, else the fact's :scope; rows carrying
+      # neither are filed under "project". Rows without an id are skipped.
+      #
+      # @return [Hash{String => Array<Integer>}] e.g. {"project" => [5, 8], "global" => [1]}
+      def extract_top_facts_scoped(result, limit: 5)
+        return {} unless result.is_a?(Hash)
+        collection = [:facts, :results, :items].map { |k| result[k] }.find { |v| v.is_a?(Array) }
+        return {} unless collection
+        grouped = Hash.new { |h, k| h[k] = [] }
+        collection.first(limit).each do |row|
+          next unless row.is_a?(Hash)
+          fact = row[:fact] || row["fact"] || row
+          id = fact.is_a?(Hash) ? (fact[:id] || fact["id"]) : nil
+          next unless id
+          scope = row[:source] || row["source"] || fact[:scope] || fact["scope"] || "project"
+          grouped[scope.to_s] << id
+        end
+        grouped
+      end
+      def extract_session_id(arguments)
+        (arguments.is_a?(Hash) && arguments["session_id"]) || Configuration.new.session_id
+      end
+      # Count returned items grouped by their :scope field (falling back to
+      # :source, then "unknown") so the dashboard can show whether a recall's hits
+      # came from global preferences, project facts, or both. Returns nil when no rows can be counted.
+      # @return [Hash{String => Integer}, nil]
+      def extract_scope_breakdown(result)
+        return nil unless result.is_a?(Hash)
+        collection = [:facts, :results, :items].map { |k| result[k] }.find { |v| v.is_a?(Array) }
+        return nil unless collection
+        breakdown = Hash.new(0)
+        collection.each { |row|
+          next unless row.is_a?(Hash)
+          scope = row[:scope] || row["scope"] || row[:source] || row["source"] || "unknown"
+          breakdown[scope.to_s] += 1
+        }
+        breakdown.empty? ? nil : breakdown
+      end
+      # Return whichever store is available for activity logging. Without a
+      # manager this is the legacy store; otherwise it delegates to
+      # StoreManager#default_store, which prefers the project store and falls back
+      # to global — preventing silent drops of activity events when the project DB hasn't been initialized yet.
+      def default_store
+        return @legacy_store unless @manager
+        @manager.default_store(prefer: :project)
+      end
+      def activity(args)
+        store = default_store
+        return {error: "No database available"} unless store
+        limit = args["limit"] || 50
+        event_type = args["event_type"]
+        since = args["since"]
+        events = ActivityLog.recent(store, limit: limit, event_type: event_type, since: since)
+        summary = ActivityLog.summary(store, since: since)
+        {
+          event_count: events.size,
+          summary: summary,
+          events: events.map { |e|
+            e[:occurred_ago] = Core::RelativeTime.format(e[:occurred_at])
+            e
+          }
+        }
+      end
     end
   end
 end

data/lib/claude_memory/publish.rb CHANGED Viewed

@@ -112,38 +112,30 @@ module ClaudeMemory
     # @return [String] Markdown section for decision facts
     def generate_decisions_section(facts)
-      decisions = facts.select { |f| Resolve::PredicatePolicy.section_for(f[:predicate]) == :decisions }
-      return "" if decisions.empty?
-      lines = ["## Current Decisions\n"]
-      decisions.each do |d|
-        lines << "- #{d[:object_literal]}"
+      generate_section(facts, section: :decisions, title: "Current Decisions") do |d|
+        "- #{d[:object_literal]}"
       end
-      lines.join("\n") + "\n"
     end
     # @return [String] Markdown section for convention facts
     def generate_conventions_section(facts)
-      conventions = facts.select { |f| Resolve::PredicatePolicy.section_for(f[:predicate]) == :conventions }
-      return "" if conventions.empty?
-      lines = ["## Conventions\n"]
-      conventions.each do |c|
-        lines << "- #{c[:object_literal]}"
+      generate_section(facts, section: :conventions, title: "Conventions") do |c|
+        "- #{c[:object_literal]}"
       end
-      lines.join("\n") + "\n"
     end
     # @return [String] Markdown section for technical constraint facts
     def generate_constraints_section(facts)
-      constraints = facts.select { |f| Resolve::PredicatePolicy.section_for(f[:predicate]) == :constraints }
-      return "" if constraints.empty?
-      lines = ["## Technical Constraints\n"]
-      constraints.each do |c|
-        lines << "- **#{humanize(c[:predicate])}**: #{c[:object_literal]}"
+      generate_section(facts, section: :constraints, title: "Technical Constraints") do |c|
+        "- **#{humanize(c[:predicate])}**: #{c[:object_literal]}"
       end
-      lines.join("\n") + "\n"
+    end
+    def generate_section(facts, section:, title:, &row_formatter)
+      rows = facts.select { |f| Resolve::PredicatePolicy.section_for(f[:predicate]) == section }
+      return "" if rows.empty?
+      (["## #{title}\n"] + rows.map(&row_formatter)).join("\n") + "\n"
     end
     # @return [String] Markdown section for additional knowledge grouped by predicate

data/lib/claude_memory/recall/stale_detector.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  class Recall
+    # #35 access-based staleness — read-only query layer over the
+    # last_recalled_at column populated by Sweep::RecallTimestampRefresher.
+    #
+    # An active fact is "stale" when:
+    # - It hasn't been recalled or context-injected within `threshold_days`
+    #   (last_recalled_at < cutoff OR last_recalled_at is NULL), AND
+    # - It was created before the cutoff too — freshly extracted facts
+    #   aren't dead weight, they just haven't had a chance to be used.
+    #
+    # No auto-deletion. The point is to surface a count and a list to the
+    # user so they can review and reject; the sweeper never acts on this.
+    module StaleDetector
+      module_function
+      # @param manager [Store::StoreManager]
+      # @param threshold_days [Integer] grace window in days
+      # @param limit [Integer] max rows per scope (0 = unlimited)
+      # @return [Hash] {project: [...], global: [...], total: Int}
+      def stale_facts(manager, threshold_days:, limit: 50)
+        cutoff = (Time.now.utc - threshold_days * 86_400).iso8601
+        result = {project: [], global: [], total: 0}
+        %w[project global].each do |scope|
+          store = manager.store_if_exists(scope)
+          next unless store
+          rows = stale_rows_for(store, cutoff, limit)
+          result[scope.to_sym] = rows
+          result[:total] += rows.size
+        end
+        result
+      end
+      # Scope-agnostic count helper for the dashboard sidebar. Avoids
+      # materializing rows when only a count is needed.
+      #
+      # @return [Integer] total stale facts across both stores
+      def stale_count(manager, threshold_days:)
+        cutoff = (Time.now.utc - threshold_days * 86_400).iso8601
+        count = 0
+        %w[project global].each do |scope|
+          store = manager.store_if_exists(scope)
+          next unless store
+          count += stale_dataset(store, cutoff).count
+        end
+        count
+      end
+      def stale_dataset(store, cutoff)
+        store.facts
+          .where(status: "active")
+          .where { created_at < cutoff }
+          .where { (last_recalled_at < cutoff) | {last_recalled_at: nil} }
+      end
+      def stale_rows_for(store, cutoff, limit)
+        ds = stale_dataset(store, cutoff).order(Sequel.asc(:last_recalled_at)).order_append(:created_at)
+        ds = ds.limit(limit) if limit > 0
+        ds.all
+      end
+    end
+  end
+end

data/lib/claude_memory/resolve/predicate_policy.rb CHANGED Viewed

@@ -17,6 +17,7 @@ module ClaudeMemory
         "convention" => {cardinality: :multi, exclusive: false},
         "decision" => {cardinality: :multi, exclusive: false},
         "architecture" => {cardinality: :multi, exclusive: false},
+        "reference" => {cardinality: :multi, exclusive: false},
         "uses_framework" => {cardinality: :multi, exclusive: false},
         "uses_language" => {cardinality: :multi, exclusive: false},
         "uses_database" => {cardinality: :single, exclusive: true},
@@ -46,6 +47,7 @@ module ClaudeMemory
       SECTION_MAP = {
         "decision" => :decisions,
         "convention" => :conventions,
+        "reference" => :references,
         "uses_database" => :constraints,
         "uses_framework" => :constraints,
         "uses_language" => :constraints,

data/lib/claude_memory/resolve/resolver.rb CHANGED Viewed

@@ -106,17 +106,26 @@ module ClaudeMemory
       end
       def determine_resolution(existing_facts, fact_data, entity_ids)
-        return :insert unless PredicatePolicy.single?(fact_data[:predicate]) && existing_facts.any?
+        return :insert if existing_facts.empty?
+        # Always reinforce on an exact match — works for both single- and
+        # multi-value predicates. Without this check, multi-value predicates
+        # like uses_language and uses_framework accumulated an identical
+        # fact every ingest cycle (one ruby fact per Stop hook), because
+        # the old :insert fast-path for multi-value never looked at the
+        # existing set.
         object_entity_id = entity_ids[fact_data[:object]]
         matching = existing_facts.find { |f| values_match?(f, fact_data[:object], object_entity_id) }
-        if matching
-          :reinforce
-        elsif supersession_signal?(fact_data)
-          :supersede
+        return :reinforce if matching
+        # No exact match: for multi-value predicates the new object is
+        # genuinely a new coexisting value. For single-value, either the
+        # user signaled supersession ("now we use X instead") or the new
+        # claim contradicts the current one.
+        if PredicatePolicy.single?(fact_data[:predicate])
+          supersession_signal?(fact_data) ? :supersede : :conflict
         else
-          :conflict
+          :insert
         end
       end
@@ -141,6 +150,20 @@ module ClaudeMemory
       end
       def apply_conflict(existing_facts, fact_data, subject_id, content_item_id, occurred_at, project_path:, scope:)
+        # Before creating a new disputed fact + conflict row, check whether
+        # we've already recorded this exact contradiction against the same
+        # active slot. Without this guard, every re-extraction of the losing
+        # value produced a new disputed fact + conflict row — the production
+        # DB accumulated 11 identical sqlite-vs-postgresql conflict rows that
+        # way. facts_for_slot defaults to status="active", so the existing
+        # disputed fact stayed invisible until we explicitly asked for it.
+        existing_disputed = @store.facts_for_slot(subject_id, fact_data[:predicate], status: "disputed")
+        matching = existing_disputed.find { |f| values_match?(f, fact_data[:object], nil) }
+        if matching
+          add_provenance(matching[:id], content_item_id, fact_data)
+          return {created: 0, superseded: 0, conflicts: 0, provenance: 1}
+        end
         create_conflict(existing_facts.first[:id], fact_data, subject_id, content_item_id, occurred_at,
           project_path: project_path, scope: scope)
         {created: 0, superseded: 0, conflicts: 1, provenance: 0}
@@ -162,8 +185,15 @@ module ClaudeMemory
       end
       def insert_new_fact(fact_data, subject_id, entity_ids, occurred_at, project_path:, scope:)
-        fact_scope = fact_data[:scope_hint] || scope
-        fact_project = (fact_scope == "global") ? nil : project_path
+        # The fact's scope MUST match the store it's being written to.
+        # The distiller may emit scope_hint: "global" when text matches
+        # patterns like "always" / "my preference", but scope_hint is
+        # advisory — it doesn't route the write. Honoring it as a scope
+        # override produced "scope=global" rows inside the project DB
+        # (orphaned facts that were never visible to global recall). Users
+        # who want a project fact in global memory use `claude-memory
+        # promote`, which does the proper cross-store copy.
+        fact_project = (scope == "global") ? nil : project_path
         @store.insert_fact(
           subject_entity_id: subject_id,
@@ -173,7 +203,7 @@ module ClaudeMemory
           polarity: fact_data[:polarity] || "positive",
           confidence: fact_data[:confidence] || 1.0,
           valid_from: occurred_at,
-          scope: fact_scope,
+          scope: scope,
           project_path: fact_project
         )
       end

data/lib/claude_memory/store/llm_cache.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+require "digest"
+module ClaudeMemory
+  module Store
+    # LLM cache persistence for the SQLiteStore.
+    # Keyed on SHA-256 of "{operation}:{model}:{input_hash}" so identical
+    # (operation, model, input) tuples collapse to a single row via upsert.
+    # Pruning is age-based — callers decide the retention window.
+    module LLMCache
+      # Look up a cached LLM result by its cache key.
+      # @param cache_key [String] SHA-256 hex cache key
+      # @return [Hash, nil]
+      def llm_cache_lookup(cache_key)
+        llm_cache.where(cache_key: cache_key).first
+      end
+      # Store or update a cached LLM result. Uses upsert on the cache_key.
+      # @param operation [String] operation name (e.g. "distill", "embed")
+      # @param model [String] model identifier
+      # @param input_hash [String] SHA-256 hex digest of the input
+      # @param result_json [String] JSON-serialized result
+      # @param input_tokens [Integer, nil] input tokens consumed
+      # @param output_tokens [Integer, nil] output tokens consumed
+      # @return [void]
+      def llm_cache_store(operation:, model:, input_hash:, result_json:, input_tokens: nil, output_tokens: nil)
+        cache_key = Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
+        llm_cache
+          .insert_conflict(target: :cache_key, update: {
+            result_json: result_json,
+            input_tokens: input_tokens,
+            output_tokens: output_tokens,
+            created_at: Time.now.utc.iso8601
+          })
+          .insert(
+            cache_key: cache_key,
+            operation: operation,
+            model: model,
+            input_hash: input_hash,
+            result_json: result_json,
+            input_tokens: input_tokens,
+            output_tokens: output_tokens,
+            created_at: Time.now.utc.iso8601
+          )
+      end
+      # Compute the cache key for an LLM operation.
+      # @param operation [String] operation name
+      # @param model [String] model identifier
+      # @param input [String] raw input text
+      # @return [String] SHA-256 hex cache key
+      def llm_cache_key(operation, model, input)
+        input_hash = Digest::SHA256.hexdigest(input)
+        Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
+      end
+      # Delete LLM cache entries older than the given age.
+      # @param max_age_seconds [Integer] maximum age in seconds (default: 7 days)
+      # @return [Integer] number of rows deleted
+      def llm_cache_prune(max_age_seconds: 604_800)
+        cutoff = (Time.now - max_age_seconds).utc.iso8601
+        llm_cache.where { created_at < cutoff }.delete
+      end
+    end
+  end
+end

data/lib/claude_memory/store/metrics_aggregator.rb ADDED Viewed

@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+module ClaudeMemory
+  module Store
+    # Ingestion metrics persistence and aggregation for the SQLiteStore.
+    # Records per-distillation LLM token usage and extraction counts, and
+    # computes totals + efficiency ratios over the full history.
+    module MetricsAggregator
+      # Count content items that have not yet been distilled.
+      # @param min_length [Integer] minimum byte_len threshold
+      # @return [Integer]
+      def count_undistilled(min_length: 200)
+        content_items
+          .left_join(:ingestion_metrics, content_item_id: :id)
+          .where(Sequel[:ingestion_metrics][:id] => nil)
+          .where { byte_len >= min_length }
+          .count
+      end
+      # Record token usage and extraction counts for a distillation run.
+      # @param content_item_id [Integer] content item that was distilled
+      # @param input_tokens [Integer] LLM input tokens consumed
+      # @param output_tokens [Integer] LLM output tokens consumed
+      # @param facts_extracted [Integer] number of facts extracted
+      # @return [Integer] inserted row id
+      def record_ingestion_metrics(content_item_id:, input_tokens:, output_tokens:, facts_extracted:)
+        ingestion_metrics.insert(
+          content_item_id: content_item_id,
+          input_tokens: input_tokens,
+          output_tokens: output_tokens,
+          facts_extracted: facts_extracted,
+          created_at: Time.now.utc.iso8601
+        )
+      end
+      # Compute aggregate ingestion metrics across all distillation runs.
+      # @return [Hash, nil] totals and efficiency ratio, or nil if no data
+      def aggregate_ingestion_metrics
+        # standard:disable Performance/Detect (Sequel DSL requires .select{}.first)
+        result = ingestion_metrics
+          .select {
+            [
+              sum(:input_tokens).as(:total_input),
+              sum(:output_tokens).as(:total_output),
+              sum(:facts_extracted).as(:total_facts),
+              count(:id).as(:total_ops)
+            ]
+          }
+          .first
+        # standard:enable Performance/Detect
+        return nil if result.nil? || result[:total_ops].to_i.zero?
+        total_input = result[:total_input].to_i
+        total_output = result[:total_output].to_i
+        total_facts = result[:total_facts].to_i
+        total_ops = result[:total_ops].to_i
+        efficiency = total_input.zero? ? 0.0 : (total_facts.to_f / total_input * 1000).round(2)
+        {
+          total_input_tokens: total_input,
+          total_output_tokens: total_output,
+          total_facts_extracted: total_facts,
+          total_operations: total_ops,
+          avg_facts_per_1k_input_tokens: efficiency
+        }
+      end
+      # Mark all undistilled content items as distilled with zero token counts.
+      # Used for backfilling legacy content that predates the metrics table.
+      # @return [Integer] number of items backfilled
+      def backfill_distillation_metrics!
+        undistilled_ids = content_items
+          .left_join(:ingestion_metrics, content_item_id: :id)
+          .where(Sequel[:ingestion_metrics][:id] => nil)
+          .select_map(Sequel[:content_items][:id])
+        return 0 if undistilled_ids.empty?
+        now = Time.now.utc.iso8601
+        undistilled_ids.each do |cid|
+          ingestion_metrics.insert(
+            content_item_id: cid,
+            input_tokens: 0,
+            output_tokens: 0,
+            facts_extracted: 0,
+            created_at: now
+          )
+        end
+        undistilled_ids.size
+      end
+    end
+  end
+end

data/lib/claude_memory/store/schema_manager.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module ClaudeMemory
     # Schema migration and version management for SQLiteStore.
     # Handles Sequel migrations, legacy version syncing, and initial setup.
     module SchemaManager
-      SCHEMA_VERSION = 14
+      SCHEMA_VERSION = 17
       private