RubyGems - claude_memory - Versions diffs - 0.7.1 → 0.9.0 - Mend

claude_memory 0.7.1 → 0.9.0

This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (107)

checksums.yaml +4 -4
data/.claude/memory.sqlite3 +0 -0
data/.claude/rules/claude_memory.generated.md +32 -2
data/.claude/settings.json +65 -15
data/.claude/settings.local.json +5 -2
data/.claude/skills/improve/SKILL.md +113 -25
data/.claude/skills/upgrade-dependencies/SKILL.md +154 -0
data/.claude-plugin/commands/distill-transcripts.md +98 -0
data/.claude-plugin/commands/memory-recall.md +67 -0
data/.claude-plugin/marketplace.json +2 -2
data/.claude-plugin/plugin.json +3 -3
data/.claude-plugin/scripts/hook-runner.sh +14 -0
data/.claude-plugin/scripts/serve-mcp.sh +14 -0
data/.ruby-version +1 -1
data/CHANGELOG.md +90 -1
data/CLAUDE.md +56 -18
data/README.md +35 -0
data/db/migrations/013_add_mcp_tool_calls.rb +26 -0
data/db/migrations/014_canonicalize_predicates.rb +30 -0
data/docs/improvements.md +74 -74
data/docs/influence/claude-mem.md +1 -0
data/docs/influence/claude-supermemory.md +1 -0
data/docs/influence/episodic-memory.md +1 -0
data/docs/influence/grepai.md +1 -0
data/docs/influence/kbs.md +1 -0
data/docs/influence/lossless-claw.md +1 -0
data/docs/influence/qmd.md +1 -0
data/docs/quality_review.md +119 -224
data/hooks/hooks.json +39 -7
data/lib/claude_memory/commands/checks/distill_check.rb +61 -0
data/lib/claude_memory/commands/checks/hooks_check.rb +2 -2
data/lib/claude_memory/commands/checks/vec_check.rb +2 -1
data/lib/claude_memory/commands/completion_command.rb +149 -0
data/lib/claude_memory/commands/doctor_command.rb +2 -0
data/lib/claude_memory/commands/embeddings_command.rb +198 -0
data/lib/claude_memory/commands/help_command.rb +12 -1
data/lib/claude_memory/commands/hook_command.rb +2 -1
data/lib/claude_memory/commands/index_command.rb +85 -78
data/lib/claude_memory/commands/initializers/database_ensurer.rb +16 -0
data/lib/claude_memory/commands/initializers/global_initializer.rb +2 -1
data/lib/claude_memory/commands/initializers/hooks_configurator.rb +55 -11
data/lib/claude_memory/commands/initializers/project_initializer.rb +2 -1
data/lib/claude_memory/commands/install_skill_command.rb +78 -0
data/lib/claude_memory/commands/registry.rb +47 -32
data/lib/claude_memory/commands/reject_command.rb +62 -0
data/lib/claude_memory/commands/restore_command.rb +77 -0
data/lib/claude_memory/commands/skills/distill-transcripts.md +102 -0
data/lib/claude_memory/commands/skills/memory-recall.md +67 -0
data/lib/claude_memory/commands/stats_command.rb +98 -2
data/lib/claude_memory/configuration.rb +14 -1
data/lib/claude_memory/core/fact_ranker.rb +2 -2
data/lib/claude_memory/core/rr_fusion.rb +23 -6
data/lib/claude_memory/core/snippet_extractor.rb +7 -3
data/lib/claude_memory/core/text_builder.rb +11 -0
data/lib/claude_memory/distill/json_schema.md +8 -4
data/lib/claude_memory/distill/null_distiller.rb +2 -0
data/lib/claude_memory/domain/entity.rb +13 -1
data/lib/claude_memory/domain/fact.rb +26 -2
data/lib/claude_memory/domain/provenance.rb +0 -1
data/lib/claude_memory/embeddings/api_adapter.rb +97 -0
data/lib/claude_memory/embeddings/dimension_check.rb +23 -0
data/lib/claude_memory/embeddings/fastembed_adapter.rb +46 -12
data/lib/claude_memory/embeddings/generator.rb +4 -0
data/lib/claude_memory/embeddings/inspector.rb +91 -0
data/lib/claude_memory/embeddings/model_registry.rb +210 -0
data/lib/claude_memory/embeddings/resolver.rb +44 -0
data/lib/claude_memory/hook/context_injector.rb +58 -2
data/lib/claude_memory/hook/distillation_runner.rb +46 -0
data/lib/claude_memory/hook/handler.rb +11 -2
data/lib/claude_memory/index/vector_index.rb +15 -2
data/lib/claude_memory/infrastructure/schema_validator.rb +3 -3
data/lib/claude_memory/ingest/ingester.rb +17 -0
data/lib/claude_memory/mcp/handlers/context_handlers.rb +38 -0
data/lib/claude_memory/mcp/handlers/management_handlers.rb +169 -0
data/lib/claude_memory/mcp/handlers/query_handlers.rb +115 -0
data/lib/claude_memory/mcp/handlers/setup_handlers.rb +211 -0
data/lib/claude_memory/mcp/handlers/shortcut_handlers.rb +37 -0
data/lib/claude_memory/mcp/handlers/stats_handlers.rb +205 -0
data/lib/claude_memory/mcp/instructions_builder.rb +19 -1
data/lib/claude_memory/mcp/query_guide.rb +10 -0
data/lib/claude_memory/mcp/response_formatter.rb +1 -0
data/lib/claude_memory/mcp/server.rb +22 -1
data/lib/claude_memory/mcp/telemetry.rb +86 -0
data/lib/claude_memory/mcp/text_summary.rb +26 -0
data/lib/claude_memory/mcp/tool_definitions.rb +116 -4
data/lib/claude_memory/mcp/tool_helpers.rb +43 -0
data/lib/claude_memory/mcp/tools.rb +50 -679
data/lib/claude_memory/publish.rb +40 -5
data/lib/claude_memory/recall/dual_engine.rb +105 -0
data/lib/claude_memory/recall/legacy_engine.rb +138 -0
data/lib/claude_memory/recall/query_core.rb +371 -0
data/lib/claude_memory/recall.rb +121 -673
data/lib/claude_memory/resolve/predicate_policy.rb +63 -3
data/lib/claude_memory/resolve/resolver.rb +43 -0
data/lib/claude_memory/shortcuts.rb +4 -4
data/lib/claude_memory/store/retry_handler.rb +61 -0
data/lib/claude_memory/store/schema_manager.rb +68 -0
data/lib/claude_memory/store/sqlite_store.rb +334 -201
data/lib/claude_memory/store/store_manager.rb +50 -1
data/lib/claude_memory/sweep/maintenance.rb +115 -1
data/lib/claude_memory/sweep/sweeper.rb +3 -0
data/lib/claude_memory/templates/hooks.example.json +26 -7
data/lib/claude_memory/version.rb +1 -1
data/lib/claude_memory.rb +16 -0
metadata +48 -8
data/.claude/memory.sqlite3-shm +0 -0
data/.claude/memory.sqlite3-wal +0 -0

data/lib/claude_memory/store/sqlite_store.rb CHANGED

@@ -6,14 +6,25 @@ require "digest"
 require "json"
 require "extralite"
 require "sequel/adapters/extralite"
+require_relative "retry_handler"
+require_relative "schema_manager"
 module ClaudeMemory
   module Store
+    # SQLite-backed fact store for ClaudeMemory.
+    # Manages all database tables (content_items, entities, facts, provenance,
+    # conflicts, fact_links, etc.) via Sequel with Extralite adapter.
+    # Includes RetryHandler for transient lock recovery and SchemaManager
+    # for automatic migrations on open.
     class SQLiteStore
-      SCHEMA_VERSION = 12
+      include RetryHandler
+      include SchemaManager
+      # @return [Sequel::Database] the underlying Sequel database connection
       attr_reader :db
+      # Open (or create) a SQLite database and migrate to the current schema.
+      # @param db_path [String] filesystem path to the SQLite database file
       def initialize(db_path)
         @db_path = db_path
         @db = connect_database(db_path)
@@ -21,140 +32,117 @@ module ClaudeMemory
         ensure_schema!
       end
-      # Retry configuration for database operations
-      # SQLite's busy_timeout doesn't reliably detect lock release, so we use
-      # shorter timeouts with application-level retry for better responsiveness
-      MAX_RETRIES = 5
-      RETRY_BASE_DELAY = 0.1 # seconds, with exponential backoff
-      # Execute a block with retry logic for busy/locked errors
-      # This handles concurrent access from multiple hook processes
-      def with_retry(operation_name = "database operation")
-        retries = 0
-        begin
-          yield
-        rescue Sequel::DatabaseError, Extralite::Error, Extralite::BusyError => e
-          if retryable_error?(e) && retries < MAX_RETRIES
-            retries += 1
-            delay = RETRY_BASE_DELAY * (2**retries) # Exponential backoff
-            sleep(delay)
-            retry
-          end
-          raise
-        end
-      end
-      # Execute a transaction with retry logic for concurrent access
-      # Use this instead of @db.transaction when concurrent writes are expected
-      def transaction_with_retry(&block)
-        with_retry("transaction") do
-          @db.transaction(&block)
-        end
-      end
-      private
-      def retryable_error?(error)
-        message = error.message.downcase
-        message.include?("busy") || message.include?("locked")
-      end
-      def connect_database(db_path)
-        retries = 0
-        begin
-          Sequel.connect(
-            "extralite:#{db_path}",
-            # Use shorter busy_timeout since we handle retry at app level
-            # This allows faster detection of lock release between retries
-            connect_sqls: [
-              "PRAGMA busy_timeout = 1000",
-              "PRAGMA journal_mode = WAL",
-              "PRAGMA synchronous = NORMAL"
-            ]
-          )
-        rescue Sequel::DatabaseConnectionError, Extralite::Error => e
-          retries += 1
-          if retries <= MAX_RETRIES && retryable_error?(e)
-            sleep(RETRY_BASE_DELAY * (2**retries))
-            retry
-          end
-          raise
-        end
-      end
-      public
+      # Disconnect from the database.
+      # @return [void]
       def close
         @db.disconnect
       end
+      # Lazily-initialized vector index for semantic search.
+      # @return [Index::VectorIndex]
       def vector_index
         @vector_index ||= Index::VectorIndex.new(self)
       end
-      # Checkpoint the WAL file to prevent unlimited growth
-      # This truncates the WAL after checkpointing
-      # Should be called periodically during maintenance/sweep operations
+      # Checkpoint the WAL file to prevent unlimited growth.
+      # @return [void]
       def checkpoint_wal
         @db.run("PRAGMA wal_checkpoint(TRUNCATE)")
       end
+      # Current schema version stored in the meta table.
+      # @return [Integer, nil]
       def schema_version
         @db[:meta].where(key: "schema_version").get(:value)&.to_i
       end
-      def content_items
-        @db[:content_items]
-      end
+      # --- Table accessors ---
+      # Each returns a {Sequel::Dataset} bound to the corresponding table.
-      def delta_cursors
-        @db[:delta_cursors]
-      end
+      # @return [Sequel::Dataset]
+      def content_items = @db[:content_items]
-      def entities
-        @db[:entities]
-      end
+      # @return [Sequel::Dataset]
+      def delta_cursors = @db[:delta_cursors]
-      def entity_aliases
-        @db[:entity_aliases]
-      end
+      # @return [Sequel::Dataset]
+      def entities = @db[:entities]
-      def facts
-        @db[:facts]
-      end
+      # @return [Sequel::Dataset]
+      def entity_aliases = @db[:entity_aliases]
-      def provenance
-        @db[:provenance]
-      end
+      # @return [Sequel::Dataset]
+      def facts = @db[:facts]
-      def fact_links
-        @db[:fact_links]
-      end
+      # @return [Sequel::Dataset]
+      def provenance = @db[:provenance]
-      def conflicts
-        @db[:conflicts]
-      end
+      # @return [Sequel::Dataset]
+      def fact_links = @db[:fact_links]
-      def tool_calls
-        @db[:tool_calls]
-      end
+      # @return [Sequel::Dataset]
+      def conflicts = @db[:conflicts]
-      def operation_progress
-        @db[:operation_progress]
-      end
+      # @return [Sequel::Dataset]
+      def tool_calls = @db[:tool_calls]
-      def schema_health
-        @db[:schema_health]
-      end
+      # @return [Sequel::Dataset]
+      def operation_progress = @db[:operation_progress]
-      def ingestion_metrics
-        @db[:ingestion_metrics]
-      end
+      # @return [Sequel::Dataset]
+      def schema_health = @db[:schema_health]
+      # @return [Sequel::Dataset]
+      def ingestion_metrics = @db[:ingestion_metrics]
-      def llm_cache
-        @db[:llm_cache]
+      # @return [Sequel::Dataset]
+      def llm_cache = @db[:llm_cache]
+      # @return [Sequel::Dataset]
+      def mcp_tool_calls = @db[:mcp_tool_calls]
+      # Record a single MCP tool invocation for telemetry.
+      # Inserts synchronously; callers wrap in with_retry at the call site
+      # if needed.
+      #
+      # @param tool_name [String] name of the MCP tool invoked
+      # @param duration_ms [Integer] execution time in milliseconds
+      # @param result_count [Integer, nil] number of results returned
+      # @param scope [String, nil] "global" or "project"
+      # @param error_class [String, nil] error class name if the call failed
+      # @param called_at [String, nil] ISO 8601 timestamp (defaults to now UTC)
+      # @return [Integer] inserted row id
+      def insert_mcp_tool_call(tool_name:, duration_ms:, result_count: nil, scope: nil, error_class: nil, called_at: nil)
+        mcp_tool_calls.insert(
+          tool_name: tool_name,
+          called_at: called_at || Time.now.utc.iso8601,
+          duration_ms: duration_ms,
+          result_count: result_count,
+          scope: scope,
+          error_class: error_class
+        )
       end
+      # --- Content items ---
+      # Insert a content item or return the existing id if a duplicate
+      # (same text_hash + session_id) already exists. Wrapped in retry logic.
+      #
+      # @param source [String] origin type (e.g. "transcript", "hook")
+      # @param text_hash [String] SHA-256 hex digest of the raw text
+      # @param byte_len [Integer] byte length of the raw text
+      # @param session_id [String, nil] Claude Code session identifier
+      # @param transcript_path [String, nil] filesystem path to the transcript file
+      # @param project_path [String, nil] project directory path
+      # @param occurred_at [String, nil] ISO 8601 timestamp (defaults to now UTC)
+      # @param raw_text [String, nil] original text content
+      # @param metadata [Hash, nil] additional metadata stored as JSON
+      # @param git_branch [String, nil] active git branch at ingestion time
+      # @param cwd [String, nil] working directory at ingestion time
+      # @param claude_version [String, nil] Claude Code version string
+      # @param thinking_level [String, nil] thinking level setting
+      # @param source_mtime [String, nil] ISO 8601 mtime of the source file
+      # @return [Integer] content item row id (existing or newly inserted)
       def upsert_content_item(source:, text_hash:, byte_len:, session_id: nil, transcript_path: nil,
         project_path: nil, occurred_at: nil, raw_text: nil, metadata: nil,
         git_branch: nil, cwd: nil, claude_version: nil, thinking_level: nil, source_mtime: nil)
@@ -183,12 +171,31 @@ module ClaudeMemory
         end
       end
+      # Fetch a single content item by primary key.
+      # @param id [Integer] content item id
+      # @return [Hash, nil]
+      def get_content_item(id)
+        content_items.where(id: id).first
+      end
+      # Find a content item by transcript path and source modification time.
+      # @param transcript_path [String] filesystem path to the transcript
+      # @param mtime_iso8601 [String] ISO 8601 modification timestamp
+      # @return [Hash, nil]
       def content_item_by_transcript_and_mtime(transcript_path, mtime_iso8601)
         content_items
           .where(transcript_path: transcript_path, source_mtime: mtime_iso8601)
           .first
       end
+      # --- Tool calls ---
+      # Bulk-insert tool call records for a content item.
+      # @param content_item_id [Integer] owning content item id
+      # @param tool_calls_data [Array<Hash>] tool call hashes with keys
+      #   :tool_name, :tool_input, :tool_result, :compressed_summary,
+      #   :is_error, :timestamp
+      # @return [void]
       def insert_tool_calls(content_item_id, tool_calls_data)
         tool_calls_data.each do |tc|
           tool_calls.insert(
@@ -203,6 +210,9 @@ module ClaudeMemory
         end
       end
+      # Retrieve tool calls for a content item, ordered by timestamp.
+      # @param content_item_id [Integer] content item id
+      # @return [Array<Hash>]
       def tool_calls_for_content_item(content_item_id)
         tool_calls
           .where(content_item_id: content_item_id)
@@ -210,10 +220,21 @@ module ClaudeMemory
           .all
       end
+      # --- Delta cursors ---
+      # Get the last-read byte offset for a session/transcript pair.
+      # @param session_id [String] session identifier
+      # @param transcript_path [String] transcript file path
+      # @return [Integer, nil] byte offset, or nil if no cursor exists
       def get_delta_cursor(session_id, transcript_path)
         delta_cursors.where(session_id: session_id, transcript_path: transcript_path).get(:last_byte_offset)
       end
+      # Create or update the byte-offset cursor for a session/transcript pair.
+      # @param session_id [String] session identifier
+      # @param transcript_path [String] transcript file path
+      # @param offset [Integer] new byte offset
+      # @return [void]
       def update_delta_cursor(session_id, transcript_path, offset)
         now = Time.now.utc.iso8601
         delta_cursors
@@ -229,6 +250,12 @@ module ClaudeMemory
           )
       end
+      # --- Entities ---
+      # Find an entity by its slug or create a new one.
+      # @param type [String] entity type (e.g. "database", "framework", "person")
+      # @param name [String] canonical entity name
+      # @return [Integer] entity row id
       def find_or_create_entity(type:, name:)
         slug = slugify(type, name)
         existing = entities.where(slug: slug).get(:id)
@@ -238,6 +265,23 @@ module ClaudeMemory
         entities.insert(type: type, canonical_name: name, slug: slug, created_at: now)
       end
+      # --- Facts ---
+      # Insert a new fact (subject-predicate-object triple) with an auto-generated docid.
+      #
+      # @param subject_entity_id [Integer] entity id for the subject
+      # @param predicate [String] predicate label (e.g. "uses_database", "depends_on")
+      # @param object_entity_id [Integer, nil] entity id for the object (if entity-valued)
+      # @param object_literal [String, nil] literal value for the object
+      # @param datatype [String, nil] datatype hint for the object literal
+      # @param polarity [String] "positive" or "negative"
+      # @param valid_from [String, nil] ISO 8601 validity start (defaults to now UTC)
+      # @param status [String] fact status ("active", "superseded", "rejected")
+      # @param confidence [Float] confidence score 0.0..1.0
+      # @param created_from [String, nil] provenance tag (e.g. "promoted:path:id")
+      # @param scope [String] "global" or "project"
+      # @param project_path [String, nil] project directory for project-scoped facts
+      # @return [Integer] inserted fact row id
       def insert_fact(subject_entity_id:, predicate:, object_entity_id: nil, object_literal: nil,
         datatype: nil, polarity: "positive", valid_from: nil, status: "active",
         confidence: 1.0, created_from: nil, scope: "project", project_path: nil)
@@ -261,10 +305,24 @@ module ClaudeMemory
         )
       end
+      # Look up a fact by its short document identifier.
+      # @param docid [String] 8-character hex document id
+      # @return [Hash, nil]
       def find_fact_by_docid(docid)
         facts.where(docid: docid).first
       end
+      # Selectively update one or more fields on a fact.
+      # Only provided (non-nil) keyword arguments are written. Setting scope
+      # to "global" automatically clears project_path.
+      #
+      # @param fact_id [Integer] fact row id
+      # @param status [String, nil] new status value
+      # @param valid_to [String, nil] ISO 8601 end-of-validity timestamp
+      # @param scope [String, nil] "global" or "project"
+      # @param project_path [String, nil] project directory (cleared when scope is "global")
+      # @param embedding [Array<Float>, nil] embedding vector to store as JSON
+      # @return [Boolean] true if any fields were updated, false if all args were nil
       def update_fact(fact_id, status: nil, valid_to: nil, scope: nil, project_path: nil, embedding: nil)
         updates = {}
         updates[:status] = status if status
@@ -285,10 +343,53 @@ module ClaudeMemory
         true
       end
+      # Overwrite the embedding vector for a fact.
+      # @param fact_id [Integer] fact row id
+      # @param embedding_vector [Array<Float>] embedding to store as JSON
+      # @return [void]
       def update_fact_embedding(fact_id, embedding_vector)
         facts.where(id: fact_id).update(embedding_json: embedding_vector.to_json)
       end
+      # Reject a fact as incorrect (e.g. a distiller hallucination).
+      # Sets status to "rejected", closes any open conflicts involving
+      # the fact, and records the reason in conflict notes when provided.
+      # All updates run in a single transaction.
+      #
+      # @param fact_id [Integer] fact row id to reject
+      # @param reason [String, nil] optional rejection reason appended to conflict notes
+      # @return [Hash, nil] +{rejected: true, conflicts_resolved: Integer}+
+      #   or nil if the fact does not exist
+      def reject_fact(fact_id, reason: nil)
+        row = facts.where(id: fact_id).first
+        return nil unless row
+        now = Time.now.utc.iso8601
+        resolved = 0
+        @db.transaction do
+          facts.where(id: fact_id).update(status: "rejected", valid_to: now)
+          open_conflict_rows = conflicts
+            .where(status: "open")
+            .where { (fact_a_id =~ fact_id) | (fact_b_id =~ fact_id) }
+            .all
+          open_conflict_rows.each do |conflict|
+            suffix = reason ? " | resolved: rejected fact #{fact_id} (#{reason})" : " | resolved: rejected fact #{fact_id}"
+            notes = "#{conflict[:notes]}#{suffix}"
+            conflicts.where(id: conflict[:id]).update(status: "resolved", notes: notes)
+          end
+          resolved = open_conflict_rows.size
+        end
+        {rejected: true, conflicts_resolved: resolved}
+      end
+      # Retrieve active facts that have stored embeddings.
+      # @param limit [Integer] maximum rows to return
+      # @return [Array<Hash>] fact rows with :id, :subject_entity_id,
+      #   :predicate, :object_literal, :embedding_json, :scope
       def facts_with_embeddings(limit: 1000)
         facts
           .where(Sequel.~(embedding_json: nil))
@@ -298,6 +399,12 @@ module ClaudeMemory
           .all
       end
+      # Find all facts for a given subject + predicate combination (a "slot").
+      # Used by the resolver to detect supersession and conflicts.
+      # @param subject_entity_id [Integer] subject entity id
+      # @param predicate [String] predicate label
+      # @param status [String] filter by status (default: "active")
+      # @return [Array<Hash>]
       def facts_for_slot(subject_entity_id, predicate, status: "active")
         facts
           .where(subject_entity_id: subject_entity_id, predicate: predicate, status: status)
@@ -307,6 +414,18 @@ module ClaudeMemory
           .all
       end
+      # --- Provenance ---
+      # Record a provenance link between a fact and its source evidence.
+      #
+      # @param fact_id [Integer] fact row id
+      # @param content_item_id [Integer, nil] source content item id
+      # @param quote [String, nil] verbatim quote from the source
+      # @param attribution_entity_id [Integer, nil] entity who stated the fact
+      # @param strength [String] evidence strength ("stated", "inferred", "derived")
+      # @param line_start [Integer, nil] starting line in source content
+      # @param line_end [Integer, nil] ending line in source content
+      # @return [Integer] inserted provenance row id
       def insert_provenance(fact_id:, content_item_id: nil, quote: nil, attribution_entity_id: nil, strength: "stated",
         line_start: nil, line_end: nil)
         provenance.insert(
@@ -320,10 +439,21 @@ module ClaudeMemory
         )
       end
+      # Retrieve all provenance records for a given fact.
+      # @param fact_id [Integer] fact row id
+      # @return [Array<Hash>]
       def provenance_for_fact(fact_id)
         provenance.where(fact_id: fact_id).all
       end
+      # --- Conflicts & fact links ---
+      # Record a conflict between two facts.
+      # @param fact_a_id [Integer] first conflicting fact id
+      # @param fact_b_id [Integer] second conflicting fact id
+      # @param status [String] conflict status ("open" or "resolved")
+      # @param notes [String, nil] human-readable notes about the conflict
+      # @return [Integer] inserted conflict row id
       def insert_conflict(fact_a_id:, fact_b_id:, status: "open", notes: nil)
         now = Time.now.utc.iso8601
         conflicts.insert(
@@ -335,21 +465,55 @@ module ClaudeMemory
         )
       end
+      # Retrieve all unresolved conflicts.
+      # @return [Array<Hash>]
       def open_conflicts
         conflicts.where(status: "open").all
       end
+      # Create a directional link between two facts (e.g. supersession).
+      # @param from_fact_id [Integer] source fact id
+      # @param to_fact_id [Integer] target fact id
+      # @param link_type [String] relationship type (e.g. "supersedes", "conflicts_with")
+      # @return [Integer] inserted fact_link row id
       def insert_fact_link(from_fact_id:, to_fact_id:, link_type:)
         fact_links.insert(from_fact_id: from_fact_id, to_fact_id: to_fact_id, link_type: link_type)
       end
-      # Record token usage metrics for a distillation operation
-      #
-      # @param content_item_id [Integer] The content item that was distilled
-      # @param input_tokens [Integer] Tokens sent to the API
-      # @param output_tokens [Integer] Tokens returned from the API
-      # @param facts_extracted [Integer] Number of facts extracted
-      # @return [Integer] The created metric record ID
+      # --- Ingestion metrics ---
+      # Fetch content items that have not yet been distilled, ordered newest first.
+      # @param limit [Integer] maximum rows to return
+      # @param min_length [Integer] minimum byte_len threshold
+      # @return [Array<Hash>]
+      def undistilled_content_items(limit: 3, min_length: 200)
+        content_items
+          .left_join(:ingestion_metrics, content_item_id: :id)
+          .where(Sequel[:ingestion_metrics][:id] => nil)
+          .where { byte_len >= min_length }
+          .order(Sequel.desc(:occurred_at))
+          .limit(limit)
+          .select_all(:content_items)
+          .all
+      end
+      # Count content items that have not yet been distilled.
+      # @param min_length [Integer] minimum byte_len threshold
+      # @return [Integer]
+      def count_undistilled(min_length: 200)
+        content_items
+          .left_join(:ingestion_metrics, content_item_id: :id)
+          .where(Sequel[:ingestion_metrics][:id] => nil)
+          .where { byte_len >= min_length }
+          .count
+      end
+      # Record token usage and extraction counts for a distillation run.
+      # @param content_item_id [Integer] content item that was distilled
+      # @param input_tokens [Integer] LLM input tokens consumed
+      # @param output_tokens [Integer] LLM output tokens consumed
+      # @param facts_extracted [Integer] number of facts extracted
+      # @return [Integer] inserted row id
       def record_ingestion_metrics(content_item_id:, input_tokens:, output_tokens:, facts_extracted:)
         ingestion_metrics.insert(
           content_item_id: content_item_id,
@@ -360,14 +524,8 @@ module ClaudeMemory
         )
       end
-      # Get aggregate metrics across all distillation operations
-      #
-      # @return [Hash] Aggregated metrics with keys:
-      #   - total_input_tokens: Total tokens sent to API
-      #   - total_output_tokens: Total tokens returned from API
-      #   - total_facts_extracted: Total facts extracted
-      #   - total_operations: Number of distillation operations
-      #   - avg_facts_per_1k_input_tokens: Average efficiency metric
+      # Compute aggregate ingestion metrics across all distillation runs.
+      # @return [Hash, nil] totals and efficiency ratio, or nil if no data
       def aggregate_ingestion_metrics
         # standard:disable Performance/Detect (Sequel DSL requires .select{}.first)
         result = ingestion_metrics
@@ -400,23 +558,48 @@ module ClaudeMemory
         }
       end
-      # Look up a cached LLM response by cache key
-      #
-      # @param cache_key [String] SHA256 hex digest of operation+model+input
-      # @return [Hash, nil] Cached result row or nil
+      # Mark all undistilled content items as distilled with zero token counts.
+      # Used for backfilling legacy content that predates the metrics table.
+      # @return [Integer] number of items backfilled
+      def backfill_distillation_metrics!
+        undistilled_ids = content_items
+          .left_join(:ingestion_metrics, content_item_id: :id)
+          .where(Sequel[:ingestion_metrics][:id] => nil)
+          .select_map(Sequel[:content_items][:id])
+        return 0 if undistilled_ids.empty?
+        now = Time.now.utc.iso8601
+        undistilled_ids.each do |cid|
+          ingestion_metrics.insert(
+            content_item_id: cid,
+            input_tokens: 0,
+            output_tokens: 0,
+            facts_extracted: 0,
+            created_at: now
+          )
+        end
+        undistilled_ids.size
+      end
+      # --- LLM cache ---
+      # Look up a cached LLM result by its cache key.
+      # @param cache_key [String] SHA-256 hex cache key
+      # @return [Hash, nil]
       def llm_cache_lookup(cache_key)
         llm_cache.where(cache_key: cache_key).first
       end
-      # Store an LLM response in the cache
-      #
-      # @param operation [String] Operation type (e.g., "distill", "extract")
-      # @param model [String] Model identifier
-      # @param input_hash [String] SHA256 of input content
-      # @param result_json [String] JSON response to cache
-      # @param input_tokens [Integer, nil] Tokens in request
-      # @param output_tokens [Integer, nil] Tokens in response
-      # @return [Integer] The created cache entry ID
+      # Store or update a cached LLM result. Uses upsert on the cache_key.
+      # @param operation [String] operation name (e.g. "distill", "embed")
+      # @param model [String] model identifier
+      # @param input_hash [String] SHA-256 hex digest of the input
+      # @param result_json [String] JSON-serialized result
+      # @param input_tokens [Integer, nil] input tokens consumed
+      # @param output_tokens [Integer, nil] output tokens consumed
+      # @return [void]
       def llm_cache_store(operation:, model:, input_hash:, result_json:, input_tokens: nil, output_tokens: nil)
         cache_key = Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
@@ -439,97 +622,47 @@ module ClaudeMemory
           )
       end
-      # Generate a cache key for LLM response lookup
-      #
-      # @param operation [String] Operation type
-      # @param model [String] Model identifier
-      # @param input [String] Raw input content
-      # @return [String] SHA256 hex digest cache key
+      # Compute the cache key for an LLM operation.
+      # @param operation [String] operation name
+      # @param model [String] model identifier
+      # @param input [String] raw input text
+      # @return [String] SHA-256 hex cache key
       def llm_cache_key(operation, model, input)
         input_hash = Digest::SHA256.hexdigest(input)
         Digest::SHA256.hexdigest("#{operation}:#{model}:#{input_hash}")
       end
-      # Prune cache entries older than the given age
-      #
-      # @param max_age_seconds [Integer] Maximum age in seconds (default: 7 days)
-      # @return [Integer] Number of entries pruned
+      # Delete LLM cache entries older than the given age.
+      # @param max_age_seconds [Integer] maximum age in seconds (default: 7 days)
+      # @return [Integer] number of rows deleted
       def llm_cache_prune(max_age_seconds: 604_800)
         cutoff = (Time.now - max_age_seconds).utc.iso8601
         llm_cache.where { created_at < cutoff }.delete
       end
-      private
-      def ensure_schema!
-        migrations_path = File.expand_path("../../../db/migrations", __dir__)
-        # Handle backward compatibility: databases created with old migration system
-        sync_legacy_schema_version!
-        # Skip migration if the database is already ahead of this gem's version.
-        # This happens when a newer gem version migrated the DB and an older
-        # installed gem (e.g. via hooks) tries to open it.
-        current = current_schema_version
-        return if current && current > SCHEMA_VERSION
-        # Run Sequel migrations to bring database to target version
-        Sequel::Migrator.run(@db, migrations_path, target: SCHEMA_VERSION)
-        # Set created_at timestamp on first initialization
-        set_meta("created_at", Time.now.utc.iso8601) unless get_meta("created_at")
-        # Sync legacy schema_version meta key with Sequel's schema_info
-        # This maintains backwards compatibility with code that reads schema_version
-        sequel_version = @db[:schema_info].get(:version) if @db.table_exists?(:schema_info)
-        set_meta("schema_version", sequel_version.to_s) if sequel_version
-      end
-      # Sync legacy schema_version from meta table to Sequel's schema_info
-      # Handles two cases:
-      # 1. No schema_info table exists (old system, pre-Sequel migrations)
-      # 2. schema_info exists but is out of sync with meta.schema_version
-      def sync_legacy_schema_version!
-        return unless @db.table_exists?(:meta)
-        meta_version = get_meta("schema_version")&.to_i
-        return unless meta_version && meta_version >= 2
-        # Verify database actually has v2+ schema (defensive check)
-        columns = @db.schema(:content_items).map(&:first) if @db.table_exists?(:content_items)
-        return unless columns&.include?(:project_path)
-        # Create or update schema_info to match meta.schema_version
-        @db.create_table?(:schema_info) do
-          Integer :version, null: false, default: 0
-        end
-        sequel_version = @db[:schema_info].get(:version)
-        if sequel_version.nil? || sequel_version < meta_version
-          # Update schema_info to match meta (old system's version)
-          @db[:schema_info].delete
-          @db[:schema_info].insert(version: meta_version)
-        end
-      end
-      def current_schema_version
-        return nil unless @db.table_exists?(:schema_info)
-        @db[:schema_info].get(:version)
-      end
+      # --- Meta ---
+      # Set a key-value pair in the meta table (upsert).
+      # @param key [String] metadata key
+      # @param value [String] metadata value
+      # @return [void]
       def set_meta(key, value)
         @db[:meta].insert_conflict(target: :key, update: {value: value}).insert(key: key, value: value)
       end
+      # Retrieve a value from the meta table.
+      # @param key [String] metadata key
+      # @return [String, nil]
       def get_meta(key)
         @db[:meta].where(key: key).get(:value)
       end
+      private
       def generate_docid(subject_entity_id, predicate, object_literal, created_at)
         input = "#{subject_entity_id}:#{predicate}:#{object_literal}:#{created_at}"
         docid = Digest::SHA256.hexdigest(input)[0, 8]
-        # Handle unlikely collisions by rehashing with a counter
         counter = 0
         while facts.where(docid: docid).any?
           counter += 1