htm 0.0.2 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +294 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +76 -5
  10. data/Rakefile +5 -0
  11. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  12. data/db/migrate/00002_create_robots.rb +11 -0
  13. data/db/migrate/00003_create_file_sources.rb +20 -0
  14. data/db/migrate/00004_create_nodes.rb +65 -0
  15. data/db/migrate/00005_create_tags.rb +13 -0
  16. data/db/migrate/00006_create_node_tags.rb +18 -0
  17. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  18. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  19. data/db/schema.sql +172 -1
  20. data/docs/api/database.md +1 -2
  21. data/docs/api/htm.md +197 -2
  22. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  23. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  24. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  25. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  26. data/docs/api/yard/HTM/Configuration.md +175 -0
  27. data/docs/api/yard/HTM/Database.md +99 -0
  28. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  29. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  30. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  31. data/docs/api/yard/HTM/Error.md +11 -0
  32. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  33. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  34. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  35. data/docs/api/yard/HTM/Observability.md +107 -0
  36. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  37. data/docs/api/yard/HTM/Railtie.md +27 -0
  38. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  39. data/docs/api/yard/HTM/TagError.md +18 -0
  40. data/docs/api/yard/HTM/TagService.md +67 -0
  41. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  42. data/docs/api/yard/HTM/Timeframe.md +40 -0
  43. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  45. data/docs/api/yard/HTM/ValidationError.md +20 -0
  46. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  47. data/docs/api/yard/HTM.md +80 -0
  48. data/docs/api/yard/index.csv +179 -0
  49. data/docs/api/yard-reference.md +51 -0
  50. data/docs/database/README.md +128 -128
  51. data/docs/database/public.file_sources.md +42 -0
  52. data/docs/database/public.file_sources.svg +211 -0
  53. data/docs/database/public.node_tags.md +4 -4
  54. data/docs/database/public.node_tags.svg +212 -79
  55. data/docs/database/public.nodes.md +22 -12
  56. data/docs/database/public.nodes.svg +246 -127
  57. data/docs/database/public.robot_nodes.md +11 -9
  58. data/docs/database/public.robot_nodes.svg +220 -98
  59. data/docs/database/public.robots.md +2 -2
  60. data/docs/database/public.robots.svg +136 -81
  61. data/docs/database/public.tags.md +3 -3
  62. data/docs/database/public.tags.svg +118 -39
  63. data/docs/database/schema.json +850 -771
  64. data/docs/database/schema.svg +256 -197
  65. data/docs/development/schema.md +67 -2
  66. data/docs/guides/adding-memories.md +93 -7
  67. data/docs/guides/recalling-memories.md +36 -1
  68. data/examples/README.md +280 -0
  69. data/examples/cli_app/htm_cli.rb +65 -5
  70. data/examples/cli_app/temp.log +93 -0
  71. data/examples/file_loader_usage.rb +177 -0
  72. data/examples/robot_groups/lib/robot_group.rb +419 -0
  73. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  74. data/examples/robot_groups/multi_process.rb +286 -0
  75. data/examples/robot_groups/robot_worker.rb +136 -0
  76. data/examples/robot_groups/same_process.rb +229 -0
  77. data/examples/timeframe_demo.rb +276 -0
  78. data/lib/htm/active_record_config.rb +1 -1
  79. data/lib/htm/circuit_breaker.rb +202 -0
  80. data/lib/htm/configuration.rb +59 -13
  81. data/lib/htm/database.rb +67 -36
  82. data/lib/htm/embedding_service.rb +39 -2
  83. data/lib/htm/errors.rb +131 -11
  84. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  85. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  86. data/lib/htm/loaders/markdown_loader.rb +263 -0
  87. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  88. data/lib/htm/long_term_memory.rb +460 -343
  89. data/lib/htm/models/file_source.rb +99 -0
  90. data/lib/htm/models/node.rb +80 -5
  91. data/lib/htm/models/robot.rb +24 -1
  92. data/lib/htm/models/robot_node.rb +1 -0
  93. data/lib/htm/models/tag.rb +254 -4
  94. data/lib/htm/observability.rb +395 -0
  95. data/lib/htm/tag_service.rb +60 -3
  96. data/lib/htm/tasks.rb +26 -1
  97. data/lib/htm/timeframe.rb +194 -0
  98. data/lib/htm/timeframe_extractor.rb +307 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/working_memory.rb +165 -70
  101. data/lib/htm.rb +328 -130
  102. data/lib/tasks/doc.rake +300 -0
  103. data/lib/tasks/files.rake +299 -0
  104. data/lib/tasks/htm.rake +158 -3
  105. data/lib/tasks/jobs.rake +3 -9
  106. data/lib/tasks/tags.rake +166 -6
  107. data/mkdocs.yml +36 -1
  108. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  109. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  110. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  111. data/notes/next_steps.md +100 -0
  112. data/notes/plan.md +627 -0
  113. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  114. data/notes/timescaledb_removal_summary.md +200 -0
  115. metadata +125 -15
  116. data/db/migrate/20250101000002_create_robots.rb +0 -14
  117. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  118. data/db/migrate/20250101000005_create_tags.rb +0 -38
  119. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  120. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  121. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  122. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  123. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  124. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  125. data/docs/database/public.working_memories.md +0 -40
  126. data/docs/database/public.working_memories.svg +0 -112
  127. data/lib/htm/models/working_memory_entry.rb +0 -88
@@ -25,6 +25,23 @@ class HTM
 
  attr_reader :query_timeout
 
+ # Initialize long-term memory storage
+ #
+ # @param config [Hash] Database configuration (host, port, dbname, user, password)
+ # @param pool_size [Integer, nil] Connection pool size (uses ActiveRecord default if nil)
+ # @param query_timeout [Integer] Query timeout in milliseconds (default: 30000)
+ # @param cache_size [Integer] Number of query results to cache (default: 1000, use 0 to disable)
+ # @param cache_ttl [Integer] Cache time-to-live in seconds (default: 300)
+ #
+ # @example Initialize with defaults
+ #   ltm = LongTermMemory.new(HTM::Database.default_config)
+ #
+ # @example Initialize with custom cache settings
+ #   ltm = LongTermMemory.new(config, cache_size: 500, cache_ttl: 600)
+ #
+ # @example Disable caching
+ #   ltm = LongTermMemory.new(config, cache_size: 0)
+ #
  def initialize(config, pool_size: nil, query_timeout: DEFAULT_QUERY_TIMEOUT, cache_size: DEFAULT_CACHE_SIZE, cache_ttl: DEFAULT_CACHE_TTL)
  @config = config
  @query_timeout = query_timeout # in milliseconds
@@ -36,6 +53,7 @@ class HTM
  if cache_size > 0
  @query_cache = LruRedux::TTL::ThreadSafeCache.new(cache_size, cache_ttl)
  @cache_stats = { hits: 0, misses: 0 }
+ @cache_stats_mutex = Mutex.new # Thread-safety for cache statistics
  end
  end
 
@@ -48,60 +66,71 @@ class HTM
  # @param token_count [Integer] Token count
  # @param robot_id [Integer] Robot identifier
  # @param embedding [Array<Float>, nil] Pre-generated embedding vector
+ # @param metadata [Hash] Flexible metadata for the node (default: {})
  # @return [Hash] { node_id:, is_new:, robot_node: }
  #
- def add(content:, token_count: 0, robot_id:, embedding: nil)
+ def add(content:, token_count: 0, robot_id:, embedding: nil, metadata: {})
  content_hash = HTM::Models::Node.generate_content_hash(content)
 
- # Check for existing node with same content
- existing_node = HTM::Models::Node.find_by(content_hash: content_hash)
-
- if existing_node
- # Link robot to existing node (or update if already linked)
- robot_node = link_robot_to_node(robot_id: robot_id, node: existing_node)
+ # Wrap in transaction to ensure data consistency
+ ActiveRecord::Base.transaction do
+ # Check for existing node with same content (including soft-deleted)
+ # This avoids unique constraint violations on content_hash
+ existing_node = HTM::Models::Node.with_deleted.find_by(content_hash: content_hash)
 
- # Update the node's updated_at timestamp
- existing_node.touch
-
- {
- node_id: existing_node.id,
- is_new: false,
- robot_node: robot_node
- }
- else
- # Prepare embedding if provided
- embedding_str = nil
- if embedding
- # Pad embedding to 2000 dimensions if needed
- actual_dimension = embedding.length
- padded_embedding = if actual_dimension < 2000
- embedding + Array.new(2000 - actual_dimension, 0.0)
- else
- embedding
- end
- embedding_str = "[#{padded_embedding.join(',')}]"
+ # If found but soft-deleted, restore it
+ if existing_node&.deleted?
+ existing_node.restore!
+ HTM.logger.info "Restored soft-deleted node #{existing_node.id} for content match"
  end
 
- # Create new node
- node = HTM::Models::Node.create!(
- content: content,
- content_hash: content_hash,
- token_count: token_count,
- embedding: embedding_str,
- embedding_dimension: embedding&.length
- )
+ if existing_node
+ # Link robot to existing node (or update if already linked)
+ robot_node = link_robot_to_node(robot_id: robot_id, node: existing_node)
 
- # Link robot to new node
- robot_node = link_robot_to_node(robot_id: robot_id, node: node)
+ # Update the node's updated_at timestamp
+ existing_node.touch
 
- # Invalidate cache since database content changed
- invalidate_cache!
+ {
+ node_id: existing_node.id,
+ is_new: false,
+ robot_node: robot_node
+ }
+ else
+ # Prepare embedding if provided
+ embedding_str = nil
+ if embedding
+ # Pad embedding to 2000 dimensions if needed
+ actual_dimension = embedding.length
+ padded_embedding = if actual_dimension < 2000
+ embedding + Array.new(2000 - actual_dimension, 0.0)
+ else
+ embedding
+ end
+ embedding_str = "[#{padded_embedding.join(',')}]"
+ end
 
- {
- node_id: node.id,
- is_new: true,
- robot_node: robot_node
- }
+ # Create new node
+ node = HTM::Models::Node.create!(
+ content: content,
+ content_hash: content_hash,
+ token_count: token_count,
+ embedding: embedding_str,
+ metadata: metadata
+ )
+
+ # Link robot to new node
+ robot_node = link_robot_to_node(robot_id: robot_id, node: node)
+
+ # Invalidate cache since database content changed
+ invalidate_cache!
+
+ {
+ node_id: node.id,
+ is_new: true,
+ robot_node: robot_node
+ }
+ end
  end
  end
 
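For reference, a minimal usage sketch of the new `add` signature. Only the keyword arguments, return keys, and dedup-by-content_hash behavior come from the hunk above; the instance setup and literal values are illustrative assumptions.

    # Assumes a LongTermMemory instance as in the @example blocks earlier in this diff.
    ltm = HTM::LongTermMemory.new(HTM::Database.default_config)

    result = ltm.add(
      content:     "PostgreSQL uses MVCC for concurrency control.",
      token_count: 9,
      robot_id:    1,
      metadata:    { source: "notes", topic: "databases" }   # new in this version
    )

    result[:is_new]   # => true on first insert; false when the content_hash already exists
    result[:node_id]  # => id of the newly created, re-linked, or restored node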
@@ -109,14 +138,16 @@ class HTM
  #
  # @param robot_id [Integer] Robot ID
  # @param node [HTM::Models::Node] Node to link
+ # @param working_memory [Boolean] Whether node is in working memory (default: false)
  # @return [HTM::Models::RobotNode] The robot_node link record
  #
- def link_robot_to_node(robot_id:, node:)
+ def link_robot_to_node(robot_id:, node:, working_memory: false)
  robot_node = HTM::Models::RobotNode.find_by(robot_id: robot_id, node_id: node.id)
 
  if robot_node
  # Existing link - record that robot remembered this again
  robot_node.record_remember!
+ robot_node.update!(working_memory: working_memory) if working_memory
  else
  # New link
  robot_node = HTM::Models::RobotNode.create!(
@@ -124,7 +155,8 @@ class HTM
  node_id: node.id,
  first_remembered_at: Time.current,
  last_remembered_at: Time.current,
- remember_count: 1
+ remember_count: 1,
+ working_memory: working_memory
  )
  end
 
@@ -183,33 +215,17 @@ class HTM
 
  # Vector similarity search
  #
- # @param timeframe [Range] Time range to search
+ # @param timeframe [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
  # @param query [String] Search query
  # @param limit [Integer] Maximum results
  # @param embedding_service [Object] Service to generate embeddings
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Matching nodes
  #
- def search(timeframe:, query:, limit:, embedding_service:)
- # Return uncached if cache disabled
- return search_uncached(timeframe: timeframe, query: query, limit: limit, embedding_service: embedding_service) unless @query_cache
-
- # Generate cache key
- cache_key = cache_key_for(:search, timeframe, query, limit)
-
- # Try to get from cache
- cached = @query_cache[cache_key]
- if cached
- @cache_stats[:hits] += 1
- return cached
+ def search(timeframe:, query:, limit:, embedding_service:, metadata: {})
+ cached_query(:search, timeframe, query, limit, metadata) do
+ search_uncached(timeframe: timeframe, query: query, limit: limit, embedding_service: embedding_service, metadata: metadata)
  end
-
- # Cache miss - execute query
- @cache_stats[:misses] += 1
- result = search_uncached(timeframe: timeframe, query: query, limit: limit, embedding_service: embedding_service)
-
- # Store in cache
- @query_cache[cache_key] = result
- result
  end
 
  # Full-text search
@@ -217,29 +233,13 @@ class HTM
  # @param timeframe [Range] Time range to search
  # @param query [String] Search query
  # @param limit [Integer] Maximum results
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Matching nodes
  #
- def search_fulltext(timeframe:, query:, limit:)
- # Return uncached if cache disabled
- return search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit) unless @query_cache
-
- # Generate cache key
- cache_key = cache_key_for(:fulltext, timeframe, query, limit)
-
- # Try to get from cache
- cached = @query_cache[cache_key]
- if cached
- @cache_stats[:hits] += 1
- return cached
+ def search_fulltext(timeframe:, query:, limit:, metadata: {})
+ cached_query(:fulltext, timeframe, query, limit, metadata) do
+ search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit, metadata: metadata)
  end
-
- # Cache miss - execute query
- @cache_stats[:misses] += 1
- result = search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit)
-
- # Store in cache
- @query_cache[cache_key] = result
- result
  end
 
  # Hybrid search (full-text + vector)
@@ -249,29 +249,13 @@ class HTM
  # @param limit [Integer] Maximum results
  # @param embedding_service [Object] Service to generate embeddings
  # @param prefilter_limit [Integer] Candidates to consider (default: 100)
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Matching nodes
  #
- def search_hybrid(timeframe:, query:, limit:, embedding_service:, prefilter_limit: 100)
- # Return uncached if cache disabled
- return search_hybrid_uncached(timeframe: timeframe, query: query, limit: limit, embedding_service: embedding_service, prefilter_limit: prefilter_limit) unless @query_cache
-
- # Generate cache key
- cache_key = cache_key_for(:hybrid, timeframe, query, limit, prefilter_limit)
-
- # Try to get from cache
- cached = @query_cache[cache_key]
- if cached
- @cache_stats[:hits] += 1
- return cached
+ def search_hybrid(timeframe:, query:, limit:, embedding_service:, prefilter_limit: 100, metadata: {})
+ cached_query(:hybrid, timeframe, query, limit, prefilter_limit, metadata) do
+ search_hybrid_uncached(timeframe: timeframe, query: query, limit: limit, embedding_service: embedding_service, prefilter_limit: prefilter_limit, metadata: metadata)
  end
-
- # Cache miss - execute query
- @cache_stats[:misses] += 1
- result = search_hybrid_uncached(timeframe: timeframe, query: query, limit: limit, embedding_service: embedding_service, prefilter_limit: prefilter_limit)
-
- # Store in cache
- @query_cache[cache_key] = result
- result
  end
 
  # Add a tag to a node
@@ -292,15 +276,19 @@ class HTM
 
  # Mark nodes as evicted from working memory
  #
- # Working memory state is now tracked per-robot in the working_memories table
- # (optional persistence). The in-memory WorkingMemory class handles eviction
- # tracking. This method is retained for API compatibility but is a no-op.
+ # Sets working_memory = false on the robot_nodes join table for the specified
+ # robot and node IDs.
  #
- # @param node_ids [Array<Integer>] Node IDs (ignored)
+ # @param robot_id [Integer] Robot ID whose working memory is being evicted
+ # @param node_ids [Array<Integer>] Node IDs to mark as evicted
  # @return [void]
  #
- def mark_evicted(node_ids)
- # No-op: working memory is tracked in-memory or via WorkingMemoryEntry model
+ def mark_evicted(robot_id:, node_ids:)
+ return if node_ids.empty?
+
+ HTM::Models::RobotNode
+ .where(robot_id: robot_id, node_id: node_ids)
+ .update_all(working_memory: false)
  end
 
  # Track access for multiple nodes (bulk operation)
@@ -371,6 +359,17 @@ class HTM
  # This method kept for API compatibility
  end
 
+ # Clear the query cache
+ #
+ # Call this after any operation that modifies data (soft delete, restore, etc.)
+ # to ensure subsequent queries see fresh results.
+ #
+ # @return [void]
+ #
+ def clear_cache!
+ invalidate_cache!
+ end
+
  # For backwards compatibility with tests/code that expect pool_size
  def pool_size
  ActiveRecord::Base.connection_pool.size
@@ -421,19 +420,24 @@ class HTM
  # @return [Array<Hash>] Topic relationships
  #
  def topic_relationships(min_shared_nodes: 2, limit: 50)
- result = ActiveRecord::Base.connection.select_all(
- <<~SQL,
- SELECT t1.name AS topic1, t2.name AS topic2, COUNT(DISTINCT nt1.node_id) AS shared_nodes
- FROM tags t1
- JOIN node_tags nt1 ON t1.id = nt1.tag_id
- JOIN node_tags nt2 ON nt1.node_id = nt2.node_id
- JOIN tags t2 ON nt2.tag_id = t2.id
- WHERE t1.name < t2.name
- GROUP BY t1.name, t2.name
- HAVING COUNT(DISTINCT nt1.node_id) >= #{min_shared_nodes.to_i}
- ORDER BY shared_nodes DESC
- LIMIT #{limit.to_i}
- SQL
+ # Use parameterized query to prevent SQL injection
+ sql = <<~SQL
+ SELECT t1.name AS topic1, t2.name AS topic2, COUNT(DISTINCT nt1.node_id) AS shared_nodes
+ FROM tags t1
+ JOIN node_tags nt1 ON t1.id = nt1.tag_id
+ JOIN node_tags nt2 ON nt1.node_id = nt2.node_id
+ JOIN tags t2 ON nt2.tag_id = t2.id
+ WHERE t1.name < t2.name
+ GROUP BY t1.name, t2.name
+ HAVING COUNT(DISTINCT nt1.node_id) >= $1
+ ORDER BY shared_nodes DESC
+ LIMIT $2
+ SQL
+
+ result = ActiveRecord::Base.connection.exec_query(
+ sql,
+ 'topic_relationships',
+ [[nil, min_shared_nodes.to_i], [nil, limit.to_i]]
  )
  result.to_a
  end
@@ -462,9 +466,10 @@ class HTM
  # @param node [Hash] Node data with similarity, tags, created_at, access_count
  # @param query_tags [Array<String>] Tags associated with the query
  # @param vector_similarity [Float, nil] Pre-computed vector similarity (0-1)
+ # @param node_tags [Array<String>, nil] Pre-loaded tags for this node (avoids N+1 query)
  # @return [Float] Composite relevance score (0-10)
  #
- def calculate_relevance(node:, query_tags: [], vector_similarity: nil)
+ def calculate_relevance(node:, query_tags: [], vector_similarity: nil, node_tags: nil)
  # 1. Vector similarity (semantic match) - weight: 0.5
  semantic_score = if vector_similarity
  vector_similarity
@@ -475,7 +480,8 @@ class HTM
  end
  end
  # 2. Tag overlap (categorical relevance) - weight: 0.3
- node_tags = get_node_tags(node['id'])
+ # Use pre-loaded tags if provided, otherwise fetch (for backward compatibility)
+ node_tags ||= get_node_tags(node['id'])
  tag_score = if query_tags.any? && node_tags.any?
  weighted_hierarchical_jaccard(query_tags, node_tags)
  else
@@ -505,41 +511,48 @@ class HTM
  #
  # Returns nodes with calculated relevance scores based on query context
  #
- # @param timeframe [Range] Time range to search
+ # @param timeframe [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
  # @param query [String, nil] Search query
  # @param query_tags [Array<String>] Tags to match
  # @param limit [Integer] Maximum results
  # @param embedding_service [Object, nil] Service to generate embeddings
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Nodes with relevance scores
  #
- def search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil)
+ def search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil, metadata: {})
  # Get candidates from appropriate search method
  candidates = if query && embedding_service
  # Vector search
- search_uncached(timeframe: timeframe, query: query, limit: limit * 2, embedding_service: embedding_service)
+ search_uncached(timeframe: timeframe, query: query, limit: limit * 2, embedding_service: embedding_service, metadata: metadata)
  elsif query
  # Full-text search
- search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit * 2)
+ search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit * 2, metadata: metadata)
  else
- # Time-range only
- HTM::Models::Node
- .where(created_at: timeframe)
- .order(created_at: :desc)
- .limit(limit * 2)
- .map(&:attributes)
+ # Time-range only (or no filter if timeframe is nil)
+ scope = HTM::Models::Node.where(deleted_at: nil)
+ scope = apply_timeframe_scope(scope, timeframe)
+ scope = apply_metadata_scope(scope, metadata)
+ scope.order(created_at: :desc).limit(limit * 2).map(&:attributes)
  end
 
+ # Batch load all tags for candidates (fixes N+1 query)
+ node_ids = candidates.map { |n| n['id'] }
+ tags_by_node = batch_load_node_tags(node_ids)
+
  # Calculate relevance for each candidate
  scored_nodes = candidates.map do |node|
+ node_tags = tags_by_node[node['id']] || []
+
  relevance = calculate_relevance(
  node: node,
  query_tags: query_tags,
- vector_similarity: node['similarity']&.to_f
+ vector_similarity: node['similarity']&.to_f,
+ node_tags: node_tags
  )
 
  node.merge({
  'relevance' => relevance,
- 'tags' => get_node_tags(node['id'])
+ 'tags' => node_tags
  })
  end
 
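A rough illustration of calling the reworked `search_with_relevance`. Only the keyword arguments and result keys come from the hunk above; `ltm`, `embedder`, and the filter values are assumptions.

    results = ltm.search_with_relevance(
      timeframe: nil,                      # nil now means "no time filter"
      query:     "vector similarity search",
      limit:     10,
      embedding_service: embedder,         # any object responding to #embed
      metadata:  { topic: "databases" }    # JSONB containment filter
    )
    results.first["relevance"]             # composite score (0-10)
    results.first["tags"]                  # tags batch-loaded in one query (no N+1)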
@@ -559,10 +572,32 @@ class HTM
  .joins(:node_tags)
  .where(node_tags: { node_id: node_id })
  .pluck(:name)
- rescue
+ rescue ActiveRecord::ActiveRecordError => e
+ HTM.logger.error("Failed to retrieve tags for node #{node_id}: #{e.message}")
  []
  end
 
+ # Batch load tags for multiple nodes (avoids N+1 queries)
+ #
+ # @param node_ids [Array<Integer>] Node database IDs
+ # @return [Hash<Integer, Array<String>>] Map of node_id to array of tag names
+ #
+ def batch_load_node_tags(node_ids)
+ return {} if node_ids.empty?
+
+ # Single query to get all tags for all nodes
+ results = HTM::Models::NodeTag
+ .joins(:tag)
+ .where(node_id: node_ids)
+ .pluck(:node_id, 'tags.name')
+
+ # Group by node_id
+ results.group_by(&:first).transform_values { |pairs| pairs.map(&:last) }
+ rescue ActiveRecord::ActiveRecordError => e
+ HTM.logger.error("Failed to batch load tags: #{e.message}")
+ {}
+ end
+
  # Search nodes by tags
  #
  # @param tags [Array<String>] Tags to search for
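The batch loader above replaces one tag query per node with a single pluck-and-group query; a sketch of the shape it returns (the ids are made up, the tag names reuse the hierarchical examples from this file's comments):

    tags_by_node = ltm.send(:batch_load_node_tags, [10, 11])   # private helper
    # => { 10 => ["database:postgresql", "database:sql"],
    #      11 => ["person:human:character:popeye"] }
    # Nodes without tags simply have no key; callers fall back to [] via `|| []`.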
@@ -593,16 +628,22 @@ class HTM
  # Get results
  nodes = query.limit(limit).map(&:attributes)
 
+ # Batch load all tags for nodes (fixes N+1 query)
+ node_ids = nodes.map { |n| n['id'] }
+ tags_by_node = batch_load_node_tags(node_ids)
+
  # Calculate relevance and enrich with tags
  nodes.map do |node|
+ node_tags = tags_by_node[node['id']] || []
  relevance = calculate_relevance(
  node: node,
- query_tags: tags
+ query_tags: tags,
+ node_tags: node_tags
  )
 
  node.merge({
  'relevance' => relevance,
- 'tags' => get_node_tags(node['id'])
+ 'tags' => node_tags
  })
  end.sort_by { |n| -n['relevance'] }
  end
@@ -633,43 +674,207 @@ class HTM
  # Searches the tags table for tags where any hierarchy level matches
  # query words. For example, query "PostgreSQL database" would match
  # tags like "database:postgresql", "database:sql", etc.
+ # Find tags matching a query using semantic extraction
  #
  # @param query [String] Search query
- # @return [Array<String>] Matching tag names
+ # @param include_extracted [Boolean] If true, returns hash with :extracted and :matched keys
+ # @return [Array<String>] Matching tag names (default)
+ # @return [Hash] If include_extracted: { extracted: [...], matched: [...] }
  #
- def find_query_matching_tags(query)
- return [] if query.nil? || query.strip.empty?
+ def find_query_matching_tags(query, include_extracted: false)
+ empty_result = include_extracted ? { extracted: [], matched: [] } : []
+ return empty_result if query.nil? || query.strip.empty?
 
- # Extract words from query (lowercase, 3+ chars)
- words = query.downcase.split(/\s+/).select { |w| w.length >= 3 }
- return [] if words.empty?
+ # Use the tag extractor to generate semantic tags from the query
+ # This uses the same LLM process as when storing nodes
+ existing_tags = HTM::Models::Tag.pluck(:name).sample(50)
+ extracted_tags = HTM::TagService.extract(query, existing_ontology: existing_tags)
 
- # Build LIKE conditions for each word
- # Match tags where any part of the hierarchy contains the word
- conditions = words.map { |w| "name ILIKE ?" }
- values = words.map { |w| "%#{w}%" }
+ if extracted_tags.empty?
+ return include_extracted ? { extracted: [], matched: [] } : []
+ end
 
- HTM::Models::Tag
- .where(conditions.join(' OR '), *values)
- .pluck(:name)
+ # Step 1: Try exact matches
+ exact_matches = HTM::Models::Tag.where(name: extracted_tags).pluck(:name)
+
+ if exact_matches.any?
+ return include_extracted ? { extracted: extracted_tags, matched: exact_matches } : exact_matches
+ end
+
+ # Step 2: Try matching on parent/prefix levels
+ # For "person:human:character:popeye", try "person:human:character", "person:human", "person"
+ prefix_candidates = extracted_tags.flat_map do |tag|
+ levels = tag.split(':')
+ (1...levels.size).map { |i| levels[0, i].join(':') }
+ end.uniq
+
+ if prefix_candidates.any?
+ prefix_matches = HTM::Models::Tag.where(name: prefix_candidates).pluck(:name)
+ if prefix_matches.any?
+ return include_extracted ? { extracted: extracted_tags, matched: prefix_matches } : prefix_matches
+ end
+ end
+
+ # Step 3: Try matching individual components, starting from rightmost (most specific)
+ # For "person:human:character:popeye", try "popeye", then "character", then "human", then "person"
+ # Search for tags that contain this component at any level
+ all_components = extracted_tags.flat_map { |tag| tag.split(':') }.uniq
+
+ # Order by specificity: components that appear at deeper levels first
+ component_depths = Hash.new(0)
+ extracted_tags.each do |tag|
+ levels = tag.split(':')
+ levels.each_with_index { |comp, idx| component_depths[comp] = [component_depths[comp], idx].max }
+ end
+ ordered_components = all_components.sort_by { |c| -component_depths[c] }
+
+ # Try each component, starting with most specific (rightmost)
+ ordered_components.each do |component|
+ # Find tags where this component appears at any level
+ component_matches = HTM::Models::Tag
+ .where("name = ? OR name LIKE ? OR name LIKE ? OR name LIKE ?",
+ component, # exact match (single-level tag)
+ "#{component}:%", # starts with component
+ "%:#{component}", # ends with component
+ "%:#{component}:%") # component in middle
+ .pluck(:name)
+
+ if component_matches.any?
+ return include_extracted ? { extracted: extracted_tags, matched: component_matches } : component_matches
+ end
+ end
+
+ # No matches found at any level
+ include_extracted ? { extracted: extracted_tags, matched: [] } : []
  end
 
  private
 
+ # Sanitize embedding for SQL use
+ #
+ # Validates that all values are numeric and converts to safe PostgreSQL vector format.
+ # This prevents SQL injection by ensuring only valid numeric values are included.
+ #
+ # @param embedding [Array<Numeric>] Embedding vector
+ # @return [String] Sanitized vector string for PostgreSQL (e.g., "[0.1,0.2,0.3]")
+ # @raise [ArgumentError] If embedding contains non-numeric values
+ #
+ def sanitize_embedding_for_sql(embedding)
+ unless embedding.is_a?(Array) && embedding.all? { |v| v.is_a?(Numeric) && v.finite? }
+ raise ArgumentError, "Embedding must be an array of finite numeric values"
+ end
+
+ "[#{embedding.map { |v| v.to_f }.join(',')}]"
+ end
+
+ # Build SQL condition for timeframe filtering
+ #
+ # @param timeframe [nil, Range, Array<Range>] Time range(s)
+ # @param table_alias [String] Table alias (default: none)
+ # @return [String, nil] SQL condition or nil for no filter
+ #
+ def build_timeframe_condition(timeframe, table_alias: nil)
+ return nil if timeframe.nil?
+
+ prefix = table_alias ? "#{table_alias}." : ""
+ column = "#{prefix}created_at"
+ conn = ActiveRecord::Base.connection
+
+ case timeframe
+ when Range
+ # Use quote to safely escape timestamp values
+ begin_quoted = conn.quote(timeframe.begin.iso8601)
+ end_quoted = conn.quote(timeframe.end.iso8601)
+ "(#{column} BETWEEN #{begin_quoted} AND #{end_quoted})"
+ when Array
+ conditions = timeframe.map do |range|
+ begin_quoted = conn.quote(range.begin.iso8601)
+ end_quoted = conn.quote(range.end.iso8601)
+ "(#{column} BETWEEN #{begin_quoted} AND #{end_quoted})"
+ end
+ "(#{conditions.join(' OR ')})"
+ else
+ nil
+ end
+ end
+
+ # Build ActiveRecord where clause for timeframe
+ #
+ # @param scope [ActiveRecord::Relation] Base scope
+ # @param timeframe [nil, Range, Array<Range>] Time range(s)
+ # @return [ActiveRecord::Relation] Scoped query
+ #
+ def apply_timeframe_scope(scope, timeframe)
+ return scope if timeframe.nil?
+
+ case timeframe
+ when Range
+ scope.where(created_at: timeframe)
+ when Array
+ # Build OR conditions for multiple ranges
+ conditions = timeframe.map { |range| scope.where(created_at: range) }
+ conditions.reduce { |result, condition| result.or(condition) }
+ else
+ scope
+ end
+ end
+
+ # Build SQL condition for metadata filtering (JSONB containment)
+ #
+ # @param metadata [Hash] Metadata to filter by
+ # @param table_alias [String] Table alias (default: none)
+ # @return [String, nil] SQL condition or nil for no filter
+ #
+ def build_metadata_condition(metadata, table_alias: nil)
+ return nil if metadata.nil? || metadata.empty?
+
+ prefix = table_alias ? "#{table_alias}." : ""
+ column = "#{prefix}metadata"
+ conn = ActiveRecord::Base.connection
+
+ # Use JSONB containment operator @>
+ # This matches if the metadata column contains all key-value pairs in the filter
+ quoted_metadata = conn.quote(metadata.to_json)
+ "(#{column} @> #{quoted_metadata}::jsonb)"
+ end
+
+ # Build ActiveRecord where clause for metadata
+ #
+ # @param scope [ActiveRecord::Relation] Base scope
+ # @param metadata [Hash] Metadata to filter by
+ # @return [ActiveRecord::Relation] Scoped query
+ #
+ def apply_metadata_scope(scope, metadata)
+ return scope if metadata.nil? || metadata.empty?
+
+ # Use JSONB containment operator
+ scope.where("metadata @> ?::jsonb", metadata.to_json)
+ end
+
  # Generate cache key for query
  #
  # @param method [Symbol] Search method name
- # @param timeframe [Range] Time range
+ # @param timeframe [nil, Range, Array<Range>] Time range(s)
  # @param query [String] Search query
  # @param limit [Integer] Result limit
  # @param args [Array] Additional arguments
  # @return [String] Cache key
  #
  def cache_key_for(method, timeframe, query, limit, *args)
+ timeframe_key = case timeframe
+ when nil
+ "nil"
+ when Range
+ "#{timeframe.begin.to_i}-#{timeframe.end.to_i}"
+ when Array
+ timeframe.map { |r| "#{r.begin.to_i}-#{r.end.to_i}" }.join(',')
+ else
+ timeframe.to_s
+ end
+
  key_parts = [
  method,
- timeframe.begin.to_i,
- timeframe.end.to_i,
+ timeframe_key,
  query,
  limit,
  *args
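The metadata helpers above both rely on PostgreSQL's JSONB containment operator (`@>`): a row matches when its metadata column contains every key/value pair in the filter. A hedged sketch of the relation apply_metadata_scope builds, with invented filter values:

    HTM::Models::Node.where("metadata @> ?::jsonb", { topic: "databases" }.to_json)
    # matches a node whose metadata is {"source" => "notes", "topic" => "databases", "page" => 3}
    # but not one whose metadata is {"source" => "notes"} alone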
@@ -738,17 +943,41 @@ class HTM
  @query_cache.clear if @query_cache
  end
 
+ # Execute a query with caching
+ #
+ # @param method [Symbol] Search method name for cache key
+ # @param args [Array] Arguments for cache key (timeframe, query, limit, etc.)
+ # @yield Block that executes the actual query
+ # @return [Array<Hash>] Query results (from cache or freshly executed)
+ #
+ def cached_query(method, *args, &block)
+ return yield unless @query_cache
+
+ cache_key = cache_key_for(method, *args)
+
+ if (cached = @query_cache[cache_key])
+ @cache_stats_mutex.synchronize { @cache_stats[:hits] += 1 }
+ return cached
+ end
+
+ @cache_stats_mutex.synchronize { @cache_stats[:misses] += 1 }
+ result = yield
+ @query_cache[cache_key] = result
+ result
+ end
+
  # Uncached vector similarity search
  #
  # Generates query embedding client-side and performs vector search in database.
  #
- # @param timeframe [Range] Time range to search
+ # @param timeframe [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
  # @param query [String] Search query
  # @param limit [Integer] Maximum results
  # @param embedding_service [Object] Service to generate query embedding
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Matching nodes
  #
- def search_uncached(timeframe:, query:, limit:, embedding_service:)
+ def search_uncached(timeframe:, query:, limit:, embedding_service:, metadata: {})
  # Generate query embedding client-side
  query_embedding = embedding_service.embed(query)
 
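cached_query centralizes the hit/miss bookkeeping that each search method previously duplicated. A standalone sketch of the same pattern, using the lru_redux cache class the constructor sets up; the key string and cached value are placeholders, not values produced by the gem:

    require 'lru_redux'

    cache = LruRedux::TTL::ThreadSafeCache.new(1000, 300)   # max entries, TTL in seconds
    stats = { hits: 0, misses: 0 }
    mutex = Mutex.new

    key = "search|nil|vector indexes|10"                    # analogous to cache_key_for output

    if (cached = cache[key])                                # a repeat call takes this branch
      mutex.synchronize { stats[:hits] += 1 }
      result = cached
    else
      mutex.synchronize { stats[:misses] += 1 }
      result = cache[key] = [{ "id" => 1, "content" => "..." }]  # stand-in for the real query
    end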
@@ -757,17 +986,29 @@ class HTM
  query_embedding = query_embedding + Array.new(2000 - query_embedding.length, 0.0)
  end
 
- # Convert to PostgreSQL vector format
- embedding_str = "[#{query_embedding.join(',')}]"
+ # Sanitize embedding for safe SQL use (validates all values are numeric)
+ embedding_str = sanitize_embedding_for_sql(query_embedding)
+
+ # Build filter conditions
+ timeframe_condition = build_timeframe_condition(timeframe)
+ metadata_condition = build_metadata_condition(metadata)
+
+ conditions = ["embedding IS NOT NULL", "deleted_at IS NULL"]
+ conditions << timeframe_condition if timeframe_condition
+ conditions << metadata_condition if metadata_condition
+
+ where_clause = "WHERE #{conditions.join(' AND ')}"
+
+ # Use quote to safely escape the embedding string in the query
+ quoted_embedding = ActiveRecord::Base.connection.quote(embedding_str)
 
  result = ActiveRecord::Base.connection.select_all(
  <<~SQL,
  SELECT id, content, access_count, created_at, token_count,
- 1 - (embedding <=> '#{embedding_str}'::vector) as similarity
+ 1 - (embedding <=> #{quoted_embedding}::vector) as similarity
  FROM nodes
- WHERE created_at BETWEEN '#{timeframe.begin.iso8601}' AND '#{timeframe.end.iso8601}'
- AND embedding IS NOT NULL
- ORDER BY embedding <=> '#{embedding_str}'::vector
+ #{where_clause}
+ ORDER BY embedding <=> #{quoted_embedding}::vector
  LIMIT #{limit.to_i}
  SQL
  )
@@ -781,24 +1022,35 @@ class HTM
 
  # Uncached full-text search
  #
- # @param timeframe [Range] Time range to search
+ # @param timeframe [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
  # @param query [String] Search query
  # @param limit [Integer] Maximum results
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Matching nodes
  #
- def search_fulltext_uncached(timeframe:, query:, limit:)
+ def search_fulltext_uncached(timeframe:, query:, limit:, metadata: {})
+ # Build filter conditions
+ timeframe_condition = build_timeframe_condition(timeframe)
+ metadata_condition = build_metadata_condition(metadata)
+
+ additional_conditions = []
+ additional_conditions << timeframe_condition if timeframe_condition
+ additional_conditions << metadata_condition if metadata_condition
+ additional_sql = additional_conditions.any? ? "AND #{additional_conditions.join(' AND ')}" : ""
+
  result = ActiveRecord::Base.connection.select_all(
  ActiveRecord::Base.sanitize_sql_array([
  <<~SQL,
  SELECT id, content, access_count, created_at, token_count,
  ts_rank(to_tsvector('english', content), plainto_tsquery('english', ?)) as rank
  FROM nodes
- WHERE created_at BETWEEN ? AND ?
+ WHERE deleted_at IS NULL
  AND to_tsvector('english', content) @@ plainto_tsquery('english', ?)
+ #{additional_sql}
  ORDER BY rank DESC
  LIMIT ?
  SQL
- query, timeframe.begin, timeframe.end, query, limit
+ query, query, limit
  ])
  )
 
@@ -816,14 +1068,15 @@ class HTM
  # 2. Tag matching for categorical relevance
  # 3. Vector similarity for semantic ranking
  #
- # @param timeframe [Range] Time range to search
+ # @param timeframe [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
  # @param query [String] Search query
  # @param limit [Integer] Maximum results
  # @param embedding_service [Object] Service to generate query embedding
  # @param prefilter_limit [Integer] Candidates to consider
+ # @param metadata [Hash] Filter by metadata fields (default: {})
  # @return [Array<Hash>] Matching nodes with similarity and tag_boost scores
  #
- def search_hybrid_uncached(timeframe:, query:, limit:, embedding_service:, prefilter_limit:)
+ def search_hybrid_uncached(timeframe:, query:, limit:, embedding_service:, prefilter_limit:, metadata: {})
  # Generate query embedding client-side
  query_embedding = embedding_service.embed(query)
 
@@ -832,8 +1085,27 @@ class HTM
  query_embedding = query_embedding + Array.new(2000 - query_embedding.length, 0.0)
  end
 
- # Convert to PostgreSQL vector format
- embedding_str = "[#{query_embedding.join(',')}]"
+ # Sanitize embedding for safe SQL use (validates all values are numeric)
+ embedding_str = sanitize_embedding_for_sql(query_embedding)
+ quoted_embedding = ActiveRecord::Base.connection.quote(embedding_str)
+
+ # Build filter conditions (with table alias for CTEs)
+ timeframe_condition = build_timeframe_condition(timeframe, table_alias: 'n')
+ metadata_condition = build_metadata_condition(metadata, table_alias: 'n')
+
+ additional_conditions = []
+ additional_conditions << timeframe_condition if timeframe_condition
+ additional_conditions << metadata_condition if metadata_condition
+ additional_sql = additional_conditions.any? ? "AND #{additional_conditions.join(' AND ')}" : ""
+
+ # Same for non-aliased queries
+ timeframe_condition_bare = build_timeframe_condition(timeframe)
+ metadata_condition_bare = build_metadata_condition(metadata)
+
+ additional_conditions_bare = []
+ additional_conditions_bare << timeframe_condition_bare if timeframe_condition_bare
+ additional_conditions_bare << metadata_condition_bare if metadata_condition_bare
+ additional_sql_bare = additional_conditions_bare.any? ? "AND #{additional_conditions_bare.join(' AND ')}" : ""
 
  # Find tags that match query terms
  matching_tags = find_query_matching_tags(query)
@@ -843,10 +1115,7 @@ class HTM
  # NOTE: Hybrid search includes nodes without embeddings using a default
  # similarity score of 0.5. This allows newly created nodes to appear in
  # search results immediately (via fulltext matching) before their embeddings
- # are generated by background jobs. Useful for demos with short timeframes
- # (seconds) where async embedding generation hasn't completed yet.
- # In production with longer timeframes, embeddings are typically ready
- # within 1-5 seconds, so this fallback is rarely used.
+ # are generated by background jobs.
 
  if matching_tags.any?
  # Escape tag names for SQL
@@ -858,8 +1127,9 @@ class HTM
  -- Nodes matching full-text search (with or without embeddings)
  SELECT DISTINCT n.id, n.content, n.access_count, n.created_at, n.token_count, n.embedding
  FROM nodes n
- WHERE n.created_at BETWEEN ? AND ?
+ WHERE n.deleted_at IS NULL
  AND to_tsvector('english', n.content) @@ plainto_tsquery('english', ?)
+ #{additional_sql}
  LIMIT ?
  ),
  tag_candidates AS (
@@ -868,8 +1138,9 @@ class HTM
  FROM nodes n
  JOIN node_tags nt ON nt.node_id = n.id
  JOIN tags t ON t.id = nt.tag_id
- WHERE n.created_at BETWEEN ? AND ?
+ WHERE n.deleted_at IS NULL
  AND t.name IN (#{tag_list})
+ #{additional_sql}
  LIMIT ?
  ),
  all_candidates AS (
@@ -881,7 +1152,7 @@ class HTM
  SELECT
  ac.id, ac.content, ac.access_count, ac.created_at, ac.token_count,
  CASE
- WHEN ac.embedding IS NOT NULL THEN 1 - (ac.embedding <=> '#{embedding_str}'::vector)
+ WHEN ac.embedding IS NOT NULL THEN 1 - (ac.embedding <=> #{quoted_embedding}::vector)
  ELSE 0.5 -- Default similarity for nodes without embeddings
  END as similarity,
  COALESCE((
@@ -899,8 +1170,8 @@ class HTM
  ORDER BY combined_score DESC
  LIMIT ?
  SQL
- timeframe.begin, timeframe.end, query, prefilter_limit,
- timeframe.begin, timeframe.end, prefilter_limit,
+ query, prefilter_limit,
+ prefilter_limit,
  matching_tags.length.to_f,
  limit
  ])
@@ -914,25 +1185,26 @@ class HTM
  WITH candidates AS (
  SELECT id, content, access_count, created_at, token_count, embedding
  FROM nodes
- WHERE created_at BETWEEN ? AND ?
+ WHERE deleted_at IS NULL
  AND to_tsvector('english', content) @@ plainto_tsquery('english', ?)
+ #{additional_sql_bare}
  LIMIT ?
  )
  SELECT id, content, access_count, created_at, token_count,
  CASE
- WHEN embedding IS NOT NULL THEN 1 - (embedding <=> '#{embedding_str}'::vector)
+ WHEN embedding IS NOT NULL THEN 1 - (embedding <=> #{quoted_embedding}::vector)
  ELSE 0.5 -- Default similarity for nodes without embeddings
  END as similarity,
  0.0 as tag_boost,
  CASE
- WHEN embedding IS NOT NULL THEN 1 - (embedding <=> '#{embedding_str}'::vector)
+ WHEN embedding IS NOT NULL THEN 1 - (embedding <=> #{quoted_embedding}::vector)
  ELSE 0.5 -- Default score for nodes without embeddings (fulltext matched)
  END as combined_score
  FROM candidates
  ORDER BY combined_score DESC
  LIMIT ?
  SQL
- timeframe.begin, timeframe.end, query, prefilter_limit, limit
+ query, prefilter_limit, limit
  ])
  )
  end
@@ -969,160 +1241,5 @@ class HTM
 
  [similarity, depth_weight]
  end
-
- #######################################
- =begin
-
- # Enhanced hierarchical similarity (with term_bonus for deep term matches like "country-music")
- # Replaces your private calculate_hierarchical_similarity
- def calculate_hierarchical_similarity(tag_a, tag_b, max_depth: 5)
- return [0.0, 1.0] if tag_a.empty? || tag_b.empty? # [similarity, weight]
-
- parts_a = tag_a.split(':').reject(&:empty?)
- parts_b = tag_b.split(':').reject(&:empty?)
- return [0.0, 1.0] if parts_a.empty? || parts_b.empty?
-
- # Prefix similarity
- local_max = [parts_a.length, parts_b.length].max
- common_levels = 0
- (0...local_max).each do |i|
- if i < parts_a.length && i < parts_b.length && parts_a[i] == parts_b[i]
- common_levels += 1
- else
- break
- end
- end
- prefix_sim = local_max > 0 ? common_levels.to_f / local_max : 0.0
-
- # Term bonus: Shared terms weighted by avg depth
- common_terms = parts_a.to_set & parts_b.to_set
- term_bonus = 0.0
- common_terms.each do |term|
- depth_a = parts_a.index(term) + 1
- depth_b = parts_b.index(term) + 1
- avg_depth = (depth_a + depth_b) / 2.0
- depth_weight = avg_depth / max_depth.to_f
- term_bonus += depth_weight * 0.8 # Increased from 0.5 for more aggression
- end
- term_bonus = [1.0, term_bonus].min
-
- # Combined similarity (your weight now favors deeper via local_max)
- sim = (prefix_sim + term_bonus) / 2.0
- weight = local_max.to_f / max_depth # Deeper = higher weight (flipped from your 1/max)
-
- [sim, weight]
- end
-
- # Enhanced weighted_hierarchical_jaccard (uses new similarity; adds max_pairs fallback)
- # Replaces your private weighted_hierarchical_jaccard
- def weighted_hierarchical_jaccard(set_a, set_b, max_depth: 5, max_pairs: 1000)
- return 0.0 if set_a.empty? || set_b.empty?
-
- # Fallback to flat Jaccard for large sets (your jaccard_similarity)
- if set_a.size * set_b.size > max_pairs
- terms_a = set_a.flat_map { |tag| tag.split(':').reject(&:empty?) }.to_set
- terms_b = set_b.flat_map { |tag| tag.split(':').reject(&:empty?) }.to_set
- return jaccard_similarity(terms_a.to_a, terms_b.to_a)
- end
-
- total_weighted_similarity = 0.0
- total_weights = 0.0
- set_a.each do |tag_a|
- set_b.each do |tag_b|
- similarity, weight = calculate_hierarchical_similarity(tag_a, tag_b, max_depth: max_depth)
- total_weighted_similarity += similarity * weight
- total_weights += weight
- end
- end
- total_weights > 0 ? total_weighted_similarity / total_weights : 0.0
- end
-
- # Updated calculate_relevance (adds ont_weight param; scales to 0-100 option)
- # Enhances your existing method
- def calculate_relevance(node:, query_tags: [], vector_similarity: nil, ont_weight: 1.0, scale_to_100: false)
- # 1. Vector similarity (semantic) - weight: 0.5
- semantic_score = if vector_similarity
- vector_similarity
- elsif node['similarity']
- node['similarity'].to_f
- else
- 0.5
- end
-
- # 2. Tag overlap (ontology) - weight: 0.3, boosted by ont_weight
- node_tags = get_node_tags(node['id'])
- tag_score = if query_tags.any? && node_tags.any?
- weighted_hierarchical_jaccard(query_tags, node_tags) * ont_weight
- else
- 0.5
- end
- tag_score = [tag_score, 1.0].min # Cap boosted score
-
- # 3. Recency - weight: 0.1
- age_hours = (Time.current - Time.parse(node['created_at'].to_s)) / 3600.0
- recency_score = Math.exp(-age_hours / 168.0)
-
- # 4. Access frequency - weight: 0.1
- access_count = node['access_count'] || 0
- access_score = Math.log(1 + access_count) / 10.0
-
- # Weighted composite (0-10 base)
- relevance_0_10 = (
- (semantic_score * 0.5) +
- (tag_score * 0.3) +
- (recency_score * 0.1) +
- (access_score * 0.1)
- ).clamp(0.0, 10.0)
-
- # Scale to 0-100 if requested
- final_relevance = scale_to_100 ? (relevance_0_10 * 10.0).round(2) : relevance_0_10
-
- final_relevance
- end
-
- # Updated search_with_relevance (adds threshold: for 0-100 filtering; ont_weight)
- # Enhances your existing method
- def search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil, threshold: nil, ont_weight: 1.0, scale_to_100: true)
- # Get candidates (your logic)
- candidates = if query && embedding_service
- search_uncached(timeframe: timeframe, query: query, limit: limit * 3, embedding_service: embedding_service) # Oversample more for thresholds
- elsif query
- search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit * 3)
- else
- HTM::Models::Node
- .where(created_at: timeframe)
- .order(created_at: :desc)
- .limit(limit * 3)
- .map(&:attributes)
- end
-
- # Score and enrich
- scored_nodes = candidates.map do |node|
- relevance = calculate_relevance(
- node: node,
- query_tags: query_tags,
- vector_similarity: node['similarity']&.to_f,
- ont_weight: ont_weight,
- scale_to_100: scale_to_100
- )
- node.merge({
- 'relevance' => relevance,
- 'tags' => get_node_tags(node['id'])
- })
- end
-
- # Filter by threshold if provided (e.g., >=80 for 0-100 scale)
- scored_nodes = scored_nodes.select { |n| threshold.nil? || n['relevance'] >= threshold }
-
- # Sort by relevance DESC, take limit (or all if threshold used)
- scored_nodes
- .sort_by { |n| -n['relevance'] }
- .take(limit)
- end
-
- =end
-
-
-
  end
  end