htm 0.0.11 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. checksums.yaml +4 -4
  2. data/.dictate.toml +46 -0
  3. data/.envrc +2 -0
  4. data/CHANGELOG.md +85 -2
  5. data/README.md +348 -79
  6. data/Rakefile +14 -2
  7. data/bin/htm_mcp.rb +94 -0
  8. data/config/database.yml +20 -13
  9. data/db/migrate/00003_create_file_sources.rb +5 -0
  10. data/db/migrate/00004_create_nodes.rb +17 -0
  11. data/db/migrate/00005_create_tags.rb +7 -0
  12. data/db/migrate/00006_create_node_tags.rb +2 -0
  13. data/db/migrate/00007_create_robot_nodes.rb +7 -0
  14. data/db/schema.sql +69 -100
  15. data/docs/api/index.md +1 -1
  16. data/docs/api/yard/HTM/Configuration.md +54 -0
  17. data/docs/api/yard/HTM/Database.md +13 -10
  18. data/docs/api/yard/HTM/EmbeddingService.md +5 -1
  19. data/docs/api/yard/HTM/LongTermMemory.md +18 -277
  20. data/docs/api/yard/HTM/PropositionError.md +18 -0
  21. data/docs/api/yard/HTM/PropositionService.md +66 -0
  22. data/docs/api/yard/HTM/QueryCache.md +88 -0
  23. data/docs/api/yard/HTM/RobotGroup.md +481 -0
  24. data/docs/api/yard/HTM/SqlBuilder.md +108 -0
  25. data/docs/api/yard/HTM/TagService.md +4 -0
  26. data/docs/api/yard/HTM/Telemetry/NullInstrument.md +13 -0
  27. data/docs/api/yard/HTM/Telemetry/NullMeter.md +15 -0
  28. data/docs/api/yard/HTM/Telemetry.md +109 -0
  29. data/docs/api/yard/HTM/WorkingMemoryChannel.md +176 -0
  30. data/docs/api/yard/HTM.md +8 -22
  31. data/docs/api/yard/index.csv +102 -25
  32. data/docs/api/yard-reference.md +8 -0
  33. data/docs/architecture/index.md +1 -1
  34. data/docs/assets/images/multi-provider-failover.svg +51 -0
  35. data/docs/assets/images/robot-group-architecture.svg +65 -0
  36. data/docs/database/README.md +3 -3
  37. data/docs/database/public.file_sources.svg +29 -21
  38. data/docs/database/public.node_tags.md +2 -0
  39. data/docs/database/public.node_tags.svg +53 -41
  40. data/docs/database/public.nodes.md +2 -0
  41. data/docs/database/public.nodes.svg +52 -40
  42. data/docs/database/public.robot_nodes.md +2 -0
  43. data/docs/database/public.robot_nodes.svg +30 -22
  44. data/docs/database/public.robots.svg +16 -12
  45. data/docs/database/public.tags.md +3 -0
  46. data/docs/database/public.tags.svg +41 -33
  47. data/docs/database/schema.json +66 -0
  48. data/docs/database/schema.svg +60 -48
  49. data/docs/development/index.md +14 -1
  50. data/docs/development/rake-tasks.md +1068 -0
  51. data/docs/getting-started/index.md +1 -1
  52. data/docs/getting-started/quick-start.md +144 -155
  53. data/docs/guides/adding-memories.md +2 -3
  54. data/docs/guides/context-assembly.md +185 -184
  55. data/docs/guides/getting-started.md +154 -148
  56. data/docs/guides/index.md +8 -1
  57. data/docs/guides/long-term-memory.md +60 -92
  58. data/docs/guides/mcp-server.md +617 -0
  59. data/docs/guides/multi-robot.md +249 -345
  60. data/docs/guides/recalling-memories.md +153 -163
  61. data/docs/guides/robot-groups.md +604 -0
  62. data/docs/guides/search-strategies.md +61 -58
  63. data/docs/guides/working-memory.md +103 -136
  64. data/docs/images/telemetry-architecture.svg +153 -0
  65. data/docs/index.md +30 -26
  66. data/docs/telemetry.md +391 -0
  67. data/examples/README.md +46 -1
  68. data/examples/cli_app/README.md +1 -1
  69. data/examples/cli_app/htm_cli.rb +1 -1
  70. data/examples/robot_groups/robot_worker.rb +1 -2
  71. data/examples/robot_groups/same_process.rb +1 -4
  72. data/examples/sinatra_app/app.rb +1 -1
  73. data/examples/telemetry/README.md +147 -0
  74. data/examples/telemetry/SETUP_README.md +169 -0
  75. data/examples/telemetry/demo.rb +498 -0
  76. data/examples/telemetry/grafana/dashboards/htm-metrics.json +457 -0
  77. data/lib/htm/configuration.rb +261 -70
  78. data/lib/htm/database.rb +46 -22
  79. data/lib/htm/embedding_service.rb +24 -14
  80. data/lib/htm/errors.rb +15 -1
  81. data/lib/htm/jobs/generate_embedding_job.rb +19 -0
  82. data/lib/htm/jobs/generate_propositions_job.rb +103 -0
  83. data/lib/htm/jobs/generate_tags_job.rb +24 -0
  84. data/lib/htm/loaders/markdown_chunker.rb +79 -0
  85. data/lib/htm/loaders/markdown_loader.rb +41 -15
  86. data/lib/htm/long_term_memory/fulltext_search.rb +138 -0
  87. data/lib/htm/long_term_memory/hybrid_search.rb +324 -0
  88. data/lib/htm/long_term_memory/node_operations.rb +209 -0
  89. data/lib/htm/long_term_memory/relevance_scorer.rb +355 -0
  90. data/lib/htm/long_term_memory/robot_operations.rb +34 -0
  91. data/lib/htm/long_term_memory/tag_operations.rb +428 -0
  92. data/lib/htm/long_term_memory/vector_search.rb +109 -0
  93. data/lib/htm/long_term_memory.rb +51 -1153
  94. data/lib/htm/models/node.rb +35 -2
  95. data/lib/htm/models/node_tag.rb +31 -0
  96. data/lib/htm/models/robot_node.rb +31 -0
  97. data/lib/htm/models/tag.rb +44 -0
  98. data/lib/htm/proposition_service.rb +169 -0
  99. data/lib/htm/query_cache.rb +214 -0
  100. data/lib/htm/robot_group.rb +721 -0
  101. data/lib/htm/sql_builder.rb +178 -0
  102. data/lib/htm/tag_service.rb +16 -6
  103. data/lib/htm/tasks.rb +8 -2
  104. data/lib/htm/telemetry.rb +224 -0
  105. data/lib/htm/version.rb +1 -1
  106. data/lib/htm/working_memory_channel.rb +250 -0
  107. data/lib/htm.rb +66 -3
  108. data/lib/tasks/doc.rake +1 -1
  109. data/lib/tasks/htm.rake +259 -13
  110. data/mkdocs.yml +98 -96
  111. metadata +55 -20
  112. data/.aigcm_msg +0 -1
  113. data/.claude/settings.local.json +0 -95
  114. data/CLAUDE.md +0 -603
  115. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +0 -12
  116. data/examples/cli_app/temp.log +0 -93
  117. data/examples/robot_groups/lib/robot_group.rb +0 -419
  118. data/examples/robot_groups/lib/working_memory_channel.rb +0 -140
  119. data/lib/htm/loaders/paragraph_chunker.rb +0 -112
  120. data/notes/ARCHITECTURE_REVIEW.md +0 -1167
  121. data/notes/IMPLEMENTATION_SUMMARY.md +0 -606
  122. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +0 -451
  123. data/notes/next_steps.md +0 -100
  124. data/notes/plan.md +0 -627
  125. data/notes/tag_ontology_enhancement_ideas.md +0 -222
  126. data/notes/timescaledb_removal_summary.md +0 -200
data/lib/htm/errors.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # HTM (Hierarchical Temporary Memory) error classes
3
+ # HTM (Hierarchical Temporal Memory) error classes
4
4
  #
5
5
  # All HTM errors inherit from HTM::Error, allowing you to catch
6
6
  # all HTM-related errors with a single rescue clause.
@@ -93,6 +93,20 @@ class HTM
93
93
  #
94
94
  class TagError < Error; end
95
95
 
96
+ # Raised when proposition extraction fails
97
+ #
98
+ # Common causes:
99
+ # - LLM provider API errors
100
+ # - Invalid proposition response format
101
+ # - Network connectivity issues
102
+ # - Model not available
103
+ #
104
+ # Note: This error is distinct from CircuitBreakerOpenError.
105
+ # PropositionError indicates a single failure, while CircuitBreakerOpenError
106
+ # indicates repeated failures have triggered protective circuit breaking.
107
+ #
108
+ class PropositionError < Error; end
109
+
96
110
  # Raised when database operations fail
97
111
  #
98
112
  # Common causes:
@@ -36,6 +36,9 @@ class HTM
36
36
  return
37
37
  end
38
38
 
39
+ provider = HTM.configuration.embedding_provider.to_s
40
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
41
+
39
42
  begin
40
43
  HTM.logger.debug "GenerateEmbeddingJob: Generating embedding for node #{node_id}"
41
44
 
@@ -45,17 +48,33 @@ class HTM
45
48
  # Update node with processed embedding
46
49
  node.update!(embedding: result[:storage_embedding])
47
50
 
51
+ # Record success metrics
52
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
53
+ HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'success' })
54
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'success' })
55
+
48
56
  HTM.logger.info "GenerateEmbeddingJob: Successfully generated embedding for node #{node_id} (#{result[:dimension]} dimensions)"
49
57
 
50
58
  rescue HTM::CircuitBreakerOpenError => e
51
59
  # Circuit breaker is open - service is unavailable, will retry later
60
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'circuit_open' })
52
61
  HTM.logger.warn "GenerateEmbeddingJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
53
62
 
54
63
  rescue HTM::EmbeddingError => e
64
+ # Record failure metrics
65
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
66
+ HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
67
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'error' })
68
+
55
69
  # Log embedding-specific errors
56
70
  HTM.logger.error "GenerateEmbeddingJob: Embedding generation failed for node #{node_id}: #{e.message}"
57
71
 
58
72
  rescue StandardError => e
73
+ # Record failure metrics
74
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
75
+ HTM::Telemetry.embedding_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
76
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'embedding', 'status' => 'error' })
77
+
59
78
  # Log unexpected errors
60
79
  HTM.logger.error "GenerateEmbeddingJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
61
80
  HTM.logger.debug e.backtrace.first(5).join("\n")
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../errors'
4
+ require_relative '../models/node'
5
+ require_relative '../proposition_service'
6
+
7
+ class HTM
8
+ module Jobs
9
+ # Background job to extract propositions from nodes and create new nodes
10
+ #
11
+ # This job is enqueued after a node is saved (if proposition extraction is enabled).
12
+ # It uses LLM to extract atomic factual propositions from node content and
13
+ # creates new nodes for each proposition. Proposition nodes are marked with
14
+ # metadata to prevent recursive extraction.
15
+ #
16
+ # @see PropositionService
17
+ #
18
+ class GeneratePropositionsJob
19
+ # Generate propositions for a node
20
+ #
21
+ # Uses the configured proposition extractor (HTM.extract_propositions) which
22
+ # delegates to the application-provided or default RubyLLM implementation.
23
+ #
24
+ # @param node_id [Integer] ID of the node to process
25
+ # @param robot_id [Integer] ID of the robot that owns this node
26
+ #
27
+ def self.perform(node_id:, robot_id:)
28
+ node = HTM::Models::Node.find_by(id: node_id)
29
+
30
+ unless node
31
+ HTM.logger.warn "GeneratePropositionsJob: Node #{node_id} not found"
32
+ return
33
+ end
34
+
35
+ # Skip if this node is already a proposition (prevent recursion)
36
+ if node.metadata&.dig('is_proposition')
37
+ HTM.logger.debug "GeneratePropositionsJob: Node #{node_id} is a proposition, skipping"
38
+ return
39
+ end
40
+
41
+ begin
42
+ HTM.logger.debug "GeneratePropositionsJob: Extracting propositions for node #{node_id}"
43
+
44
+ # Extract propositions using PropositionService
45
+ propositions = HTM::PropositionService.extract(node.content)
46
+
47
+ if propositions.empty?
48
+ HTM.logger.debug "GeneratePropositionsJob: No propositions extracted for node #{node_id}"
49
+ return
50
+ end
51
+
52
+ HTM.logger.info "GeneratePropositionsJob: Extracted #{propositions.length} propositions for node #{node_id}"
53
+
54
+ # Create a node for each proposition
55
+ created_count = 0
56
+ propositions.each do |proposition_text|
57
+ # Calculate token count
58
+ token_count = HTM.count_tokens(proposition_text)
59
+
60
+ # Create proposition node with is_proposition marker
61
+ proposition_node = HTM::Models::Node.create!(
62
+ content: proposition_text,
63
+ token_count: token_count,
64
+ metadata: { is_proposition: true, source_node_id: node_id }
65
+ )
66
+
67
+ # Link to robot via RobotNode
68
+ HTM::Models::RobotNode.find_or_create_by!(
69
+ robot_id: robot_id,
70
+ node_id: proposition_node.id
71
+ )
72
+
73
+ # Enqueue embedding and tag jobs for the new proposition node
74
+ # (but NOT another propositions job - the is_proposition marker prevents that)
75
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateEmbeddingJob, node_id: proposition_node.id)
76
+ HTM::JobAdapter.enqueue(HTM::Jobs::GenerateTagsJob, node_id: proposition_node.id)
77
+
78
+ created_count += 1
79
+ end
80
+
81
+ HTM.logger.info "GeneratePropositionsJob: Created #{created_count} proposition nodes from node #{node_id}"
82
+
83
+ rescue HTM::CircuitBreakerOpenError
84
+ # Circuit breaker is open - service is unavailable, will retry later
85
+ HTM.logger.warn "GeneratePropositionsJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
86
+
87
+ rescue HTM::PropositionError => e
88
+ # Log proposition-specific errors
89
+ HTM.logger.error "GeneratePropositionsJob: Proposition extraction failed for node #{node_id}: #{e.message}"
90
+
91
+ rescue ActiveRecord::RecordInvalid => e
92
+ # Log validation errors
93
+ HTM.logger.error "GeneratePropositionsJob: Database validation failed for node #{node_id}: #{e.message}"
94
+
95
+ rescue StandardError => e
96
+ # Log unexpected errors
97
+ HTM.logger.error "GeneratePropositionsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
98
+ HTM.logger.debug e.backtrace.first(5).join("\n")
99
+ end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -33,6 +33,9 @@ class HTM
33
33
  return
34
34
  end
35
35
 
36
+ provider = HTM.configuration.tag_provider.to_s
37
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
38
+
36
39
  begin
37
40
  HTM.logger.debug "GenerateTagsJob: Extracting tags for node #{node_id}"
38
41
 
@@ -61,21 +64,42 @@ class HTM
61
64
  )
62
65
  end
63
66
 
67
+ # Record success metrics
68
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
69
+ HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'success' })
70
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'success' })
71
+
64
72
  HTM.logger.info "GenerateTagsJob: Successfully generated #{tag_names.length} tags for node #{node_id}: #{tag_names.join(', ')}"
65
73
 
66
74
  rescue HTM::CircuitBreakerOpenError => e
67
75
  # Circuit breaker is open - service is unavailable, will retry later
76
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'circuit_open' })
68
77
  HTM.logger.warn "GenerateTagsJob: Circuit breaker open for node #{node_id}, will retry when service recovers"
69
78
 
70
79
  rescue HTM::TagError => e
80
+ # Record failure metrics
81
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
82
+ HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
83
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
84
+
71
85
  # Log tag-specific errors
72
86
  HTM.logger.error "GenerateTagsJob: Tag generation failed for node #{node_id}: #{e.message}"
73
87
 
74
88
  rescue ActiveRecord::RecordInvalid => e
89
+ # Record failure metrics
90
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
91
+ HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
92
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
93
+
75
94
  # Log validation errors
76
95
  HTM.logger.error "GenerateTagsJob: Database validation failed for node #{node_id}: #{e.message}"
77
96
 
78
97
  rescue StandardError => e
98
+ # Record failure metrics
99
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
100
+ HTM::Telemetry.tag_latency.record(elapsed_ms, attributes: { 'provider' => provider, 'status' => 'error' })
101
+ HTM::Telemetry.job_counter.add(1, attributes: { 'job' => 'tags', 'status' => 'error' })
102
+
79
103
  # Log unexpected errors
80
104
  HTM.logger.error "GenerateTagsJob: Unexpected error for node #{node_id}: #{e.class.name} - #{e.message}"
81
105
  HTM.logger.debug e.backtrace.first(5).join("\n")
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'baran'
4
+
5
+ class HTM
6
+ module Loaders
7
+ # Markdown-aware text chunker using Baran
8
+ #
9
+ # Wraps Baran::MarkdownSplitter to provide intelligent text chunking
10
+ # that respects markdown structure (headers, code blocks, etc.).
11
+ #
12
+ # @example Basic usage
13
+ # chunker = MarkdownChunker.new
14
+ # chunks = chunker.chunk("# Header\n\nParagraph text.\n\n## Subheader\n\nMore text.")
15
+ # # => ["# Header\n\nParagraph text.", "## Subheader\n\nMore text."]
16
+ #
17
+ # @example With custom chunk size
18
+ # chunker = MarkdownChunker.new(chunk_size: 512, chunk_overlap: 50)
19
+ # chunks = chunker.chunk(long_text)
20
+ #
21
+ # @example With full metadata (includes cursor positions)
22
+ # chunker = MarkdownChunker.new
23
+ # chunks = chunker.chunk_with_metadata(text)
24
+ # # => [{ text: "...", cursor: 0, metadata: nil }, { text: "...", cursor: 156, metadata: nil }]
25
+ #
26
+ class MarkdownChunker
27
+ # @param chunk_size [Integer] Maximum characters per chunk (default: from config or 1024)
28
+ # @param chunk_overlap [Integer] Character overlap between chunks (default: from config or 64)
29
+ def initialize(chunk_size: nil, chunk_overlap: nil)
30
+ @chunk_size = chunk_size || HTM.configuration.chunk_size
31
+ @chunk_overlap = chunk_overlap || HTM.configuration.chunk_overlap
32
+
33
+ @splitter = Baran::MarkdownSplitter.new(
34
+ chunk_size: @chunk_size,
35
+ chunk_overlap: @chunk_overlap
36
+ )
37
+ end
38
+
39
+ # Split text into markdown-aware chunks (text only)
40
+ #
41
+ # @param text [String] Text to chunk
42
+ # @return [Array<String>] Array of text chunks
43
+ #
44
+ def chunk(text)
45
+ return [] if text.nil? || text.strip.empty?
46
+
47
+ # Normalize line endings
48
+ normalized = text.gsub(/\r\n?/, "\n")
49
+
50
+ # Use Baran's MarkdownSplitter
51
+ result = @splitter.chunks(normalized)
52
+
53
+ # Extract text from chunk hashes, filter empty
54
+ result.map { |chunk| chunk[:text].strip }.reject(&:empty?)
55
+ end
56
+
57
+ # Split text and return full chunk data (with cursor positions)
58
+ #
59
+ # Returns Baran's full output including:
60
+ # - :text [String] The chunk content
61
+ # - :cursor [Integer] Character offset where chunk starts in original text
62
+ #
63
+ # @param text [String] Text to chunk
64
+ # @return [Array<Hash>] Array of chunk hashes with :text and :cursor
65
+ #
66
+ def chunk_with_metadata(text)
67
+ return [] if text.nil? || text.strip.empty?
68
+
69
+ # Normalize line endings
70
+ normalized = text.gsub(/\r\n?/, "\n")
71
+
72
+ # Use Baran's MarkdownSplitter - returns [{text:, cursor:}, ...]
73
+ @splitter.chunks(normalized)
74
+ end
75
+
76
+ attr_reader :chunk_size, :chunk_overlap
77
+ end
78
+ end
79
+ end
@@ -26,9 +26,14 @@ class HTM
26
26
  MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB maximum file size
27
27
 
28
28
  # @param htm_instance [HTM] The HTM instance to use for storing nodes
29
- def initialize(htm_instance)
29
+ # @param chunk_size [Integer] Maximum characters per chunk (default: from config)
30
+ # @param chunk_overlap [Integer] Character overlap between chunks (default: from config)
31
+ def initialize(htm_instance, chunk_size: nil, chunk_overlap: nil)
30
32
  @htm = htm_instance
31
- @chunker = ParagraphChunker.new
33
+ @chunker = MarkdownChunker.new(
34
+ chunk_size: chunk_size,
35
+ chunk_overlap: chunk_overlap
36
+ )
32
37
  end
33
38
 
34
39
  # Load a single markdown file into long-term memory
@@ -89,19 +94,19 @@ class HTM
89
94
  # Parse frontmatter and body
90
95
  frontmatter, body = extract_frontmatter(content)
91
96
 
92
- # Chunk the body
93
- chunks = @chunker.chunk(body)
97
+ # Chunk the body with metadata (includes cursor positions)
98
+ chunks = @chunker.chunk_with_metadata(body)
94
99
 
95
100
  # Prepend frontmatter to first chunk if present
96
101
  if frontmatter.any? && chunks.any?
97
102
  frontmatter_yaml = YAML.dump(frontmatter).sub(/\A---\n/, "---\n")
98
- chunks[0] = "#{frontmatter_yaml}---\n\n#{chunks[0]}"
103
+ chunks[0][:text] = "#{frontmatter_yaml}---\n\n#{chunks[0][:text]}"
99
104
  end
100
105
 
101
106
  # Save source first (need ID for node association)
102
107
  source.save! if source.new_record?
103
108
 
104
- # Sync chunks to database
109
+ # Sync chunks to database (chunks now include cursor positions)
105
110
  result = sync_chunks(source, chunks)
106
111
 
107
112
  # Update source record
@@ -181,7 +186,7 @@ class HTM
181
186
  # Sync chunks to database, handling updates and deletions
182
187
  #
183
188
  # @param source [FileSource] The source record
184
- # @param chunks [Array<String>] New chunk contents
189
+ # @param chunks [Array<Hash>] New chunks with :text and :cursor keys
185
190
  # @return [Hash] Sync statistics
186
191
  #
187
192
  def sync_chunks(source, chunks)
@@ -197,12 +202,16 @@ class HTM
197
202
  # Track which existing nodes we've matched
198
203
  matched_hashes = Set.new
199
204
 
200
- # Process each new chunk
201
- chunks.each_with_index do |chunk_content, position|
205
+ # Process each new chunk (chunks are now Hashes with :text and :cursor)
206
+ chunks.each_with_index do |chunk_data, position|
207
+ chunk_content = chunk_data[:text].strip
208
+ chunk_cursor = chunk_data[:cursor]
209
+ next if chunk_content.empty?
210
+
202
211
  chunk_hash = HTM::Models::Node.generate_content_hash(chunk_content)
203
212
 
204
213
  if existing_by_hash[chunk_hash]
205
- # Chunk exists - update position if needed, restore if soft-deleted
214
+ # Chunk exists - update position/cursor if needed, restore if soft-deleted
206
215
  node = existing_by_hash[chunk_hash]
207
216
  matched_hashes << chunk_hash
208
217
 
@@ -210,13 +219,20 @@ class HTM
210
219
  changes[:chunk_position] = position if node.chunk_position != position
211
220
  changes[:deleted_at] = nil if node.deleted_at.present?
212
221
 
222
+ # Update cursor in metadata if changed
223
+ current_cursor = node.metadata&.dig('cursor')
224
+ if current_cursor != chunk_cursor
225
+ new_metadata = (node.metadata || {}).merge('cursor' => chunk_cursor)
226
+ changes[:metadata] = new_metadata
227
+ end
228
+
213
229
  if changes.any?
214
230
  node.update!(changes)
215
231
  updated += 1
216
232
  end
217
233
  else
218
- # New chunk - create node
219
- node = create_chunk_node(source, chunk_content, position)
234
+ # New chunk - create node with cursor in metadata
235
+ node = create_chunk_node(source, chunk_content, position, cursor: chunk_cursor)
220
236
  created += 1 if node
221
237
  end
222
238
  end
@@ -238,11 +254,15 @@ class HTM
238
254
  # @param source [FileSource] The source record
239
255
  # @param content [String] Chunk content
240
256
  # @param position [Integer] Position in file (0-indexed)
257
+ # @param cursor [Integer] Character offset in original file
241
258
  # @return [Node, nil] The created node or nil if duplicate
242
259
  #
243
- def create_chunk_node(source, content, position)
260
+ def create_chunk_node(source, content, position, cursor: nil)
261
+ # Build metadata with cursor position (file path is in source, not duplicated here)
262
+ chunk_metadata = cursor ? { 'cursor' => cursor } : {}
263
+
244
264
  # Use remember to get proper embedding/tag processing
245
- node_id = @htm.remember(content)
265
+ node_id = @htm.remember(content, metadata: chunk_metadata)
246
266
 
247
267
  # Update with source reference
248
268
  node = HTM::Models::Node.find(node_id)
@@ -254,7 +274,13 @@ class HTM
254
274
  # Find and link to this source
255
275
  existing = HTM::Models::Node.find_by_content(content)
256
276
  if existing && existing.source_id.nil?
257
- existing.update!(source_id: source.id, chunk_position: position)
277
+ # Merge cursor into existing metadata
278
+ new_metadata = (existing.metadata || {}).merge('cursor' => cursor) if cursor
279
+ existing.update!(
280
+ source_id: source.id,
281
+ chunk_position: position,
282
+ metadata: new_metadata || existing.metadata
283
+ )
258
284
  end
259
285
  existing
260
286
  end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTM
4
+ class LongTermMemory
5
+ # Full-text search using PostgreSQL tsvector and pg_trgm
6
+ #
7
+ # Performs keyword-based search using:
8
+ # - PostgreSQL full-text search (tsvector/tsquery) for stemmed word matching
9
+ # - Trigram fuzzy matching (pg_trgm) for typos and partial words
10
+ # - Combined scoring: tsvector matches rank higher, trigram provides fallback
11
+ #
12
+ # Results are cached for performance.
13
+ #
14
+ # Security: All queries use parameterized placeholders to prevent SQL injection.
15
+ #
16
+ module FulltextSearch
17
+ # Maximum results to prevent DoS via unbounded queries
18
+ MAX_FULLTEXT_LIMIT = 1000
19
+
20
+ # Minimum trigram similarity threshold (0.0-1.0)
21
+ # Lower = more fuzzy matches, higher = stricter matching
22
+ TRIGRAM_SIMILARITY_THRESHOLD = 0.1
23
+
24
+ # Score boost for tsvector matches over trigram matches
25
+ # Ensures exact word matches rank above fuzzy matches
26
+ TSVECTOR_SCORE_BOOST = 1.0
27
+
28
+ # Full-text search
29
+ #
30
+ # @param timeframe [Range] Time range to search
31
+ # @param query [String] Search query
32
+ # @param limit [Integer] Maximum results (capped at MAX_FULLTEXT_LIMIT)
33
+ # @param metadata [Hash] Filter by metadata fields (default: {})
34
+ # @return [Array<Hash>] Matching nodes
35
+ #
36
+ def search_fulltext(timeframe:, query:, limit:, metadata: {})
37
+ # Enforce limit to prevent DoS
38
+ safe_limit = [[limit.to_i, 1].max, MAX_FULLTEXT_LIMIT].min
39
+
40
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
41
+ result = @cache.fetch(:fulltext, timeframe, query, safe_limit, metadata) do
42
+ search_fulltext_uncached(
43
+ timeframe: timeframe,
44
+ query: query,
45
+ limit: safe_limit,
46
+ metadata: metadata
47
+ )
48
+ end
49
+ elapsed_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round
50
+ HTM::Telemetry.search_latency.record(elapsed_ms, attributes: { 'strategy' => 'fulltext' })
51
+ result
52
+ end
53
+
54
+ private
55
+
56
+ # Uncached full-text search combining tsvector and trigram matching
57
+ #
58
+ # Uses UNION to combine:
59
+ # 1. tsvector matches (stemmed words, high priority)
60
+ # 2. trigram matches (fuzzy/partial, lower priority fallback)
61
+ #
62
+ # Deduplicates by taking highest score per node.
63
+ #
64
+ # @param timeframe [nil, Range, Array<Range>] Time range(s) to search (nil = no filter)
65
+ # @param query [String] Search query
66
+ # @param limit [Integer] Maximum results
67
+ # @param metadata [Hash] Filter by metadata fields (default: {})
68
+ # @return [Array<Hash>] Matching nodes
69
+ #
70
+ def search_fulltext_uncached(timeframe:, query:, limit:, metadata: {})
71
+ # Build filter conditions
72
+ timeframe_condition = HTM::SqlBuilder.timeframe_condition(timeframe)
73
+ metadata_condition = HTM::SqlBuilder.metadata_condition(metadata)
74
+
75
+ additional_conditions = []
76
+ additional_conditions << timeframe_condition if timeframe_condition
77
+ additional_conditions << metadata_condition if metadata_condition
78
+ additional_sql = additional_conditions.any? ? "AND #{additional_conditions.join(' AND ')}" : ""
79
+
80
+ # Combined tsvector + trigram search
81
+ # tsvector matches get boosted score, trigram provides fuzzy fallback
82
+ sql = <<~SQL
83
+ WITH tsvector_matches AS (
84
+ -- Primary: tsvector full-text search (stemmed word matching)
85
+ SELECT id, content, access_count, created_at, token_count,
86
+ (? + ts_rank(to_tsvector('english', content), plainto_tsquery('english', ?))) as score,
87
+ 'tsvector' as match_type
88
+ FROM nodes
89
+ WHERE deleted_at IS NULL
90
+ AND to_tsvector('english', content) @@ plainto_tsquery('english', ?)
91
+ #{additional_sql}
92
+ ),
93
+ trigram_matches AS (
94
+ -- Fallback: trigram fuzzy matching (typos, partial words)
95
+ SELECT id, content, access_count, created_at, token_count,
96
+ similarity(content, ?) as score,
97
+ 'trigram' as match_type
98
+ FROM nodes
99
+ WHERE deleted_at IS NULL
100
+ AND similarity(content, ?) >= ?
101
+ AND id NOT IN (SELECT id FROM tsvector_matches)
102
+ #{additional_sql}
103
+ ),
104
+ combined AS (
105
+ SELECT * FROM tsvector_matches
106
+ UNION ALL
107
+ SELECT * FROM trigram_matches
108
+ )
109
+ SELECT id, content, access_count, created_at, token_count,
110
+ MAX(score) as rank, match_type
111
+ FROM combined
112
+ GROUP BY id, content, access_count, created_at, token_count, match_type
113
+ ORDER BY rank DESC
114
+ LIMIT ?
115
+ SQL
116
+
117
+ result = ActiveRecord::Base.connection.select_all(
118
+ ActiveRecord::Base.sanitize_sql_array([
119
+ sql,
120
+ TSVECTOR_SCORE_BOOST, # boost for tsvector
121
+ query, # ts_rank query
122
+ query, # tsvector match query
123
+ query, # trigram similarity query
124
+ query, # trigram match query
125
+ TRIGRAM_SIMILARITY_THRESHOLD,
126
+ limit
127
+ ])
128
+ )
129
+
130
+ # Track access for retrieved nodes
131
+ node_ids = result.map { |r| r['id'] }
132
+ track_access(node_ids)
133
+
134
+ result.to_a
135
+ end
136
+ end
137
+ end
138
+ end