RubyGems - ragnar-cli - Versions diffs - 0.1.0.pre.3 → 0.1.0.pre.5 - Mend

ragnar-cli 0.1.0.pre.3 → 0.1.0.pre.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

checksums.yaml +4 -4
data/README.md +249 -41
data/lib/ragnar/cli.rb +563 -219
data/lib/ragnar/cli_umap.rb +86 -0
data/lib/ragnar/cli_visualization.rb +184 -0
data/lib/ragnar/config.rb +320 -0
data/lib/ragnar/database.rb +94 -8
data/lib/ragnar/embedder.rb +1 -1
data/lib/ragnar/indexer.rb +4 -2
data/lib/ragnar/llm_manager.rb +31 -27
data/lib/ragnar/query_processor.rb +123 -70
data/lib/ragnar/query_rewriter.rb +21 -18
data/lib/ragnar/topic_modeling.rb +13 -10
data/lib/ragnar/umap_processor.rb +131 -95
data/lib/ragnar/umap_transform_service.rb +169 -88
data/lib/ragnar/version.rb +1 -1
data/lib/ragnar.rb +3 -1
metadata +71 -30
data/lib/ragnar/topic_modeling/engine.rb +0 -301
data/lib/ragnar/topic_modeling/labeling_strategies.rb +0 -300
data/lib/ragnar/topic_modeling/llm_adapter.rb +0 -131
data/lib/ragnar/topic_modeling/metrics.rb +0 -186
data/lib/ragnar/topic_modeling/term_extractor.rb +0 -170
data/lib/ragnar/topic_modeling/topic.rb +0 -117
data/lib/ragnar/topic_modeling/topic_labeler.rb +0 -61

data/lib/ragnar/database.rb CHANGED Viewed

@@ -5,6 +5,7 @@ module Ragnar
     def initialize(db_path, table_name: "documents")  # db_path: filesystem path of the Lancelot dataset
       @db_path = db_path
       @table_name = table_name
+      @dataset_cache = nil  # Memoized dataset handle (filled by cached_dataset, reset by clear_dataset_cache) to prevent file descriptor leaks
       ensure_database_exists
     end
@@ -34,16 +35,23 @@ module Ragnar
         metadata: :string
       }
+      # Clear cache before modifying dataset
+      clear_dataset_cache
       # Use the new open_or_create method from Lancelot
       # This automatically handles both creating new and opening existing datasets
       dataset = Lancelot::Dataset.open_or_create(@db_path, schema: schema)
       dataset.add_documents(data)
+      # Clear cache after modification to ensure fresh data on next read
+      clear_dataset_cache
     end
     def get_embeddings(limit: nil, offset: 0)
       return [] unless dataset_exists?
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      return [] unless dataset
       # Get all documents or a subset
       docs = if limit && offset > 0
@@ -67,7 +75,8 @@ module Ragnar
     def update_reduced_embeddings(updates)
       return if updates.empty?
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      return unless dataset
       # Get all existing documents and safely extract their data
       all_docs = dataset.to_a.map do |doc|
@@ -113,17 +122,24 @@ module Ragnar
         metadata: :string
       }
+      # Clear cache before recreating dataset
+      clear_dataset_cache
       # Remove old dataset and create new one with updated data
       FileUtils.rm_rf(@db_path)
       # Use open_or_create which will create since we just deleted the path
       dataset = Lancelot::Dataset.open_or_create(@db_path, schema: schema)
       dataset.add_documents(updated_docs)
+      # Clear cache after modification
+      clear_dataset_cache
     end
     def search_similar(embedding, k: 10, use_reduced: false)
       return [] unless dataset_exists?
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      return [] unless dataset
       embedding_field = use_reduced ? :reduced_embedding : :embedding
@@ -149,7 +165,9 @@ module Ragnar
     def count  # Number of documents; 0 when the dataset is missing or cannot be opened
       return 0 unless dataset_exists?
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      return 0 unless dataset  # cached_dataset yields nil when the open fails
       dataset.to_a.size  # NOTE(review): builds the full row array just to take its size
     end
@@ -166,7 +184,18 @@ module Ragnar
         }
       end
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      unless dataset
+        return {
+          document_count: 0,
+          total_documents: 0,
+          unique_files: 0,
+          total_chunks: 0,
+          with_embeddings: 0,
+          with_reduced_embeddings: 0,
+          total_size_mb: 0.0
+        }
+      end
       # Get all documents
       all_docs = dataset.to_a
@@ -214,7 +243,9 @@ module Ragnar
     def get_all_documents_with_embeddings(limit: nil)
       return [] unless dataset_exists?
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      return [] unless dataset
       all_docs = limit ? dataset.first(limit) : dataset.to_a
       all_docs.select { |doc| doc[:embedding] && !doc[:embedding].empty? }
@@ -223,7 +254,8 @@ module Ragnar
     def full_text_search(query, limit: 10)
       return [] unless dataset_exists?
-      dataset = Lancelot::Dataset.open(@db_path)
+      dataset = cached_dataset
+      return [] unless dataset
       # Use Lancelot's full-text search
       results = dataset.full_text_search(
@@ -243,11 +275,49 @@ module Ragnar
       end
     end
+    # Total number of documents in the database (thin wrapper around #count)
+    def document_count
+      count  # 0 when the dataset is missing or cannot be opened
+    end
+    # Get documents by their IDs; IDs with no matching row are simply absent from the result
+    # @param ids [Array<String>] Document IDs to fetch (must respond to empty? and to_set)
+    # @return [Array<Hash>] Matching documents in dataset order, symbol-keyed, with :metadata parsed from JSON
+    def get_documents_by_ids(ids)
+      return [] if ids.empty? || !dataset_exists?  # nothing requested, or nothing to read from
+      dataset = cached_dataset
+      return [] unless dataset  # dataset could not be opened
+      # Create ID lookup set so each membership test is a hash lookup
+      id_set = ids.to_set  # NOTE(review): needs `set` loaded (available without require only on Ruby >= 3.2) - confirm a require exists
+      # Filter documents by IDs, then copy the known fields into a fresh hash
+      dataset.to_a.select { |doc| id_set.include?(doc[:id]) }.map do |doc|  # NOTE(review): loads every row, then filters in Ruby
+        {
+          id: doc[:id],
+          chunk_text: doc[:chunk_text],
+          file_path: doc[:file_path],
+          chunk_index: doc[:chunk_index],
+          embedding: doc[:embedding],
+          reduced_embedding: doc[:reduced_embedding],
+          metadata: JSON.parse(doc[:metadata] || "{}")  # nil metadata becomes {}; parsed keys are Strings
+        }
+      end
+    end
     def dataset_exists?
       return false unless File.exist?(@db_path)
+      # Try to use cached dataset if available
+      if @dataset_cache
+        return true
+      end
+      # Otherwise check if we can open it
       begin
-        Lancelot::Dataset.open(@db_path)
+        # Don't cache here, just check existence
+        dataset = Lancelot::Dataset.open(@db_path)
         true
       rescue
         false
@@ -263,5 +333,21 @@ module Ragnar
     def table_exists?  # Same answer as dataset_exists?; kept as a separate name for callers
       dataset_exists?
     end
+    # Returns the memoized Lancelot::Dataset for @db_path, opening it on first use; nil if the path is missing or the open raises
+    def cached_dataset
+      return nil unless File.exist?(@db_path)  # checked before the cache, so a vanished path yields nil even if a handle is cached
+      @dataset_cache ||= begin  # ||= keeps nothing after a failed open, so the next call retries
+        Lancelot::Dataset.open(@db_path)
+      rescue => e  # NOTE(review): e is never used and the failure is swallowed without any log
+        nil
+      end
+    end
+    # Forget the memoized dataset so the next cached_dataset call reopens it (invoked before and after writes)
+    def clear_dataset_cache
+      @dataset_cache = nil  # only the reference is dropped; no explicit close is performed here
+    end
   end
 end

data/lib/ragnar/embedder.rb CHANGED Viewed

@@ -34,7 +34,7 @@ module Ragnar
     def embed_batch(texts, show_progress: true)
       embeddings = []
-      if show_progress
+      if show_progress && $stdout.respond_to?(:ioctl)
         progressbar = TTY::ProgressBar.new(
           "Generating embeddings [:bar] :percent :current/:total",
           total: texts.size,

data/lib/ragnar/indexer.rb CHANGED Viewed

@@ -31,7 +31,7 @@ module Ragnar
       puts "Found #{files.size} file(s) to process" if @show_progress
-      file_progress = if @show_progress
+      file_progress = if @show_progress && $stdout.respond_to?(:ioctl)
         TTY::ProgressBar.new(
           "Processing [:bar] :percent :current/:total - :filename",
           total: files.size,
@@ -43,13 +43,15 @@ module Ragnar
         nil
       end
-      files.each do |file_path|
+      files.each_with_index do |file_path, idx|
         begin
           if file_progress
             # Update the progress bar with current filename
             filename = File.basename(file_path)
             filename = filename[0..27] + "..." if filename.length > 30
             file_progress.advance(0, filename: filename)
+          elsif @show_progress
+            puts "Processing (#{idx + 1}/#{files.size}): #{File.basename(file_path)}"
           end
           process_file(file_path, stats, file_progress)

data/lib/ragnar/llm_manager.rb CHANGED Viewed

@@ -1,43 +1,47 @@
 module Ragnar
-  # Singleton manager for LLM instances to avoid reloading models
+  # Singleton manager that caches one RubyLLM chat instance per "provider:model" key to avoid reloading models.
+  # Supports any RubyLLM provider (red_candle for local, openai, anthropic, etc.)
   class LLMManager
     include Singleton
     def initialize
-      @llms = {}
+      @chats = {}  # cache_key ("provider:model") => chat instance
       @mutex = Mutex.new  # guards every read and write of @chats
     end
-    # Get or create an LLM instance
-    # @param model_id [String] The model identifier
-    # @param gguf_file [String, nil] Optional GGUF file for quantized models
-    # @return [Candle::LLM] The LLM instance
-    def get_llm(model_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-                gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
-      cache_key = "#{model_id}:#{gguf_file}"
+    # Get or create the cached RubyLLM chat instance for a provider/model pair
+    # @param provider [String, Symbol] The RubyLLM provider (default from config)
+    # @param model [String] The model identifier (default from config)
+    # @return [RubyLLM::Chat] The cached chat; the same object is handed back on every call with that key
+    def get_chat(provider: nil, model: nil)
+      config = Config.instance
+      provider ||= config.llm_provider
+      model ||= config.llm_model
+      cache_key = "#{provider}:#{model}"
       @mutex.synchronize do
-        @llms[cache_key] ||= begin
-          puts "Loading LLM: #{model_id}..." unless @llms.key?(cache_key)
-          if gguf_file
-            Candle::LLM.from_pretrained(model_id, gguf_file: gguf_file)
-          else
-            Candle::LLM.from_pretrained(model_id)
-          end
+        @chats[cache_key] ||= begin
+          puts "Loading LLM: #{model} (#{provider})..." if ENV['DEBUG']  # reached only on a cache miss, printed only when DEBUG is set
+          Config.instance.create_chat  # NOTE(review): provider/model are not passed, so explicit arguments only change the cache key - confirm intended
         end
       end
     end
-    # Clear all cached models (useful for memory management)
+    # Clear all cached chat instances (useful for memory management)
     def clear_cache
       @mutex.synchronize do
-        @llms.clear
+        @chats.clear
       end
     end
-    # Get the default LLM for the application
-    def default_llm
-      get_llm
+    # Get the default chat instance for the application (provider and model both taken from config)
+    def default_chat
+      get_chat
     end
+    # Backwards compatibility aliases. NOTE(review): get_llm now takes get_chat's keywords; the old model_id:/gguf_file: are not accepted
+    alias_method :get_llm, :get_chat
+    alias_method :default_llm, :default_chat
   end
-end
+end

data/lib/ragnar/query_processor.rb CHANGED Viewed

@@ -16,29 +16,55 @@ module Ragnar
       @reranker = nil # Will initialize when needed
     end
-    def query(user_query, top_k: 3, verbose: false)
+    def query(user_query, top_k: 3, verbose: false, enable_rewriting: true, enable_reranking: false)
       puts "Processing query: #{user_query}" if verbose
-      # Step 1: Rewrite and analyze the query
-      puts "\n#{'-'*60}" if verbose
-      puts "STEP 1: Query Analysis & Rewriting" if verbose
-      puts "-"*60 if verbose
-      rewritten = @rewriter.rewrite(user_query)
-      if verbose
-        puts "\nOriginal Query: #{user_query}"
-        puts "\nRewritten Query Analysis:"
-        puts "  Clarified Intent: #{rewritten['clarified_intent']}"
-        puts "  Query Type: #{rewritten['query_type']}"
-        puts "  Context Needed: #{rewritten['context_needed']}"
-        puts "\nGenerated Sub-queries (#{rewritten['sub_queries'].length}):"
-        rewritten['sub_queries'].each_with_index do |sq, idx|
-          puts "  #{idx + 1}. #{sq}"
+      # Step 1: Rewrite and analyze the query (if enabled)
+      if enable_rewriting
+        puts "\n#{'-'*60}" if verbose
+        puts "STEP 1: Query Analysis & Rewriting" if verbose
+        puts "-"*60 if verbose
+        rewritten = @rewriter.rewrite(user_query)
+        # Always include the original query in sub-queries to ensure direct matches
+        # are found regardless of how the rewriter reformulates
+        sub_queries = rewritten['sub_queries'] || []
+        unless sub_queries.include?(user_query)
+          sub_queries.unshift(user_query)
         end
-        if rewritten['key_terms'] && !rewritten['key_terms'].empty?
-          puts "\nKey Terms Identified:"
-          puts "  #{rewritten['key_terms'].join(', ')}"
+        rewritten['sub_queries'] = sub_queries
+        if verbose
+          puts "\nOriginal Query: #{user_query}"
+          puts "\nRewritten Query Analysis:"
+          puts "  Clarified Intent: #{rewritten['clarified_intent']}"
+          puts "  Query Type: #{rewritten['query_type']}"
+          puts "  Context Needed: #{rewritten['context_needed']}"
+          puts "\nGenerated Sub-queries (#{rewritten['sub_queries'].length}):"
+          rewritten['sub_queries'].each_with_index do |sq, idx|
+            puts "  #{idx + 1}. #{sq}"
+          end
+          if rewritten['key_terms'] && !rewritten['key_terms'].empty?
+            puts "\nKey Terms Identified:"
+            puts "  #{rewritten['key_terms'].join(', ')}"
+          end
+        end
+      else
+        # Skip rewriting - use original query directly
+        rewritten = {
+          'clarified_intent' => user_query,
+          'query_type' => 'direct',
+          'context_needed' => 'general',
+          'sub_queries' => [user_query],
+          'key_terms' => []
+        }
+        if verbose
+          puts "\n#{'-'*60}"
+          puts "STEP 1: Query Analysis (Rewriting Disabled)"
+          puts "-"*60
+          puts "\nUsing original query directly"
         end
       end
@@ -77,18 +103,25 @@ module Ragnar
         puts "-"*60
       end
-      reranked = rerank_documents(
-        query: rewritten['clarified_intent'],
-        documents: candidates,
-        top_k: top_k * 2  # Get more than we need for context
-      )
+      if enable_reranking
+        reranked = rerank_documents(
+          query: user_query,
+          documents: candidates,
+          top_k: top_k * 2
+        )
+      else
+        # Use retrieval order (RRF scores) directly — often more reliable than
+        # small cross-encoder rerankers on domain-specific corpora
+        reranked = candidates
+      end
       if verbose && reranked.any?
-        puts "\nTop Reranked Documents:"
+        puts "\nTop #{enable_reranking ? 'Reranked' : 'Retrieved'} Documents:"
         reranked[0..2].each_with_index do |doc, idx|
           full_text = (doc[:chunk_text] || doc[:text] || "").gsub(/\s+/, ' ')
           puts "  #{idx + 1}. [#{File.basename(doc[:file_path] || 'unknown')}]"
           puts "     Score: #{doc[:score]&.round(4) if doc[:score]}"
+          puts "     Distance: #{doc[:distance]&.round(4) if doc[:distance]}"
           puts "     Full chunk (#{full_text.length} chars):"
           puts "     \"#{full_text}\""
           puts ""
@@ -156,12 +189,12 @@ module Ragnar
         query: user_query,
         clarified: rewritten['clarified_intent'],
         answer: response,
-        sources: context_docs.map { |d|
+        sources: context_docs.map { |d|
           {
-            source_file: d[:file_path] || d[:source_file],
-            chunk_index: d[:chunk_index]
+            source_file: d[:file_path] || d[:source_file] || d["file_path"],
+            chunk_index: d[:chunk_index] || d["chunk_index"]
           }
-        },
+        }.reject { |s| s[:source_file].nil? },
         sub_queries: rewritten['sub_queries'],
         confidence: calculate_confidence(reranked[0...top_k])
       }
@@ -242,22 +275,43 @@ module Ragnar
           k: k,
           use_reduced: use_reduced
         )
         if verbose
-          puts "  Found #{vector_results.length} matches"
+          puts "  Vector search: #{vector_results.length} matches"
           if vector_results.any?
             best = vector_results.first
-            puts "  Best match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
+            puts "  Best vector match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
           end
         end
         # Add query index for RRF
         vector_results.each do |result|
           result[:query_idx] = idx
           result[:retrieval_method] = :vector
         end
         all_results.concat(vector_results)
+        # Full-text search for keyword matching (hybrid search)
+        begin
+          fts_results = @database.full_text_search(query, limit: k)
+          if verbose && fts_results.any?
+            puts "  FTS: #{fts_results.length} matches"
+            best_fts = fts_results.first
+            puts "  Best FTS match: [#{File.basename(best_fts[:file_path] || 'unknown')}]"
+          end
+          fts_results.each_with_index do |result, rank|
+            # Synthesize a distance from FTS rank (lower rank = better match)
+            result[:distance] = 0.1 + (rank * 0.05)
+            result[:query_idx] = idx
+            result[:retrieval_method] = :fts
+          end
+          all_results.concat(fts_results)
+        rescue => e
+          puts "  FTS unavailable: #{e.message}" if verbose
+        end
       end
       if verbose
@@ -281,10 +335,18 @@ module Ragnar
       results.each do |result|
         doc_id = result[:id]
-        doc_scores[doc_id] ||= {
-          score: 0.0,
-          document: result
-        }
+        if doc_scores[doc_id]
+          # Prefer the document with more complete metadata
+          existing = doc_scores[doc_id][:document]
+          if result[:file_path] && !existing[:file_path]
+            doc_scores[doc_id][:document] = result
+          end
+        else
+          doc_scores[doc_id] = {
+            score: 0.0,
+            document: result
+          }
+        end
         # RRF formula: 1 / (k + rank)
         # Using distance as a proxy for rank (lower distance = better rank)
@@ -319,14 +381,14 @@ module Ragnar
       # Initialize reranker if not already done
       @reranker ||= Candle::Reranker.from_pretrained(
-        "cross-encoder/ms-marco-MiniLM-L-12-v2"
+        Config.instance.reranker_model
       )
       # Prepare document texts - use chunk_text field
       texts = unique_docs.map { |doc| doc[:chunk_text] || doc[:text] || "" }
-      # Rerank - returns array of {doc_id:, score:, text:}
-      reranked = @reranker.rerank(query, texts)
+      # Rerank - use raw logits (no sigmoid) for better score separation
+      reranked = @reranker.rerank(query, texts, apply_sigmoid: false)
       # Map back to original documents with scores
       reranked.map do |result|
@@ -343,46 +405,37 @@ module Ragnar
       # In the future, we could fetch neighboring chunks for more context
       context_size = case context_needed
                      when "extensive" then 5
-                     when "moderate" then 3
-                     else 2
+                     when "moderate" then 4
+                     else 3
                      end
       documents.first(context_size)
     end
     def generate_response(query:, repacked_context:, query_type:)  # NOTE(review): query_type: is accepted but not read by the new body
-      # Get cached LLM from manager
-      llm = @llm_manager.default_llm
-      # Create prompt with repacked context
-      prompt = build_prompt(query, repacked_context, query_type)
-      # Generate response using default config
-      llm.generate(prompt)
+      # Create a fresh chat for each query (not the LLMManager-cached one) to avoid conversation history bleed
+      chat = Config.instance.create_chat
+      chat.with_instructions(
+        "You are a helpful assistant. Answer questions based ONLY on the provided context. " \
+        "If the answer is not in the context, say \"I don't have enough information to answer that question.\" " \
+        "Be concise and direct. /no_think"  # "/no_think" is presumably a model-specific switch that suppresses <think> output - TODO confirm
+      )
+      prompt = "Context:\n#{repacked_context}\n\nQuestion: #{query}"
+      response = chat.ask(prompt).content
+      # Strip <think>...</think> blocks that some models (e.g. Qwen3) include
+      strip_think_tags(response)
     rescue => e
       # Fallback: on any StandardError, warn and return the first 501 characters of the repacked context
       puts "Warning: LLM generation failed (#{e.message})"
       "Based on the retrieved information:\n\n#{repacked_context[0..500]}..."
     end
-    def build_prompt(query, context, query_type)
-      base_prompt = <<~PROMPT
-        <|system|>
-        You are a helpful assistant. Answer questions based ONLY on the provided context.
-        If the answer is not in the context, say "I don't have enough information to answer that question."
-        </s>
-        <|user|>
-        Context:
-        #{context}
-        Question: #{query}
-        </s>
-        <|assistant|>
-      PROMPT
-      base_prompt
+    def strip_think_tags(text)  # Removes every <think>...</think> span and trims surrounding whitespace
+      return text unless text  # nil (or false) is passed straight back
+      text.gsub(/<think>.*?<\/think>/m, '').strip  # non-greedy, spans newlines; an unclosed <think> is left in place
     end
     def calculate_confidence(documents)
       return 0.0 if documents.empty?

data/lib/ragnar/query_rewriter.rb CHANGED Viewed

@@ -3,11 +3,11 @@ module Ragnar
     def initialize(llm_manager: nil)  # llm_manager: optional injected manager, e.g. a test double
       @llm_manager = llm_manager || LLMManager.instance  # defaults to the LLMManager singleton
     end
     def rewrite(query)
-      # Get the cached LLM
-      model = @llm_manager.default_llm
+      # Create a fresh chat for each rewrite to avoid conversation history bleed
+      chat = Config.instance.create_chat
       # Define the JSON schema for structured output
       schema = {
         type: "object",
@@ -41,25 +41,28 @@ module Ragnar
         },
         required: ["clarified_intent", "query_type", "sub_queries", "key_terms", "context_needed"]
       }
       prompt = <<~PROMPT
         Analyze the following user query and break it down for retrieval-augmented generation.
         Focus on understanding the user's intent and creating effective sub-queries for searching.
         User Query: #{query}
-        Provide a structured analysis that will help retrieve the most relevant documents.
+        Provide a structured analysis that will help retrieve the most relevant documents. /no_think
       PROMPT
       begin
-        # Use structured generation with schema
-        result = model.generate_structured(
-          prompt,
-          schema: schema
-        )
-        # The result should already be a JSON string
-        JSON.parse(result)
+        response = chat.with_schema(schema).ask(prompt)
+        result = response.content
+        # RubyLLM with_schema returns parsed content; handle both String and Hash
+        if result.is_a?(String)
+          JSON.parse(result)
+        elsif result.is_a?(Hash)
+          result.transform_keys(&:to_s)
+        else
+          result
+        end
       rescue => e
         # Fallback to simple rewriting if structured generation fails
         {
@@ -72,4 +75,4 @@ module Ragnar
       end
     end
   end
-end
+end