RubyGems - ragnar-cli - Versions diffs - 0.1.0.pre.4 → 0.1.0.pre.5 - Mend

ragnar-cli 0.1.0.pre.4 → 0.1.0.pre.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/README.md +99 -42
data/lib/ragnar/cli.rb +94 -105
data/lib/ragnar/cli_umap.rb +86 -0
data/lib/ragnar/config.rb +101 -7
data/lib/ragnar/embedder.rb +1 -1
data/lib/ragnar/indexer.rb +4 -2
data/lib/ragnar/llm_manager.rb +31 -30
data/lib/ragnar/query_processor.rb +87 -52
data/lib/ragnar/query_rewriter.rb +21 -18
data/lib/ragnar/umap_processor.rb +54 -30
data/lib/ragnar/umap_transform_service.rb +1 -1
data/lib/ragnar/version.rb +1 -1
data/lib/ragnar.rb +3 -1
metadata +36 -16

data/lib/ragnar/config.rb CHANGED Viewed

@@ -57,12 +57,74 @@ module Ragnar
       get('embeddings.chunk_overlap', Ragnar::DEFAULT_CHUNK_OVERLAP)
     end
+    # LLM Profile support
+    # Profiles allow switching between LLM providers/models via --profile flag
+    # Backwards compatible: flat llm.provider/llm.default_model still work if no profiles defined
+    def set_active_profile(name)
+      name = name.to_s
+      profiles = llm_profiles
+      unless profiles.key?(name)
+        available = profiles.keys.join(', ')
+        raise ArgumentError, "Unknown profile '#{name}'. Available profiles: #{available}"
+      end
+      @active_profile = name
+    end
+    def llm_profile_name
+      @active_profile || get('llm.default_profile', nil) || llm_profiles.keys.first || 'default'
+    end
+    def llm_profiles
+      configured = get('llm.profiles', nil)
+      if configured.is_a?(Hash) && !configured.empty?
+        configured
+      else
+        # Backwards compat: synthesize a profile from flat keys
+        {
+          'default' => {
+            'provider' => get('llm.provider', 'red_candle'),
+            'model' => get('llm.default_model', 'MaziyarPanahi/Qwen3-4B-GGUF')
+          }
+        }
+      end
+    end
+    def llm_profile
+      llm_profiles[llm_profile_name] || llm_profiles.values.first
+    end
+    def available_profiles
+      llm_profiles.keys
+    end
+    # Create a new RubyLLM chat instance with the active profile's settings
+    def create_chat
+      api_key = llm_api_key
+      provider = llm_provider.to_sym
+      # Configure RubyLLM with the API key if present
+      if api_key
+        configure_provider_api_key(provider, api_key)
+      end
+      RubyLLM.chat(provider: provider, model: llm_model)
+    end
+    def llm_provider
+      llm_profile&.dig('provider') || get('llm.provider', 'red_candle')
+    end
     def llm_model
-      get('llm.default_model', "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
+      llm_profile&.dig('model') || get('llm.default_model', 'MaziyarPanahi/Qwen3-4B-GGUF')
     end
     def llm_gguf_file
-      get('llm.default_gguf_file', "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf")
+      get('llm.default_gguf_file', "Qwen3-4B.Q4_K_M.gguf")
+    end
+    def llm_api_key
+      llm_profile&.dig('api_key') || get('llm.api_key', nil)
     end
     def interactive_prompt
@@ -84,6 +146,14 @@ module Ragnar
     def enable_query_rewriting?
       get('query.enable_query_rewriting', true)
     end
+    def enable_reranking?
+      get('query.enable_reranking', true)
+    end
+    def reranker_model
+      get('query.reranker_model', 'BAAI/bge-reranker-base')
+    end
     # Config file management
     def config_file_path
@@ -121,12 +191,27 @@ module Ragnar
           'model_filename' => 'umap_model.bin'
         },
         'llm' => {
-          'default_model' => 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
-          'default_gguf_file' => 'tinyllama-1.1b-chat-v1.0.q4_k_m.gguf'
+          'default_profile' => 'red_candle',
+          'profiles' => {
+            'red_candle' => {
+              'provider' => 'red_candle',
+              'model' => 'MaziyarPanahi/Qwen3-4B-GGUF'
+            },
+            'opus' => {
+              'provider' => 'anthropic',
+              'model' => 'claude-opus-4-6'
+            },
+            'sonnet' => {
+              'provider' => 'anthropic',
+              'model' => 'claude-sonnet-4-6'
+            }
+          }
         },
         'query' => {
           'top_k' => 3,
-          'enable_query_rewriting' => true
+          'enable_query_rewriting' => true,
+          'enable_reranking' => true,
+          'reranker_model' => 'BAAI/bge-reranker-base'
         },
         'interactive' => {
           'prompt' => 'ragnar> ',
@@ -146,7 +231,16 @@ module Ragnar
     end
     private
+    def configure_provider_api_key(provider, api_key)
+      case provider
+      when :anthropic
+        RubyLLM.configure { |c| c.anthropic_api_key = api_key }
+      when :openai
+        RubyLLM.configure { |c| c.openai_api_key = api_key }
+      end
+    end
     def load_config
       @config_file_path = find_config_file

data/lib/ragnar/embedder.rb CHANGED Viewed

@@ -34,7 +34,7 @@ module Ragnar
     def embed_batch(texts, show_progress: true)
       embeddings = []
-      if show_progress
+      if show_progress && $stdout.respond_to?(:ioctl)
         progressbar = TTY::ProgressBar.new(
           "Generating embeddings [:bar] :percent :current/:total",
           total: texts.size,

data/lib/ragnar/indexer.rb CHANGED Viewed

@@ -31,7 +31,7 @@ module Ragnar
       puts "Found #{files.size} file(s) to process" if @show_progress
-      file_progress = if @show_progress
+      file_progress = if @show_progress && $stdout.respond_to?(:ioctl)
         TTY::ProgressBar.new(
           "Processing [:bar] :percent :current/:total - :filename",
           total: files.size,
@@ -43,13 +43,15 @@ module Ragnar
         nil
       end
-      files.each do |file_path|
+      files.each_with_index do |file_path, idx|
         begin
           if file_progress
             # Update the progress bar with current filename
             filename = File.basename(file_path)
             filename = filename[0..27] + "..." if filename.length > 30
             file_progress.advance(0, filename: filename)
+          elsif @show_progress
+            puts "Processing (#{idx + 1}/#{files.size}): #{File.basename(file_path)}"
           end
           process_file(file_path, stats, file_progress)

data/lib/ragnar/llm_manager.rb CHANGED Viewed

@@ -1,46 +1,47 @@
 module Ragnar
-  # Singleton manager for LLM instances to avoid reloading models
+  # Singleton manager for RubyLLM chat instances to avoid reloading models.
+  # Supports any RubyLLM provider (red_candle for local, openai, anthropic, etc.)
   class LLMManager
     include Singleton
     def initialize
-      @llms = {}
+      @chats = {}
       @mutex = Mutex.new
     end
-    # Get or create an LLM instance
-    # @param model_id [String] The model identifier
-    # @param gguf_file [String, nil] Optional GGUF file for quantized models
-    # @return [Candle::LLM] The LLM instance
-    def get_llm(model_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-                gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
-      cache_key = "#{model_id}:#{gguf_file}"
+    # Get or create a RubyLLM chat instance
+    # @param provider [String, Symbol] The RubyLLM provider (default from config)
+    # @param model [String] The model identifier (default from config)
+    # @return [RubyLLM::Chat] A cached chat instance
+    def get_chat(provider: nil, model: nil)
+      config = Config.instance
+      provider ||= config.llm_provider
+      model ||= config.llm_model
+      cache_key = "#{provider}:#{model}"
       @mutex.synchronize do
-        @llms[cache_key] ||= begin
-          # Only show loading message if not in interactive mode or if verbose
-          show_loading = ENV['DEBUG'] # Only show in debug mode for now
-          puts "Loading LLM: #{model_id}..." if show_loading && !@llms.key?(cache_key)
-          if gguf_file
-            Candle::LLM.from_pretrained(model_id, gguf_file: gguf_file)
-          else
-            Candle::LLM.from_pretrained(model_id)
-          end
+        @chats[cache_key] ||= begin
+          puts "Loading LLM: #{model} (#{provider})..." if ENV['DEBUG']
+          Config.instance.create_chat
         end
       end
     end
-    # Clear all cached models (useful for memory management)
+    # Clear all cached chat instances (useful for memory management)
     def clear_cache
       @mutex.synchronize do
-        @llms.clear
+        @chats.clear
       end
     end
-    # Get the default LLM for the application
-    def default_llm
-      get_llm
+    # Get the default chat instance for the application
+    def default_chat
+      get_chat
     end
+    # Backwards compatibility aliases
+    alias_method :get_llm, :get_chat
+    alias_method :default_llm, :default_chat
   end
-end
+end

data/lib/ragnar/query_processor.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module Ragnar
       @reranker = nil # Will initialize when needed
     end
-    def query(user_query, top_k: 3, verbose: false, enable_rewriting: true)
+    def query(user_query, top_k: 3, verbose: false, enable_rewriting: true, enable_reranking: false)
       puts "Processing query: #{user_query}" if verbose
       # Step 1: Rewrite and analyze the query (if enabled)
@@ -26,7 +26,15 @@ module Ragnar
         puts "-"*60 if verbose
         rewritten = @rewriter.rewrite(user_query)
+        # Always include the original query in sub-queries to ensure direct matches
+        # are found regardless of how the rewriter reformulates
+        sub_queries = rewritten['sub_queries'] || []
+        unless sub_queries.include?(user_query)
+          sub_queries.unshift(user_query)
+        end
+        rewritten['sub_queries'] = sub_queries
         if verbose
           puts "\nOriginal Query: #{user_query}"
           puts "\nRewritten Query Analysis:"
@@ -95,18 +103,25 @@ module Ragnar
         puts "-"*60
       end
-      reranked = rerank_documents(
-        query: rewritten['clarified_intent'],
-        documents: candidates,
-        top_k: top_k * 2  # Get more than we need for context
-      )
+      if enable_reranking
+        reranked = rerank_documents(
+          query: user_query,
+          documents: candidates,
+          top_k: top_k * 2
+        )
+      else
+        # Use retrieval order (RRF scores) directly — often more reliable than
+        # small cross-encoder rerankers on domain-specific corpora
+        reranked = candidates
+      end
       if verbose && reranked.any?
-        puts "\nTop Reranked Documents:"
+        puts "\nTop #{enable_reranking ? 'Reranked' : 'Retrieved'} Documents:"
         reranked[0..2].each_with_index do |doc, idx|
           full_text = (doc[:chunk_text] || doc[:text] || "").gsub(/\s+/, ' ')
           puts "  #{idx + 1}. [#{File.basename(doc[:file_path] || 'unknown')}]"
           puts "     Score: #{doc[:score]&.round(4) if doc[:score]}"
+          puts "     Distance: #{doc[:distance]&.round(4) if doc[:distance]}"
           puts "     Full chunk (#{full_text.length} chars):"
           puts "     \"#{full_text}\""
           puts ""
@@ -174,12 +189,12 @@ module Ragnar
         query: user_query,
         clarified: rewritten['clarified_intent'],
         answer: response,
-        sources: context_docs.map { |d|
+        sources: context_docs.map { |d|
           {
-            source_file: d[:file_path] || d[:source_file],
-            chunk_index: d[:chunk_index]
+            source_file: d[:file_path] || d[:source_file] || d["file_path"],
+            chunk_index: d[:chunk_index] || d["chunk_index"]
           }
-        },
+        }.reject { |s| s[:source_file].nil? },
         sub_queries: rewritten['sub_queries'],
         confidence: calculate_confidence(reranked[0...top_k])
       }
@@ -260,22 +275,43 @@ module Ragnar
           k: k,
           use_reduced: use_reduced
         )
         if verbose
-          puts "  Found #{vector_results.length} matches"
+          puts "  Vector search: #{vector_results.length} matches"
           if vector_results.any?
             best = vector_results.first
-            puts "  Best match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
+            puts "  Best vector match: [#{File.basename(best[:file_path] || 'unknown')}] (distance: #{best[:distance]&.round(3)})"
           end
         end
         # Add query index for RRF
         vector_results.each do |result|
           result[:query_idx] = idx
           result[:retrieval_method] = :vector
         end
         all_results.concat(vector_results)
+        # Full-text search for keyword matching (hybrid search)
+        begin
+          fts_results = @database.full_text_search(query, limit: k)
+          if verbose && fts_results.any?
+            puts "  FTS: #{fts_results.length} matches"
+            best_fts = fts_results.first
+            puts "  Best FTS match: [#{File.basename(best_fts[:file_path] || 'unknown')}]"
+          end
+          fts_results.each_with_index do |result, rank|
+            # Synthesize a distance from FTS rank (lower rank = better match)
+            result[:distance] = 0.1 + (rank * 0.05)
+            result[:query_idx] = idx
+            result[:retrieval_method] = :fts
+          end
+          all_results.concat(fts_results)
+        rescue => e
+          puts "  FTS unavailable: #{e.message}" if verbose
+        end
       end
       if verbose
@@ -299,10 +335,18 @@ module Ragnar
       results.each do |result|
         doc_id = result[:id]
-        doc_scores[doc_id] ||= {
-          score: 0.0,
-          document: result
-        }
+        if doc_scores[doc_id]
+          # Prefer the document with more complete metadata
+          existing = doc_scores[doc_id][:document]
+          if result[:file_path] && !existing[:file_path]
+            doc_scores[doc_id][:document] = result
+          end
+        else
+          doc_scores[doc_id] = {
+            score: 0.0,
+            document: result
+          }
+        end
         # RRF formula: 1 / (k + rank)
         # Using distance as a proxy for rank (lower distance = better rank)
@@ -337,14 +381,14 @@ module Ragnar
       # Initialize reranker if not already done
       @reranker ||= Candle::Reranker.from_pretrained(
-        "cross-encoder/ms-marco-MiniLM-L-12-v2"
+        Config.instance.reranker_model
       )
       # Prepare document texts - use chunk_text field
       texts = unique_docs.map { |doc| doc[:chunk_text] || doc[:text] || "" }
-      # Rerank - returns array of {doc_id:, score:, text:}
-      reranked = @reranker.rerank(query, texts)
+      # Rerank - use raw logits (no sigmoid) for better score separation
+      reranked = @reranker.rerank(query, texts, apply_sigmoid: false)
       # Map back to original documents with scores
       reranked.map do |result|
@@ -361,46 +405,37 @@ module Ragnar
       # In the future, we could fetch neighboring chunks for more context
       context_size = case context_needed
                      when "extensive" then 5
-                     when "moderate" then 3
-                     else 2
+                     when "moderate" then 4
+                     else 3
                      end
       documents.first(context_size)
     end
     def generate_response(query:, repacked_context:, query_type:)
-      # Get cached LLM from manager
-      llm = @llm_manager.default_llm
-      # Create prompt with repacked context
-      prompt = build_prompt(query, repacked_context, query_type)
-      # Generate response using default config
-      llm.generate(prompt)
+      # Create a fresh chat for each query to avoid conversation history bleed
+      chat = Config.instance.create_chat
+      chat.with_instructions(
+        "You are a helpful assistant. Answer questions based ONLY on the provided context. " \
+        "If the answer is not in the context, say \"I don't have enough information to answer that question.\" " \
+        "Be concise and direct. /no_think"
+      )
+      prompt = "Context:\n#{repacked_context}\n\nQuestion: #{query}"
+      response = chat.ask(prompt).content
+      # Strip <think>...</think> blocks that some models (e.g. Qwen3) include
+      strip_think_tags(response)
     rescue => e
       # Fallback to returning the repacked context
       puts "Warning: LLM generation failed (#{e.message})"
       "Based on the retrieved information:\n\n#{repacked_context[0..500]}..."
     end
-    def build_prompt(query, context, query_type)
-      base_prompt = <<~PROMPT
-        <|system|>
-        You are a helpful assistant. Answer questions based ONLY on the provided context.
-        If the answer is not in the context, say "I don't have enough information to answer that question."
-        </s>
-        <|user|>
-        Context:
-        #{context}
-        Question: #{query}
-        </s>
-        <|assistant|>
-      PROMPT
-      base_prompt
+    def strip_think_tags(text)
+      return text unless text
+      text.gsub(/<think>.*?<\/think>/m, '').strip
     end
     def calculate_confidence(documents)
       return 0.0 if documents.empty?

data/lib/ragnar/query_rewriter.rb CHANGED Viewed

@@ -3,11 +3,11 @@ module Ragnar
     def initialize(llm_manager: nil)
       @llm_manager = llm_manager || LLMManager.instance
     end
     def rewrite(query)
-      # Get the cached LLM
-      model = @llm_manager.default_llm
+      # Create a fresh chat for each rewrite to avoid conversation history bleed
+      chat = Config.instance.create_chat
       # Define the JSON schema for structured output
       schema = {
         type: "object",
@@ -41,25 +41,28 @@ module Ragnar
         },
         required: ["clarified_intent", "query_type", "sub_queries", "key_terms", "context_needed"]
       }
       prompt = <<~PROMPT
         Analyze the following user query and break it down for retrieval-augmented generation.
         Focus on understanding the user's intent and creating effective sub-queries for searching.
         User Query: #{query}
-        Provide a structured analysis that will help retrieve the most relevant documents.
+        Provide a structured analysis that will help retrieve the most relevant documents. /no_think
       PROMPT
       begin
-        # Use structured generation with schema
-        result = model.generate_structured(
-          prompt,
-          schema: schema
-        )
-        # The result should already be a JSON string
-        JSON.parse(result)
+        response = chat.with_schema(schema).ask(prompt)
+        result = response.content
+        # RubyLLM with_schema returns parsed content; handle both String and Hash
+        if result.is_a?(String)
+          JSON.parse(result)
+        elsif result.is_a?(Hash)
+          result.transform_keys(&:to_s)
+        else
+          result
+        end
       rescue => e
         # Fallback to simple rewriting if structured generation fails
         {
@@ -72,4 +75,4 @@ module Ragnar
       end
     end
   end
-end
+end

data/lib/ragnar/umap_processor.rb CHANGED Viewed

@@ -138,45 +138,69 @@ module Ragnar
       # Perform the actual training using the class-based API
       puts "  Training UMAP model (this may take a moment)..."
+      attempts = 0
+      max_attempts = 3
       begin
+        attempts += 1
         @umap_instance = ClusterKit::Dimensionality::UMAP.new(
           n_components: n_components,
           n_neighbors: n_neighbors
         )
         @reduced_embeddings = @umap_instance.fit_transform(embedding_matrix)
         puts "  ✓ UMAP training complete"
-      rescue => e
-        # Provide helpful error message without exposing internal stack trace
-        error_msg = "\n❌ UMAP training failed\n\n"
-        if e.message.include?("index out of bounds")
-          error_msg += "The UMAP algorithm encountered an index out of bounds error.\n\n"
-          error_msg += "This typically happens when:\n"
-          error_msg += "  • The embedding data contains invalid values (NaN, Infinity)\n"
-          error_msg += "  • The parameters are incompatible with your data\n"
-          error_msg += "  • There are duplicate or corrupted embeddings\n\n"
-          error_msg += "Suggested solutions:\n"
-          error_msg += "  1. Try with more conservative parameters:\n"
-          error_msg += "     ragnar train-umap --n-components 10 --n-neighbors 5\n\n"
-          error_msg += "  2. Re-index your documents to regenerate embeddings:\n"
-          error_msg += "     ragnar index <path> --force\n\n"
-          error_msg += "  3. Check your embedding model configuration\n\n"
-          error_msg += "Current parameters:\n"
-          error_msg += "  • n_components: #{n_components}\n"
-          error_msg += "  • n_neighbors: #{n_neighbors}\n"
-          error_msg += "  • embeddings: #{embeddings.size} samples\n"
-          error_msg += "  • dimensions: #{original_dims}\n"
+      rescue Exception => e
+        # Catch Exception (not just StandardError) because Rust panics from
+        # ClusterKit raise fatal errors that bypass the default rescue
+        if e.message.include?("LapackInvalidValue") || e.message.include?("SGESDD") || e.message.include?("illegal value")
+          if attempts < max_attempts
+            # LAPACK SVD can fail with certain dimension combinations — retry with fewer components
+            n_components = [n_components / 2, 2].max
+            n_neighbors = [n_neighbors, n_components - 1, 3].min
+            puts "  ⚠️  LAPACK error, retrying with n_components=#{n_components}, n_neighbors=#{n_neighbors} (attempt #{attempts + 1}/#{max_attempts})..."
+            retry
+          end
+          raise RuntimeError, "\n❌ UMAP training failed due to a LAPACK numerical error.\n\n" \
+            "This can happen with certain data/dimension combinations.\n" \
+            "Try reducing n_components:\n" \
+            "  ragnar umap train --n-components 10 --n-neighbors 5\n\n" \
+            "Current parameters:\n" \
+            "  • n_components: #{n_components}\n" \
+            "  • n_neighbors: #{n_neighbors}\n" \
+            "  • embeddings: #{embeddings.size} samples\n" \
+            "  • dimensions: #{original_dims}\n"
+        elsif e.message.include?("index out of bounds")
+          raise RuntimeError, "\n❌ UMAP training failed\n\n" \
+            "The UMAP algorithm encountered an index out of bounds error.\n\n" \
+            "This typically happens when:\n" \
+            "  • The embedding data contains invalid values (NaN, Infinity)\n" \
+            "  • The parameters are incompatible with your data\n" \
+            "  • There are duplicate or corrupted embeddings\n\n" \
+            "Suggested solutions:\n" \
+            "  1. Try with more conservative parameters:\n" \
+            "     ragnar umap train --n-components 10 --n-neighbors 5\n\n" \
+            "  2. Re-index your documents to regenerate embeddings:\n" \
+            "     ragnar index <path> --force\n\n" \
+            "  3. Check your embedding model configuration\n\n" \
+            "Current parameters:\n" \
+            "  • n_components: #{n_components}\n" \
+            "  • n_neighbors: #{n_neighbors}\n" \
+            "  • embeddings: #{embeddings.size} samples\n" \
+            "  • dimensions: #{original_dims}\n"
+        elsif e.is_a?(StandardError) || e.message.include?("unwrap")
+          raise RuntimeError, "\n❌ UMAP training failed\n\n" \
+            "Error: #{e.message}\n\n" \
+            "This may be due to incompatible parameters or data issues.\n" \
+            "Try using more conservative parameters:\n" \
+            "  ragnar umap train --n-components 10 --n-neighbors 5\n"
         else
-          error_msg += "Error: #{e.message}\n\n"
-          error_msg += "This may be due to incompatible parameters or data issues.\n"
-          error_msg += "Try using more conservative parameters:\n"
-          error_msg += "  ragnar train-umap --n-components 10 --n-neighbors 5\n"
+          # Re-raise non-application exceptions (Interrupt, SignalException, etc.)
+          raise
         end
-        raise RuntimeError, error_msg
       end
       # Store the parameters for saving

data/lib/ragnar/umap_transform_service.rb CHANGED Viewed

@@ -160,7 +160,7 @@ module Ragnar
       return if @umap_model
       unless File.exist?(@model_path)
-        raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar train-umap'."
+        raise "UMAP model not found at #{@model_path}. Please train a model first using 'ragnar umap train'."
       end
       @umap_model = ClusterKit::Dimensionality::UMAP.load_model(@model_path)

data/lib/ragnar/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Ragnar
-  VERSION = "0.1.0.pre.4"
+  VERSION = "0.1.0.pre.5"
 end