RubyGems - ragdoll-cli - Versions diffs - 0.1.8 → 0.1.10 - Mend

ragdoll-cli 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/README.md +42 -4
data/Rakefile +26 -7
data/lib/ragdoll/cli/commands/analytics.rb +222 -0
data/lib/ragdoll/cli/commands/keywords.rb +317 -0
data/lib/ragdoll/cli/commands/search.rb +133 -10
data/lib/ragdoll/cli/commands/update.rb +1 -1
data/lib/ragdoll/cli/standalone_client.rb +233 -2
data/lib/ragdoll/cli/version.rb +1 -1
data/lib/ragdoll/cli.rb +235 -23
metadata +174 -3

data/lib/ragdoll/cli/commands/search.rb CHANGED Viewed

@@ -9,26 +9,112 @@ module Ragdoll
         client = StandaloneClient.new
         puts "Searching for: #{query}"
-        puts "Options: #{options.to_h}" unless options.to_h.empty?
+        puts "Search type: #{options[:search_type] || 'semantic'}"
+        # Show hybrid search weights if applicable
+        if options[:search_type] == 'hybrid'
+          semantic_w = options[:semantic_weight] || 0.7
+          text_w = options[:text_weight] || 0.3
+          puts "Weights: semantic=#{semantic_w}, text=#{text_w}"
+        end
+        # Show keyword search mode if keywords are provided
+        if options[:keywords]
+          keywords_array = options[:keywords].split(',').map(&:strip)
+          keywords_mode = options[:keywords_all] ? "ALL keywords (AND)" : "ANY keywords (OR)"
+          puts "Keywords: #{keywords_array.join(', ')} [#{keywords_mode}]"
+        end
+        # Show other options, excluding display-related ones
+        relevant_options = options.to_h.except(:keywords, :keywords_all, :search_type, :semantic_weight, :text_weight, :format)
+        puts "Options: #{relevant_options}" unless relevant_options.empty?
         puts
         search_options = {}
         search_options[:limit] = options[:limit] if options[:limit]
+        search_options[:threshold] = options[:threshold] if options[:threshold]
         search_options[:content_type] = options[:content_type] if options[:content_type]
         search_options[:classification] = options[:classification] if options[:classification]
-        search_options[:keywords] = options[:keywords].split(',').map(&:strip) if options[:keywords]
+        if options[:keywords]
+          keywords_array = options[:keywords].split(',').map(&:strip)
+          search_options[:keywords] = keywords_array
+          search_options[:keywords_all] = options[:keywords_all] if options[:keywords_all]
+        end
         search_options[:tags] = options[:tags].split(',').map(&:strip) if options[:tags]
+        # Add search tracking options
+        search_options[:session_id] = options[:session_id] if options[:session_id]
+        search_options[:user_id] = options[:user_id] if options[:user_id]
+        search_options[:track_search] = options[:track_search] if options.respond_to?(:key?) ? options.key?(:track_search) : options.track_search
-        search_response = client.search(query: query, **search_options)
+        # Select search method based on search_type
+        search_response = case options[:search_type]
+                         when 'hybrid'
+                           # Add weight parameters if provided
+                           search_options[:semantic_weight] = options[:semantic_weight] if options[:semantic_weight]
+                           search_options[:text_weight] = options[:text_weight] if options[:text_weight]
+                           client.hybrid_search(query, **search_options)
+                         when 'fulltext'
+                           client.fulltext_search(query, **search_options)
+                         else
+                           # Default to semantic search
+                           client.search(query: query, **search_options)
+                         end
         # Extract the actual results array from the response
         results = search_response[:results] || search_response['results'] || []
         if results.empty?
+          # Get statistics for better feedback
+          statistics = search_response[:statistics] || search_response['statistics']
+          execution_time = search_response[:execution_time_ms] || search_response['execution_time_ms']
           total = search_response[:total_results] || search_response['total_results'] || 0
           puts "No results found for '#{query}'"
-          puts "(Total documents in system: #{total})" if total > 0
-          puts "Try adjusting your search terms or check if documents have been processed."
+          puts
+          if statistics
+            threshold = statistics[:threshold_used] || statistics['threshold_used']
+            highest = statistics[:highest_similarity] || statistics['highest_similarity']
+            lowest = statistics[:lowest_similarity] || statistics['lowest_similarity']
+            average = statistics[:average_similarity] || statistics['average_similarity']
+            above_threshold = statistics[:similarities_above_threshold] || statistics['similarities_above_threshold']
+            total_checked = statistics[:total_embeddings_checked] || statistics['total_embeddings_checked']
+            puts "Search Analysis:"
+            puts "  • Similarity threshold: #{threshold&.round(3) || 'N/A'}"
+            puts "  • Embeddings analyzed: #{total_checked || 0}"
+            if highest && lowest && average
+              puts "  • Similarity range: #{lowest.round(3)} - #{highest.round(3)} (avg: #{average.round(3)})"
+            end
+            puts "  • Results above threshold: #{above_threshold || 0}"
+            puts "  • Search time: #{execution_time || 0}ms"
+            puts
+            # Provide actionable suggestions
+            if highest && threshold
+              if highest < threshold
+                suggested_threshold = (highest * 0.9).round(3)
+                puts "💡 Suggestions:"
+                puts "  • Lower the similarity threshold (highest found: #{highest.round(3)})"
+                puts "  • Try: ragdoll search '#{query}' --threshold=#{suggested_threshold}"
+                if highest < 0.3
+                  puts "  • Your query might not match the document content well"
+                  puts "  • Try different or more specific search terms"
+                  puts "  • Try keyword-based search: ragdoll keywords search KEYWORD"
+                  puts "  • List available keywords: ragdoll keywords list"
+                end
+              elsif above_threshold > 0
+                puts "💡 Note: Found #{above_threshold} similar content above threshold #{threshold}"
+                puts "  This suggests an issue with result processing."
+              end
+            end
+          else
+            puts "(Total documents in system: #{total})" if total > 0
+            puts "Try adjusting your search terms or check if documents have been processed."
+            puts "Alternative: Use keyword-based search: ragdoll keywords search KEYWORD"
+          end
           return
         end
@@ -41,28 +127,65 @@ module Ragdoll
             content = safe_string_value(result, [:content, :text], '')
             puts "#{index + 1}. #{title}"
             puts "   ID: #{result[:document_id] || result[:id]}"
-            puts "   Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
+            # Show appropriate score based on search type
+            if options[:search_type] == 'hybrid'
+              puts "   Combined Score: #{result[:combined_score]&.round(3) || 'N/A'}"
+              if result[:search_types]
+                puts "   Match Types: #{result[:search_types].join(', ')}"
+              end
+            elsif options[:search_type] == 'fulltext'
+              puts "   Text Match: #{result[:fulltext_similarity]&.round(3) || 'N/A'}"
+            else
+              puts "   Similarity: #{result[:similarity]&.round(3) || 'N/A'}"
+            end
             puts "   Content: #{content[0..200]}..."
             puts
           end
         else
           # Table format (default)
-          puts "Found #{results.length} results:"
+          puts "Found #{results.length} results (#{search_response[:search_type] || 'semantic'} search):"
           puts
-          puts 'Rank'.ljust(5) + 'Title'.ljust(30) + 'Similarity'.ljust(12) + 'Content Preview'
+          # Adjust column header based on search type
+          score_header = case options[:search_type]
+                        when 'hybrid'
+                          'Score'.ljust(12)
+                        when 'fulltext'
+                          'Text Match'.ljust(12)
+                        else
+                          'Similarity'.ljust(12)
+                        end
+          puts 'Rank'.ljust(5) + 'Title'.ljust(30) + score_header + 'Content Preview'
           puts '-' * 80
           results.each_with_index do |result, index|
             rank = (index + 1).to_s.ljust(5)
             title = safe_string_value(result, [:title, :document_title], 'Untitled')[0..29].ljust(30)
-            similarity = (result[:similarity]&.round(3) || 'N/A').to_s.ljust(12)
+            # Get appropriate score based on search type
+            score = case options[:search_type]
+                   when 'hybrid'
+                     result[:combined_score] || result[:weighted_score]
+                   when 'fulltext'
+                     result[:fulltext_similarity]
+                   else
+                     result[:similarity]
+                   end
+            score_str = (score&.round(3) || 'N/A').to_s.ljust(12)
             content = safe_string_value(result, [:content, :text], '')[0..50]
             content += '...' if content.length == 50
-            puts "#{rank}#{title}#{similarity}#{content}"
+            puts "#{rank}#{title}#{score_str}#{content}"
           end
           puts
+          if options[:search_type] == 'hybrid' && (options[:semantic_weight] || options[:text_weight])
+            puts "Weights: semantic=#{options[:semantic_weight] || 0.7}, text=#{options[:text_weight] || 0.3}"
+          end
           puts 'Use --format=json for complete results or --format=plain for detailed view'
         end
       end

data/lib/ragdoll/cli/commands/update.rb CHANGED Viewed

@@ -18,7 +18,7 @@ module Ragdoll
           return
         end
-        result = client.update_document(id, update_options)
+        result = client.update_document(id, **update_options)
         if result[:success]
           puts "Document ID #{id} updated successfully."

data/lib/ragdoll/cli/standalone_client.rb CHANGED Viewed

@@ -4,6 +4,7 @@ module Ragdoll
   module CLI
     class StandaloneClient
       include DebugMe
       # Forwards a document to Ragdoll.add_document.
       #
       # @param path [Object] passed through as the +path:+ keyword
       # @param opts [Hash] any further keywords, forwarded unchanged
       # @return whatever Ragdoll.add_document returns
       def add_document(path, **opts)
         forwarded = { path: path, **opts }
         Ragdoll.add_document(**forwarded)
       end
@@ -34,8 +35,12 @@ module Ragdoll
       end
-      def search(query, **options)
-        Ragdoll.search(query: query, **options)
# Runs a search through Ragdoll.search.
#
# The positional query is optional so callers may instead supply
# everything (including +query:+) as keywords.
#
# @param query [Object, nil] when truthy, sent as the +query:+ keyword
# @param options [Hash] keywords forwarded unchanged
# @return whatever Ragdoll.search returns
def search(query = nil, **options)
  return Ragdoll.search(**options) unless query

  Ragdoll.search(query: query, **options)
end
@@ -53,6 +58,96 @@ module Ragdoll
         Ragdoll.stats
       end
# Returns aggregate search analytics.
#
# Delegates to Ragdoll.search_analytics when Ragdoll is loaded and
# responds to it; otherwise answers with a zeroed placeholder hash.
#
# @param days [Integer] forwarded as +days:+ (default 30); presumably a
#   look-back window — confirm against the core implementation
# @return the core's result, or the placeholder Hash
def search_analytics(days: 30)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  core_ready = defined?(Ragdoll) && Ragdoll.respond_to?(:search_analytics)
  return Ragdoll.search_analytics(days: days) if core_ready

  # Placeholder response for now
  placeholder = {
    total_searches: 0,
    unique_queries: 0,
    avg_results_per_search: 0.0,
    avg_execution_time: 0.0,
    search_types: {},
    searches_with_results: 0,
    avg_click_through_rate: 0.0
  }
  placeholder
end
# Returns recorded searches.
#
# Delegates to Ragdoll.search_history when Ragdoll is loaded and
# responds to it; otherwise answers with an empty list.
#
# @param limit [Integer] forwarded as +limit:+ (default 20)
# @param options [Hash] further keywords, forwarded unchanged
# @return the core's result, or +[]+ as a placeholder
def search_history(limit: 20, **options)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  unless defined?(Ragdoll) && Ragdoll.respond_to?(:search_history)
    # Placeholder response for now
    return []
  end

  Ragdoll.search_history(limit: limit, **options)
end
# Returns trending queries.
#
# Delegates to Ragdoll.trending_queries when Ragdoll is loaded and
# responds to it; otherwise answers with an empty list.
#
# @param limit [Integer] forwarded as +limit:+ (default 10)
# @param days [Integer] forwarded as +days:+ (default 7)
# @return the core's result, or +[]+ as a placeholder
def trending_queries(limit: 10, days: 7)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  core_ready = defined?(Ragdoll) && Ragdoll.respond_to?(:trending_queries)

  # The empty list is a placeholder response for now
  core_ready ? Ragdoll.trending_queries(limit: limit, days: days) : []
end
# Asks the core to clean up stored searches.
#
# Delegates to Ragdoll.cleanup_searches when Ragdoll is loaded and
# responds to it; otherwise reports zero counts.
#
# @param options [Hash] keywords forwarded unchanged
# @return the core's result, or a placeholder Hash with
#   +:orphaned_count+ and +:unused_count+ both 0
def cleanup_searches(**options)
  # TODO: This will delegate to Ragdoll core when analytics are implemented
  unless defined?(Ragdoll) && Ragdoll.respond_to?(:cleanup_searches)
    # Placeholder response for now
    return { orphaned_count: 0, unused_count: 0 }
  end

  Ragdoll.cleanup_searches(**options)
end
# Runs a hybrid search by delegating to Ragdoll.hybrid_search.
#
# @param query [Object] sent as the +query:+ keyword
# @param options [Hash] further keywords, forwarded unchanged
# @return whatever Ragdoll.hybrid_search returns
def hybrid_search(query, **options)
  forwarded = { query: query }.merge(options)
  Ragdoll.hybrid_search(**forwarded)
end
# Runs a full-text search via Ragdoll::Document.search_content and wraps
# the matches in the response hash the search command consumes.
#
# @param query [Object] passed positionally to search_content
# @param options [Hash] forwarded unchanged to search_content;
#   +:threshold+ is additionally echoed back as +:threshold_used+
#   (0.0 when absent)
# @return [Hash] +:query+, +:search_type+ ('fulltext'), +:results+,
#   +:total_results+ and +:threshold_used+; when anything raises a
#   StandardError the hash instead carries empty +:results+ and an
#   +:error+ message
def fulltext_search(query, **options)
  threshold = options[:threshold] || 0.0

  # Get full-text search results
  documents = Ragdoll::Document.search_content(query, **options)

  # Format results to match expected structure
  results = documents.map do |doc|
    {
      document_id: doc.id.to_s,
      document_title: doc.title,
      document_location: doc.location,
      # Preview only. to_s keeps a document without content from raising,
      # which would otherwise discard every match via the rescue below.
      content: doc.content.to_s[0..500],
      fulltext_similarity: doc.respond_to?(:fulltext_similarity) ? doc.fulltext_similarity : nil,
      document_type: doc.document_type,
      status: doc.status
    }
  end

  {
    query: query,
    search_type: 'fulltext',
    results: results,
    total_results: results.length,
    threshold_used: threshold
  }
rescue StandardError => e
  {
    query: query,
    search_type: 'fulltext',
    results: [],
    total_results: 0,
    error: "Full-text search failed: #{e.message}"
  }
end
       def healthy?
         Ragdoll.healthy?
@@ -63,6 +158,142 @@ module Ragdoll
         Ragdoll.configuration
       end
+      # Keywords-specific search methods
# Keywords-specific search methods

# Finds documents by keyword.
#
# Uses Ragdoll::Document.search_by_keywords when that constant is defined
# and responds to it, converting each match with +to_hash+. Otherwise falls
# back to #search with a +keywords:+ filter.
# NOTE(review): the fallback returns whatever #search returns, which may
# not have the same shape as the array of hashes — confirm with callers.
#
# @param keywords [Object] passed positionally, or as +keywords:+ on fallback
# @param options [Hash] further keywords, forwarded unchanged
def search_by_keywords(keywords, **options)
  model_ready = defined?(Ragdoll::Document) &&
                Ragdoll::Document.respond_to?(:search_by_keywords)

  # Fallback to regular search with keywords filter
  return search(keywords: keywords, **options) unless model_ready

  matches = Ragdoll::Document.search_by_keywords(keywords, **options)
  matches.map(&:to_hash)
end
# Finds documents that carry ALL of the given keywords.
#
# Uses Ragdoll::Document.search_by_keywords_all when that constant is
# defined and responds to it, converting each match with +to_hash+.
# Otherwise falls back to #search with a +keywords:+ filter.
# NOTE(review): the fallback returns whatever #search returns, which may
# not have the same shape as the array of hashes — confirm with callers.
#
# @param keywords [Object] passed positionally, or as +keywords:+ on fallback
# @param options [Hash] further keywords, forwarded unchanged; an explicit
#   +keywords_all:+ here overrides the fallback's default of +true+
def search_by_keywords_all(keywords, **options)
  if defined?(Ragdoll::Document) && Ragdoll::Document.respond_to?(:search_by_keywords_all)
    Ragdoll::Document.search_by_keywords_all(keywords, **options).map(&:to_hash)
  else
    # Fallback to regular search with keywords filter. keywords_all keeps
    # the AND semantics this method promises; without it the fallback is
    # indistinguishable from search_by_keywords (ANY match).
    search(keywords: keywords, keywords_all: true, **options)
  end
end
# Returns keyword usage counts from Ragdoll::Document.keyword_frequencies,
# keeping entries whose count is at least +min_count+ and at most the first
# +limit+ of those. No sorting happens here, so the order is whatever the
# core returns.
#
# @param limit [Integer] maximum number of entries kept (default 100)
# @param min_count [Integer] smallest count kept (default 1)
# @return [Hash] keyword => count; empty when Ragdoll::Document is not
#   defined or does not respond to +keyword_frequencies+
def keyword_frequencies(limit: 100, min_count: 1)
  model_ready = defined?(Ragdoll::Document) &&
                Ragdoll::Document.respond_to?(:keyword_frequencies)
  return {} unless model_ready

  # Filter by min_count and limit
  frequent = Ragdoll::Document.keyword_frequencies.select { |_word, hits| hits >= min_count }
  frequent.first(limit).to_h
end
# Adds one or more keywords to a document and saves it.
#
# @param document_id [Object] passed to Ragdoll::Document.find
# @param keywords [Object] a single keyword, a list, or nil (coerced with Array())
# @return [Hash] +{ success: true, keywords: ... }+ with the document's
#   +keywords_array+, or +{ success: false, message: ... }+ when any
#   StandardError is raised along the way
def add_keywords_to_document(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  Array(keywords).each do |word|
    record.add_keyword(word)
  end
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => e
  { success: false, message: e.message }
end
# Removes one or more keywords from a document and saves it.
#
# @param document_id [Object] passed to Ragdoll::Document.find
# @param keywords [Object] a single keyword, a list, or nil (coerced with Array())
# @return [Hash] +{ success: true, keywords: ... }+ with the document's
#   +keywords_array+, or +{ success: false, message: ... }+ when any
#   StandardError is raised along the way
def remove_keywords_from_document(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  Array(keywords).each do |word|
    record.remove_keyword(word)
  end
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => e
  { success: false, message: e.message }
end
# Replaces a document's keywords and saves it.
#
# @param document_id [Object] passed to Ragdoll::Document.find
# @param keywords [Object] a single keyword, a list, or nil (coerced with
#   Array() before being assigned to +keywords=+)
# @return [Hash] +{ success: true, keywords: ... }+ with the document's
#   +keywords_array+, or +{ success: false, message: ... }+ when any
#   StandardError is raised along the way
def set_document_keywords(document_id, keywords)
  record = Ragdoll::Document.find(document_id)
  record.keywords = Array(keywords)
  record.save!

  { success: true, keywords: record.keywords_array }
rescue StandardError => e
  { success: false, message: e.message }
end
# Summarises keyword usage across documents whose +keywords+ is not +[]+.
#
# @return [Hash] with
#   +:total_keywords+            number of distinct keywords seen,
#   +:documents_with_keywords+   +count+ of the matching documents,
#   +:avg_keywords_per_document+ Float mean, or Integer 0 when no document matches,
#   +:top_keywords+              up to 10 +[keyword, count]+ pairs, most used first,
#   +:singleton_keywords+        number of keywords used by exactly one document.
#   All values are zero/empty when Ragdoll::Document is not defined. When a
#   StandardError is raised the same zeroed hash is returned with an added
#   +:error+ message.
def keyword_statistics
  empty_stats = {
    total_keywords: 0,
    documents_with_keywords: 0,
    avg_keywords_per_document: 0,
    top_keywords: [],
    singleton_keywords: 0
  }
  return empty_stats unless defined?(Ragdoll::Document)

  # Build the relation once and reuse it for both the count and the scan.
  tagged_documents = Ragdoll::Document.where.not(keywords: [])
  documents_with_keywords = tagged_documents.count

  total_keyword_count = 0
  frequencies = Hash.new(0)
  tagged_documents.find_each do |doc|
    doc_keywords = doc.keywords_array
    total_keyword_count += doc_keywords.length
    doc_keywords.each { |keyword| frequencies[keyword] += 1 }
  end

  avg_keywords_per_document =
    documents_with_keywords > 0 ? (total_keyword_count.to_f / documents_with_keywords) : 0

  {
    total_keywords: frequencies.size,
    documents_with_keywords: documents_with_keywords,
    avg_keywords_per_document: avg_keywords_per_document,
    # Top 10 most common keywords
    top_keywords: frequencies.sort_by { |_keyword, uses| -uses }.first(10),
    # Count singleton keywords (used by only 1 document)
    singleton_keywords: frequencies.count { |_keyword, uses| uses == 1 }
  }
rescue StandardError => e
  empty_stats.merge(error: e.message)
end
     end
   end

data/lib/ragdoll/cli/version.rb CHANGED Viewed

@@ -3,6 +3,6 @@
 module Ragdoll
   module CLI
-    VERSION = "0.1.8"
+    VERSION = "0.1.10"
   end
 end