vectra-client 0.3.4 → 0.4.0

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
data/lib/vectra/batch.rb CHANGED
@@ -17,6 +17,17 @@ module Vectra
   #   )
   #   puts "Upserted: #{result[:upserted_count]}"
   #
+  # @example With progress tracking
+  #   batch.upsert_async(
+  #     index: 'docs',
+  #     vectors: large_array,
+  #     on_progress: ->(stats) {
+  #       puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
+  #       puts "  Chunk #{stats[:current_chunk] + 1}/#{stats[:total_chunks]}"
+  #       puts "  Success: #{stats[:success_count]}, Failed: #{stats[:failed_count]}"
+  #     }
+  #   )
+  #
   class Batch
     DEFAULT_CONCURRENCY = 4
     DEFAULT_CHUNK_SIZE = 100
@@ -38,12 +49,23 @@ module Vectra
     # @param vectors [Array<Hash>] vectors to upsert
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] vectors per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash] aggregated result with :upserted_count, :chunks, :errors
-    def upsert_async(index:, vectors:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    #
+    # @example With progress callback
+    #   batch.upsert_async(
+    #     index: 'docs',
+    #     vectors: large_array,
+    #     on_progress: ->(stats) {
+    #       puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
+    #     }
+    #   )
+    def upsert_async(index:, vectors:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = vectors.each_slice(chunk_size).to_a
       return { upserted_count: 0, chunks: 0, errors: [] } if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: vectors.size, on_progress: on_progress) do |chunk|
         client.upsert(index: index, vectors: chunk, namespace: namespace)
       end

@@ -56,12 +78,14 @@ module Vectra
     # @param ids [Array<String>] IDs to delete
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] IDs per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash] aggregated result
-    def delete_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    def delete_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
       chunks = ids.each_slice(chunk_size).to_a
       return { deleted_count: 0, chunks: 0, errors: [] } if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: ids.size, on_progress: on_progress) do |chunk|
         client.delete(index: index, ids: chunk, namespace: namespace)
       end

@@ -74,12 +98,14 @@ module Vectra
     # @param ids [Array<String>] IDs to fetch
     # @param namespace [String, nil] optional namespace
     # @param chunk_size [Integer] IDs per chunk (default: 100)
+    # @param on_progress [Proc, nil] optional callback called after each chunk completes
+    #   Callback receives hash with: processed, total, percentage, current_chunk, total_chunks, success_count, failed_count
     # @return [Hash<String, Vector>] merged results
-    def fetch_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE)
+    def fetch_async(index:, ids:, namespace: nil, chunk_size: DEFAULT_CHUNK_SIZE, on_progress: nil)
      chunks = ids.each_slice(chunk_size).to_a
      return {} if chunks.empty?

-      results = process_chunks_concurrently(chunks) do |chunk|
+      results = process_chunks_concurrently(chunks, total_items: ids.size, on_progress: on_progress) do |chunk|
         client.fetch(index: index, ids: chunk, namespace: namespace)
       end

@@ -88,15 +114,43 @@ module Vectra

     private

-    def process_chunks_concurrently(chunks)
+    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength
+    def process_chunks_concurrently(chunks, total_items: nil, on_progress: nil)
       pool = Concurrent::FixedThreadPool.new(concurrency)
       futures = []
+      progress_mutex = Mutex.new
+      completed_count = Concurrent::AtomicFixnum.new(0)
+      success_count = Concurrent::AtomicFixnum.new(0)
+      failed_count = Concurrent::AtomicFixnum.new(0)

       chunks.each_with_index do |chunk, index|
         futures << Concurrent::Future.execute(executor: pool) do
-          { index: index, result: yield(chunk), error: nil }
+          result = yield(chunk)
+          success_count.increment
+          { index: index, result: result, error: nil }
         rescue StandardError => e
+          failed_count.increment
           { index: index, result: nil, error: e }
+        ensure
+          # Call progress callback when chunk completes
+          if on_progress
+            completed = completed_count.increment
+            total_size = chunks.size * chunks.first.size
+            processed = [completed * chunks.first.size, total_items || total_size].min
+            percentage = total_items ? (processed.to_f / total_items * 100).round(2) : (completed.to_f / chunks.size * 100).round(2)
+
+            progress_mutex.synchronize do
+              on_progress.call(
+                processed: processed,
+                total: total_items || total_size,
+                percentage: percentage,
+                current_chunk: completed - 1,
+                total_chunks: chunks.size,
+                success_count: success_count.value,
+                failed_count: failed_count.value
+              )
+            end
+          end
         end
       end

@@ -107,6 +161,7 @@ module Vectra

       results.sort_by { |r| r[:index] }
     end
+    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength

     def aggregate_results(results, total_vectors)
       errors = results.select { |r| r[:error] }.map { |r| r[:error] }
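
The on_progress hooks added above are plain Ruby procs invoked once per completed chunk. A minimal usage sketch, assuming an already-configured client and an existing 'docs' index; the Batch constructor and the index name are illustrative, not taken from this diff:

    batch = Vectra::Batch.new(client)   # construction assumed; not shown in this diff

    result = batch.upsert_async(
      index: 'docs',
      vectors: vectors,                 # array of vector hashes, each with a :values array
      on_progress: ->(stats) {
        # stats keys per the diff: :processed, :total, :percentage,
        # :current_chunk, :total_chunks, :success_count, :failed_count
        puts "#{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
      }
    )

    puts "Upserted: #{result[:upserted_count]}, errors: #{result[:errors].size}"

The callback fires from worker threads but is wrapped in a mutex, so a simple puts or progress-bar update needs no extra locking of its own.
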
data/lib/vectra/client.rb CHANGED
@@ -98,8 +98,26 @@ module Vectra
     #   filter: { category: 'programming' }
     # )
     #
-    def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
+    # @example Chainable query builder
+    #   results = client.query("my-index")
+    #     .vector([0.1, 0.2, 0.3])
+    #     .top_k(10)
+    #     .filter(category: "programming")
+    #     .with_metadata
+    #     .execute
+    #
+    def query(index_arg = nil, index: nil, vector: nil, top_k: 10, namespace: nil, filter: nil,
               include_values: false, include_metadata: true)
+      # If called with a positional index string only, return a query builder:
+      #   client.query("docs").vector(vec).top_k(10).filter(...).execute
+      if index_arg && index.nil? && vector.nil? && !block_given?
+        return QueryBuilder.new(self, index_arg)
+      end
+
+      # Handle positional argument for index in non-builder case
+      index = index_arg if index_arg && index.nil?
+
+      # Backwards-compatible path: perform query immediately
       validate_index!(index)
       validate_query_vector!(vector)

@@ -276,6 +294,137 @@ module Vectra
       provider.provider_name
     end

+    # Quick health check - tests if provider connection is healthy
+    #
+    # @param timeout [Float] timeout in seconds (default: 5)
+    # @return [Boolean] true if connection is healthy
+    #
+    # @example
+    #   if client.healthy?
+    #     client.upsert(...)
+    #   else
+    #     handle_unhealthy_connection
+    #   end
+    def healthy?
+      start = Time.now
+      provider.list_indexes
+      true
+    rescue StandardError => e
+      log_error("Health check failed", e)
+      false
+    ensure
+      duration = ((Time.now - start) * 1000).round(2) if defined?(start)
+      log_debug("Health check completed in #{duration}ms") if duration
+    end
+
+    # Ping provider and get connection health status with latency
+    #
+    # @param timeout [Float] timeout in seconds (default: 5)
+    # @return [Hash] health status with :healthy, :provider, :latency_ms
+    #
+    # @example
+    #   status = client.ping
+    #   puts "Provider: #{status[:provider]}, Healthy: #{status[:healthy]}, Latency: #{status[:latency_ms]}ms"
+    def ping
+      start = Time.now
+      healthy = true
+      error_info = nil
+
+      begin
+        provider.list_indexes
+      rescue StandardError => e
+        healthy = false
+        error_info = { error: e.class.name, error_message: e.message }
+        log_error("Health check failed", e)
+      end
+
+      duration = ((Time.now - start) * 1000).round(2)
+
+      result = {
+        healthy: healthy,
+        provider: provider_name,
+        latency_ms: duration
+      }
+
+      result.merge!(error_info) if error_info
+      result
+    end
+
+    # Chainable query builder
+    #
+    # @api public
+    # @example
+    #   results = client.query("docs")
+    #     .vector(embedding)
+    #     .top_k(20)
+    #     .namespace("prod")
+    #     .filter(category: "ruby")
+    #     .with_metadata
+    #     .execute
+    #
+    class QueryBuilder
+      def initialize(client, index)
+        @client = client
+        @index = index
+        @vector = nil
+        @top_k = 10
+        @namespace = nil
+        @filter = nil
+        @include_values = false
+        @include_metadata = true
+      end
+
+      attr_reader :index
+
+      def vector(value)
+        @vector = value
+        self
+      end
+
+      def top_k(value)
+        @top_k = value.to_i
+        self
+      end
+
+      def namespace(value)
+        @namespace = value
+        self
+      end
+
+      def filter(value = nil, **kwargs)
+        @filter = value || kwargs
+        self
+      end
+
+      def with_values
+        @include_values = true
+        self
+      end
+
+      def with_metadata
+        @include_metadata = true
+        self
+      end
+
+      def without_metadata
+        @include_metadata = false
+        self
+      end
+
+      # Execute the built query and return a QueryResult
+      def execute
+        @client.query(
+          index: @index,
+          vector: @vector,
+          top_k: @top_k,
+          namespace: @namespace,
+          filter: @filter,
+          include_values: @include_values,
+          include_metadata: @include_metadata
+        )
+      end
+    end
+
     private

     def build_config(provider_name, api_key, environment, host, options)
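
With the dispatch above, query keeps its keyword form and gains a chainable form, and the health helpers sit alongside it. A short sketch; the index name and embedding are placeholders:

    embedding = [0.1, 0.2, 0.3]

    # Keyword style, unchanged
    client.query(index: 'docs', vector: embedding, top_k: 5)

    # Chainable style: a bare positional index returns a QueryBuilder
    client.query('docs')
          .vector(embedding)
          .top_k(5)
          .filter(category: 'ruby')
          .without_metadata
          .execute

    # Connection health
    client.healthy?   # => true or false
    client.ping       # => { healthy: true, provider: ..., latency_ms: 12.3 }
                      #    plus :error / :error_message when the check fails
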
@@ -306,6 +455,8 @@ module Vectra
         Providers::Weaviate.new(config)
       when :pgvector
         Providers::Pgvector.new(config)
+      when :memory
+        Providers::Memory.new(config)
       else
         raise UnsupportedProviderError, "Provider '#{config.provider}' is not supported"
       end
@@ -317,11 +468,32 @@ module Vectra
       raise ValidationError, "Index name cannot be empty" if index.empty?
     end

+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
     def validate_vectors!(vectors)
       raise ValidationError, "Vectors cannot be nil" if vectors.nil?
       raise ValidationError, "Vectors must be an array" unless vectors.is_a?(Array)
       raise ValidationError, "Vectors cannot be empty" if vectors.empty?
+
+      # Check dimension consistency
+      first_vector = vectors.first
+      first_values = first_vector.is_a?(Vector) ? first_vector.values : first_vector[:values]
+      first_dim = first_values&.size
+
+      return unless first_dim
+
+      vectors.each_with_index do |vec, index|
+        values = vec.is_a?(Vector) ? vec.values : vec[:values]
+        dim = values&.size
+
+        next unless dim && dim != first_dim
+
+        raise ValidationError,
+              "Inconsistent vector dimensions at index #{index}: " \
+              "expected #{first_dim}, got #{dim}. " \
+              "All vectors in a batch must have the same dimension."
+      end
     end
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

     def validate_query_vector!(vector)
       raise ValidationError, "Query vector cannot be nil" if vector.nil?
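
The dimension check above rejects mixed-size batches up front. An illustrative failure, assuming upsert still runs validate_vectors! on its input as in earlier versions; the index name and values are made up:

    client.upsert(
      index: 'docs',
      vectors: [
        { id: 'a', values: [0.1, 0.2, 0.3] },
        { id: 'b', values: [0.4, 0.5] }   # only 2 dimensions
      ]
    )
    # => raises Vectra::ValidationError:
    #    "Inconsistent vector dimensions at index 1: expected 3, got 2.
    #     All vectors in a batch must have the same dimension."
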
@@ -340,5 +512,20 @@ module Vectra
       raise ValidationError, "ID must be a string" unless id.is_a?(String)
       raise ValidationError, "ID cannot be empty" if id.empty?
     end
+
+    def log_error(message, error = nil)
+      return unless config.logger
+
+      config.logger.error("[Vectra] #{message}")
+      config.logger.error("[Vectra] #{error.class}: #{error.message}") if error
+      config.logger.error("[Vectra] #{error.backtrace&.first(3)&.join("\n")}") if error&.backtrace
+    end
+
+    def log_debug(message, data = nil)
+      return unless config.logger
+
+      config.logger.debug("[Vectra] #{message}")
+      config.logger.debug("[Vectra] #{data.inspect}") if data
+    end
   end
 end
@@ -11,7 +11,7 @@ module Vectra
   #   end
   #
   class Configuration
-    SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector].freeze
+    SUPPORTED_PROVIDERS = %i[pinecone qdrant weaviate pgvector memory].freeze

     attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
                   :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
@@ -117,7 +117,7 @@ module Vectra

     # Providers that don't require API key (local instances)
     def api_key_optional_provider?
-      %i[qdrant pgvector].include?(provider)
+      %i[qdrant pgvector memory].include?(provider)
     end

     def validate_provider_specific!
@@ -130,6 +130,8 @@ module Vectra
         validate_weaviate!
       when :pgvector
         validate_pgvector!
+      when :memory
+        # Memory provider has no special requirements
       end
     end

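The new :memory provider registered above needs neither an API key nor provider-specific validation, which makes it convenient for tests. A sketch of how it might be wired up; the constructor keywords are an assumption based on build_config(provider_name, api_key, environment, host, options) in client.rb, not confirmed by this diff:

    # Hypothetical setup for an in-memory test double
    client = Vectra::Client.new(provider: :memory)

    client.upsert(index: 'scratch', vectors: [{ id: '1', values: [0.1, 0.2, 0.3] }])
    client.query(index: 'scratch', vector: [0.1, 0.2, 0.3], top_k: 1)
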
@@ -37,13 +37,12 @@ module Vectra

     # Test if secondary key is valid
     #
-    # @param timeout [Float] Test timeout in seconds
     # @return [Boolean] true if secondary key works
-    def test_secondary(timeout: 5)
+    def test_secondary
       return false if secondary_key.nil? || secondary_key.empty?

       client = build_test_client(secondary_key)
-      client.healthy?(timeout: timeout)
+      client.healthy?
     rescue StandardError
       false
     end
@@ -232,16 +232,34 @@ module Vectra
     #
     # @param body [Hash, String, nil] response body
     # @return [String]
+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
     def extract_error_message(body)
       case body
       when Hash
-        body["message"] || body["error"] || body.to_s
+        # Primary error message
+        msg = body["message"] || body["error"] || body["error_message"] || body.to_s
+
+        # Add context from details
+        details = body["details"] || body["error_details"] || body["detail"]
+        if details
+          details_str = details.is_a?(Hash) ? details.to_json : details.to_s
+          msg += " (#{details_str})" unless msg.include?(details_str)
+        end
+
+        # Add field-specific errors if available
+        if body["errors"].is_a?(Array)
+          field_errors = body["errors"].map { |e| e.is_a?(Hash) ? e["field"] || e["message"] : e }.join(", ")
+          msg += " [Fields: #{field_errors}]" if field_errors && !msg.include?(field_errors)
+        end
+
+        msg
       when String
         body
       else
         "Unknown error"
       end
     end
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

     # Log debug information
     #
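
To make the richer extraction above concrete, a hypothetical provider error body and the message it now yields, traced through the code shown; the body itself is made up:

    body = {
      "message" => "Upsert rejected",
      "details" => { "index" => "docs" },
      "errors"  => [{ "field" => "values" }, "dimension mismatch"]
    }

    extract_error_message(body)
    # => 'Upsert rejected ({"index":"docs"}) [Fields: values, dimension mismatch]'

Previously the same body would have produced only "Upsert rejected".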