RubyGems - vectra-client - Versions diffs - 1.0.7 → 1.1.0 - Mend

vectra-client 1.0.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +53 -0
data/docs/_layouts/home.html +1 -1
data/docs/api/cheatsheet.md +43 -0
data/docs/api/methods.md +30 -6
data/docs/api/overview.md +6 -0
data/docs/guides/middleware.md +324 -0
data/examples/middleware_demo.rb +103 -0
data/lib/vectra/active_record.rb +52 -1
data/lib/vectra/cache.rb +49 -0
data/lib/vectra/client.rb +148 -28
data/lib/vectra/health_check.rb +4 -2
data/lib/vectra/middleware/base.rb +97 -0
data/lib/vectra/middleware/cost_tracker.rb +121 -0
data/lib/vectra/middleware/instrumentation.rb +44 -0
data/lib/vectra/middleware/logging.rb +62 -0
data/lib/vectra/middleware/pii_redaction.rb +65 -0
data/lib/vectra/middleware/request.rb +62 -0
data/lib/vectra/middleware/response.rb +65 -0
data/lib/vectra/middleware/retry.rb +103 -0
data/lib/vectra/middleware/stack.rb +74 -0
data/lib/vectra/version.rb +1 -1
data/lib/vectra.rb +9 -0
metadata +12 -1

data/lib/vectra/active_record.rb CHANGED Viewed

@@ -2,8 +2,9 @@
 require "active_support/concern"
-# Ensure Client and Providers are loaded (for Rails autoloading compatibility)
+# Ensure Client and supporting classes are loaded (for Rails autoloading compatibility)
 require_relative "client" unless defined?(Vectra::Client)
+require_relative "batch" unless defined?(Vectra::Batch)
 module Vectra
   # ActiveRecord integration for vector embeddings
@@ -26,6 +27,7 @@ module Vectra
   #   # Search similar documents
   #   results = Document.vector_search([0.1, 0.2, ...], limit: 10)
   #
+  # rubocop:disable Metrics/ModuleLength
   module ActiveRecord
     extend ActiveSupport::Concern
@@ -86,6 +88,54 @@ module Vectra
         end
       end
+      # Rebuild vectors for this model from the configured embedding attribute and upsert them in batches.
+      #
+      # @param scope [ActiveRecord::Relation] records to reindex (default: all)
+      # @param batch_size [Integer] number of records loaded per in_batches step
+      # @param on_progress [Proc, nil] optional callback, forwarded unchanged to Batch#upsert_async for every batch
+      #   Presumably receives a hash with :processed and :total keys (payload is defined by Batch; verify there)
+      #
+      # @return [Integer] number of vectors submitted for upsert (records with a nil embedding are not counted)
+      def reindex_vectors(scope: all, batch_size: 1_000, on_progress: nil)
+        config = _vectra_config
+        client = vectra_client
+        batch = Vectra::Batch.new(client)
+        processed = 0
+        scope.in_batches(of: batch_size).each do |relation|
+          records = relation.to_a
+          vectors = records.map do |record|
+            vector = record.send(config[:attribute])
+            next if vector.nil? # yields nil for this record; dropped by the compact below
+            metadata = config[:metadata_fields].each_with_object({}) do |field, hash|
+              hash[field.to_s] = record.send(field) if record.respond_to?(field) # fields the record lacks are omitted
+            end
+            {
+              id: "#{config[:index]}_#{record.id}", # "<index>_<record id>"
+              values: vector,
+              metadata: metadata
+            }
+          end.compact
+          next if vectors.empty? # nothing to send for this batch
+          batch.upsert_async(
+            index: config[:index],
+            vectors: vectors,
+            namespace: nil, # always passes nil here, regardless of any client default namespace
+            on_progress: on_progress
+          )
+          processed += vectors.size # counted on submission; the result of upsert_async is not inspected
+        end
+        processed
+      end
       # Search vectors
       #
       # @api private
@@ -195,4 +245,5 @@ module Vectra
       "#{self.class._vectra_config[:index]}_#{id}"
     end
   end
+  # rubocop:enable Metrics/ModuleLength
 end

data/lib/vectra/cache.rb CHANGED Viewed

@@ -258,4 +258,53 @@ module Vectra
       "#{index}:f:#{id}:#{namespace || 'default'}"
     end
   end
+  # Helper for caching embeddings, keyed by model name, field, record ID and input text.
+  #
+  # @example
+  #   cache = Vectra::Cache.new(ttl: 600, max_size: 1000)
+  #
+  #   embedding = Vectra::Embeddings.fetch(
+  #     cache: cache,
+  #     model_name: "Product",
+  #     id: product.id,
+  #     input: product.description,
+  #     field: :description
+  #   ) do
+  #     EmbeddingService.generate(product.description)
+  #   end
+  #
+  module Embeddings
+    module_function
+    # Build a deterministic cache key; changing model_name, field, id or input changes the digest part.
+    #
+    # @param model_name [String] model class name (e.g. "Product")
+    # @param id [Integer, String] record ID (only contributes to the digest, not to the readable prefix)
+    # @param input [String] raw input used for embedding (only contributes to the digest)
+    # @param field [Symbol, String, nil] optional field name; "default" is used when nil or false
+    #
+    # @return [String] key shaped "emb:<model_name>:<field>:<32 hex chars>"
+    def cache_key(model_name:, id:, input:, field: nil)
+      field_part = field ? field.to_s : "default"
+      base = "#{model_name}:#{field_part}:#{id}:#{input}"
+      digest = Digest::SHA256.hexdigest(base)[0, 32] # first 32 hex chars; NOTE(review): assumes Digest is already loaded — confirm
+      "emb:#{model_name}:#{field_part}:#{digest}"
+    end
+    # Look up an embedding under its cache key by delegating to cache.fetch with the given block.
+    #
+    # @param cache [Vectra::Cache] cache instance (anything responding to fetch(key, &block))
+    # @param model_name [String] model class name
+    # @param id [Integer, String] record ID
+    # @param input [String] input used for embedding
+    # @param field [Symbol, String, nil] optional field name
+    #
+    # @yield block handed to cache.fetch; presumably run only on a miss to compute the value (verify in Cache#fetch)
+    # @return [Object] whatever cache.fetch returns for the key
+    def fetch(cache:, model_name:, id:, input:, field: nil, &block)
+      key = cache_key(model_name: model_name, id: id, input: input, field: field)
+      cache.fetch(key, &block)
+    end
+  end
 end

data/lib/vectra/client.rb CHANGED Viewed

@@ -40,7 +40,38 @@ module Vectra
   class Client
     include Vectra::HealthCheck
-    attr_reader :config, :provider
+    attr_reader :config, :provider, :default_index, :default_namespace
+    class << self
+      # Get the global (class-level) middleware list, lazily initialised to an empty array
+      #
+      # @return [Array<Array>] Array of [middleware_class, options] pairs
+      def middleware
+        @middleware ||= [] # class-level ivar; separate from the per-client @middleware stack set in initialize
+      end
+      # Register middleware globally; it is instantiated with these options when a client is constructed
+      #
+      # @param middleware_class [Class] Middleware class
+      # @param options [Hash] Options to pass to middleware constructor
+      #
+      # @example Add global logging middleware
+      #   Vectra::Client.use Vectra::Middleware::Logging
+      #
+      # @example Add middleware with options
+      #   Vectra::Client.use Vectra::Middleware::Retry, max_attempts: 5
+      #
+      def use(middleware_class, **options)
+        middleware << [middleware_class, options] # returns the updated list of pairs
+      end
+      # Clear all global middleware by replacing the list with a new empty array
+      #
+      # @return [void]
+      def clear_middleware!
+        @middleware = []
+      end
+    end
     # Initialize a new Client
     #
@@ -49,17 +80,23 @@ module Vectra
     # @param environment [String, nil] environment/region
     # @param host [String, nil] custom host URL
     # @param options [Hash] additional options
+    # @option options [String] :index default index name
+    # @option options [String] :namespace default namespace
+    # @option options [Array<Class, Object>] :middleware instance-level middleware
     def initialize(provider: nil, api_key: nil, environment: nil, host: nil, **options)
       @config = build_config(provider, api_key, environment, host, options)
       @config.validate!
       @provider = build_provider
+      @default_index = options[:index]
+      @default_namespace = options[:namespace]
+      @middleware = build_middleware_stack(options[:middleware])
     end
     # Upsert vectors into an index
     #
-    # @param index [String] the index/collection name
     # @param vectors [Array<Hash, Vector>] vectors to upsert
-    # @param namespace [String, nil] optional namespace (provider-specific)
+    # @param index [String, nil] the index/collection name (falls back to client's default)
+    # @param namespace [String, nil] optional namespace (provider-specific, falls back to client's default)
     # @return [Hash] upsert response with :upserted_count
     #
     # @example Upsert vectors
@@ -71,7 +108,9 @@ module Vectra
     #     ]
     #   )
     #
-    def upsert(index:, vectors:, namespace: nil)
+    def upsert(vectors:, index: nil, namespace: nil)
+      index ||= default_index
+      namespace ||= default_namespace
       validate_index!(index)
       validate_vectors!(vectors)
@@ -81,7 +120,7 @@ module Vectra
         index: index,
         metadata: { vector_count: vectors.size }
       ) do
-        provider.upsert(index: index, vectors: vectors, namespace: namespace)
+        @middleware.call(:upsert, index: index, vectors: vectors, namespace: namespace, provider: provider_name)
       end
     end
@@ -130,6 +169,10 @@ module Vectra
       # Handle positional argument for index in non-builder case
       index = index_arg if index_arg && index.nil?
+      # Fall back to default index/namespace when not provided
+      index ||= default_index
+      namespace ||= default_namespace
       # Backwards-compatible path: perform query immediately
       validate_index!(index)
       validate_query_vector!(vector)
@@ -141,14 +184,16 @@ module Vectra
         index: index,
         metadata: { top_k: top_k }
       ) do
-        result = provider.query(
+        result = @middleware.call(
+          :query,
           index: index,
           vector: vector,
           top_k: top_k,
           namespace: namespace,
           filter: filter,
           include_values: include_values,
-          include_metadata: include_metadata
+          include_metadata: include_metadata,
+          provider: provider_name
         )
       end
@@ -157,16 +202,18 @@ module Vectra
     # Fetch vectors by IDs
     #
-    # @param index [String] the index/collection name
     # @param ids [Array<String>] vector IDs to fetch
-    # @param namespace [String, nil] optional namespace
+    # @param index [String, nil] the index/collection name (falls back to client's default)
+    # @param namespace [String, nil] optional namespace (falls back to client's default)
     # @return [Hash<String, Vector>] hash of ID to Vector
     #
     # @example Fetch vectors
     #   vectors = client.fetch(index: 'my-index', ids: ['vec1', 'vec2'])
     #   vectors['vec1'].values # => [0.1, 0.2, 0.3]
     #
-    def fetch(index:, ids:, namespace: nil)
+    def fetch(ids:, index: nil, namespace: nil)
+      index ||= default_index
+      namespace ||= default_namespace
       validate_index!(index)
       validate_ids!(ids)
@@ -176,14 +223,14 @@ module Vectra
         index: index,
         metadata: { id_count: ids.size }
       ) do
-        provider.fetch(index: index, ids: ids, namespace: namespace)
+        @middleware.call(:fetch, index: index, ids: ids, namespace: namespace, provider: provider_name)
       end
     end
     # Update a vector's metadata or values
     #
-    # @param index [String] the index/collection name
     # @param id [String] vector ID
+    # @param index [String, nil] the index/collection name (falls back to client's default)
     # @param metadata [Hash, nil] new metadata (merged with existing)
     # @param values [Array<Float>, nil] new vector values
     # @param namespace [String, nil] optional namespace
@@ -196,7 +243,9 @@ module Vectra
     #     metadata: { category: 'updated' }
     #   )
     #
-    def update(index:, id:, metadata: nil, values: nil, namespace: nil)
+    def update(id:, index: nil, metadata: nil, values: nil, namespace: nil)
+      index ||= default_index
+      namespace ||= default_namespace
       validate_index!(index)
       validate_id!(id)
@@ -208,12 +257,14 @@ module Vectra
         index: index,
         metadata: { has_metadata: !metadata.nil?, has_values: !values.nil? }
       ) do
-        provider.update(
+        @middleware.call(
+          :update,
           index: index,
           id: id,
           metadata: metadata,
           values: values,
-          namespace: namespace
+          namespace: namespace,
+          provider: provider_name
         )
       end
     end
@@ -236,7 +287,9 @@ module Vectra
     # @example Delete all
     #   client.delete(index: 'my-index', delete_all: true)
     #
-    def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
+    def delete(index: nil, ids: nil, namespace: nil, filter: nil, delete_all: false)
+      index ||= default_index
+      namespace ||= default_namespace
       validate_index!(index)
       if ids.nil? && filter.nil? && !delete_all
@@ -249,12 +302,14 @@ module Vectra
         index: index,
         metadata: { id_count: ids&.size, delete_all: delete_all, has_filter: !filter.nil? }
       ) do
-        provider.delete(
+        @middleware.call(
+          :delete,
           index: index,
           ids: ids,
           namespace: namespace,
           filter: filter,
-          delete_all: delete_all
+          delete_all: delete_all,
+          provider: provider_name
         )
       end
     end
@@ -268,7 +323,7 @@ module Vectra
     #   indexes.each { |idx| puts idx[:name] }
     #
     def list_indexes
-      provider.list_indexes
+      @middleware.call(:list_indexes, provider: provider_name)
     end
     # Describe an index
@@ -280,9 +335,10 @@ module Vectra
     #   info = client.describe_index(index: 'my-index')
     #   puts info[:dimension]
     #
-    def describe_index(index:)
+    def describe_index(index: nil)
+      index ||= default_index
       validate_index!(index)
-      provider.describe_index(index: index)
+      @middleware.call(:describe_index, index: index, provider: provider_name)
     end
     # Get index statistics
@@ -295,9 +351,11 @@ module Vectra
     #   stats = client.stats(index: 'my-index')
     #   puts "Total vectors: #{stats[:total_vector_count]}"
     #
-    def stats(index:, namespace: nil)
+    def stats(index: nil, namespace: nil)
+      index ||= default_index
+      namespace ||= default_namespace
       validate_index!(index)
-      provider.stats(index: index, namespace: namespace)
+      @middleware.call(:stats, index: index, namespace: namespace, provider: provider_name)
     end
     # Create a new index
@@ -323,7 +381,7 @@ module Vectra
         index: name,
         metadata: { dimension: dimension, metric: metric }
       ) do
-        provider.create_index(name: name, dimension: dimension, metric: metric, **options)
+        @middleware.call(:create_index, name: name, dimension: dimension, metric: metric, provider: provider_name, **options)
       end
     end
@@ -346,7 +404,7 @@ module Vectra
         provider: provider_name,
         index: name
       ) do
-        provider.delete_index(name: name)
+        @middleware.call(:delete_index, name: name, provider: provider_name)
       end
     end
@@ -359,7 +417,8 @@ module Vectra
     #   namespaces = client.list_namespaces(index: 'documents')
     #   namespaces.each { |ns| puts "Namespace: #{ns}" }
     #
-    def list_namespaces(index:)
+    def list_namespaces(index: nil)
+      index ||= default_index
       validate_index!(index)
       stats_data = provider.stats(index: index)
       namespaces = stats_data[:namespaces] || {}
@@ -408,6 +467,8 @@ module Vectra
     #
     def hybrid_search(index:, vector:, text:, alpha: 0.5, top_k: 10, namespace: nil,
                       filter: nil, include_values: false, include_metadata: true)
+      index ||= default_index
+      namespace ||= default_namespace
       validate_index!(index)
       validate_query_vector!(vector)
       raise ValidationError, "Text query cannot be nil or empty" if text.nil? || text.empty?
@@ -418,7 +479,8 @@ module Vectra
               "Hybrid search is not supported by #{provider_name} provider"
       end
-      provider.hybrid_search(
+      @middleware.call(
+        :hybrid_search,
         index: index,
         vector: vector,
         text: text,
@@ -427,7 +489,8 @@ module Vectra
         namespace: namespace,
         filter: filter,
         include_values: include_values,
-        include_metadata: include_metadata
+        include_metadata: include_metadata,
+        provider: provider_name
       )
     end
@@ -606,6 +669,21 @@ module Vectra
       end
     end
+    def build_middleware_stack(instance_middleware = nil)
+      # Build this client's stack: class-level (global) middleware first, then instance-level entries
+      all_middleware = self.class.middleware.map do |klass, opts|
+        klass.new(**opts) # fresh instance per client, using the options recorded by Client.use
+      end
+      if instance_middleware
+        all_middleware += Array(instance_middleware).map do |mw|
+          mw.is_a?(Class) ? mw.new : mw # classes are instantiated without arguments; other objects are used as-is
+        end
+      end
+      Middleware::Stack.new(@provider, all_middleware) # reads @provider, which initialize assigns before calling this
+    end
     def validate_index!(index)
       raise ValidationError, "Index name cannot be nil" if index.nil?
       raise ValidationError, "Index name must be a string" unless index.is_a?(String)
@@ -671,6 +749,48 @@ module Vectra
       config.logger.debug("[Vectra] #{message}")
       config.logger.debug("[Vectra] #{data.inspect}") if data
     end
+    # Temporarily override default index within a block; the previous value is restored even if the block raises.
+    #
+    # @param index [String] temporary index name
+    # @yield [Client] yields self with overridden index
+    # @return [Object] block result
+    def with_index(index)
+      previous = @default_index
+      @default_index = index # changes this client object itself, so any holder of it sees the override until the block exits
+      yield self
+    ensure
+      @default_index = previous
+    end
+    # Temporarily override default namespace within a block; the previous value is restored even if the block raises.
+    #
+    # @param namespace [String] temporary namespace
+    # @yield [Client] yields self with overridden namespace
+    # @return [Object] block result
+    def with_namespace(namespace)
+      previous = @default_namespace
+      @default_namespace = namespace # changes this client object itself, so any holder of it sees the override until the block exits
+      yield self
+    ensure
+      @default_namespace = previous
+    end
+    # Temporarily override both index and namespace within a block by nesting with_index and with_namespace.
+    #
+    # @param index [String] temporary index name
+    # @param namespace [String] temporary namespace
+    # @yield [Client] yields self with overridden index and namespace
+    # @return [Object] block result
+    def with_index_and_namespace(index, namespace)
+      with_index(index) do
+        with_namespace(namespace) do # namespace is restored first, then the index, as the nested blocks unwind
+          yield self
+        end
+      end
+    end
+    public :with_index, :with_namespace, :with_index_and_namespace
   end
   # rubocop:enable Metrics/ClassLength
 end

data/lib/vectra/health_check.rb CHANGED Viewed

@@ -29,7 +29,9 @@ module Vectra
     def health_check(index: nil, include_stats: false, timeout: 5)
       start_time = Time.now
-      indexes = with_timeout(timeout) { list_indexes }
+      # For health checks we bypass client middleware and call the provider
+      # directly to avoid interference from custom stacks.
+      indexes = with_timeout(timeout) { provider.list_indexes }
       index_name = index || indexes.first&.dig(:name)
       result = base_result(start_time, indexes)
@@ -70,7 +72,7 @@ module Vectra
     def add_index_stats(result, index_name, include_stats, timeout)
       return unless include_stats && index_name
-      stats = with_timeout(timeout) { stats(index: index_name) }
+      stats = with_timeout(timeout) { provider.stats(index: index_name) }
       result[:index] = index_name
       result[:stats] = {
         vector_count: stats[:total_vector_count],

data/lib/vectra/middleware/base.rb ADDED Viewed

@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+module Vectra
+  module Middleware
+    # Base class for all middleware
+    #
+    # Middleware can hook into three lifecycle events:
+    # - before(request): Called before the next middleware/provider
+    # - after(request, response): Called once the next middleware/provider returns, even if response.error is set
+    # - on_error(request, error): Called when response.error is set or when an exception is raised
+    #
+    # @example Simple logging middleware
+    #   class LoggingMiddleware < Vectra::Middleware::Base
+    #     def before(request)
+    #       puts "Starting #{request.operation}"
+    #     end
+    #
+    #     def after(request, response)
+    #       puts "Completed #{request.operation}"
+    #     end
+    #   end
+    #
+    # @example Error handling middleware
+    #   class ErrorHandlerMiddleware < Vectra::Middleware::Base
+    #     def on_error(request, error)
+    #       ErrorTracker.notify(error, context: { operation: request.operation })
+    #     end
+    #   end
+    #
+    class Base
+      # Execute the middleware
+      #
+      # This is the main entry point called by the middleware stack.
+      # It runs the before/after/error hooks and re-raises any StandardError after passing it to on_error.
+      #
+      # @param request [Request] The request object
+      # @param app [Proc] The next middleware in the chain (anything responding to call(request))
+      # @return [Response] The response object returned by app, unchanged by this method
+      def call(request, app)
+        # Before hook
+        before(request)
+        # Call next middleware
+        response = app.call(request)
+        # An error carried on the response is reported to on_error but is not raised here
+        if response.error
+          on_error(request, response.error)
+        end
+        # After hook (runs whether or not response.error is set)
+        after(request, response)
+        response
+      rescue StandardError => e
+        # Covers exceptions from any step above, hooks included, so on_error can run twice if a hook itself raises
+        on_error(request, e)
+        raise
+      end
+      protected
+      # Hook called before the next middleware
+      #
+      # Override this method to add logic before the operation executes.
+      #
+      # @param request [Request] The request object
+      # @return [void]
+      def before(request)
+        # Override in subclass
+      end
+      # Hook called after the next middleware has returned a response
+      #
+      # Override this method to add logic after the operation completes; check response.error to detect failures.
+      #
+      # @param request [Request] The request object
+      # @param response [Response] The response object
+      # @return [void]
+      def after(request, response)
+        # Override in subclass
+      end
+      # Hook called when an error occurs
+      #
+      # Override this method to add error handling logic.
+      # Raised exceptions are re-raised by #call after this hook; response-carried errors are not raised by #call.
+      #
+      # @param request [Request] The request object
+      # @param error [Exception] The error that occurred
+      # @return [void]
+      def on_error(request, error)
+        # Override in subclass
+      end
+    end
+  end
+end