RubyGems - vectra-client - Versions diffs - 0.2.1 → 0.3.0 - Mend

vectra-client 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

checksums.yaml +4 -4
data/.rubocop.yml +77 -37
data/CHANGELOG.md +49 -6
data/README.md +52 -393
data/docs/Gemfile +9 -0
data/docs/_config.yml +37 -0
data/docs/_layouts/default.html +14 -0
data/docs/_layouts/home.html +187 -0
data/docs/_layouts/page.html +82 -0
data/docs/_site/api/overview/index.html +145 -0
data/docs/_site/assets/main.css +649 -0
data/docs/_site/assets/main.css.map +1 -0
data/docs/_site/assets/minima-social-icons.svg +33 -0
data/docs/_site/assets/style.css +295 -0
data/docs/_site/community/contributing/index.html +110 -0
data/docs/_site/examples/basic-usage/index.html +117 -0
data/docs/_site/examples/index.html +58 -0
data/docs/_site/feed.xml +1 -0
data/docs/_site/guides/getting-started/index.html +106 -0
data/docs/_site/guides/installation/index.html +82 -0
data/docs/_site/index.html +92 -0
data/docs/_site/providers/index.html +119 -0
data/docs/_site/providers/pgvector/index.html +155 -0
data/docs/_site/providers/pinecone/index.html +121 -0
data/docs/_site/providers/qdrant/index.html +124 -0
data/docs/_site/providers/weaviate/index.html +123 -0
data/docs/_site/robots.txt +1 -0
data/docs/_site/sitemap.xml +39 -0
data/docs/api/overview.md +126 -0
data/docs/assets/style.css +927 -0
data/docs/community/contributing.md +89 -0
data/docs/examples/basic-usage.md +102 -0
data/docs/examples/index.md +54 -0
data/docs/guides/getting-started.md +90 -0
data/docs/guides/installation.md +67 -0
data/docs/guides/performance.md +200 -0
data/docs/index.md +37 -0
data/docs/providers/index.md +81 -0
data/docs/providers/pgvector.md +95 -0
data/docs/providers/pinecone.md +72 -0
data/docs/providers/qdrant.md +73 -0
data/docs/providers/weaviate.md +72 -0
data/lib/vectra/batch.rb +148 -0
data/lib/vectra/cache.rb +261 -0
data/lib/vectra/configuration.rb +6 -1
data/lib/vectra/pool.rb +256 -0
data/lib/vectra/streaming.rb +153 -0
data/lib/vectra/version.rb +1 -1
data/lib/vectra.rb +4 -0
data/netlify.toml +12 -0
metadata +58 -5
data/IMPLEMENTATION_GUIDE.md +0 -686
data/NEW_FEATURES_v0.2.0.md +0 -459
data/RELEASE_CHECKLIST_v0.2.0.md +0 -383
data/USAGE_EXAMPLES.md +0 -787

data/lib/vectra/configuration.rb CHANGED Viewed

@@ -15,7 +15,8 @@ module Vectra
     attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
                   :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
-                  :batch_size, :instrumentation
+                  :batch_size, :instrumentation, :cache_enabled, :cache_ttl,
+                  :cache_max_size, :async_concurrency
     attr_reader :provider
@@ -33,6 +34,10 @@ module Vectra
       @pool_timeout = 5
       @batch_size = 100
       @instrumentation = false
+      @cache_enabled = false
+      @cache_ttl = 300
+      @cache_max_size = 1000
+      @async_concurrency = 4
     end
     # Set the provider

data/lib/vectra/pool.rb ADDED Viewed

@@ -0,0 +1,256 @@
+# frozen_string_literal: true
+require "concurrent"
+module Vectra
+  # Connection pool with warmup support
+  #
+  # Hands out connections produced by a caller-supplied factory block and
+  # keeps at most +size+ of them alive at once (idle + checked out). All
+  # bookkeeping happens while holding an internal mutex; callers that cannot
+  # get a connection wait on a condition variable until one is returned, the
+  # pool is shut down, or the checkout timeout elapses.
+  #
+  # @example Basic usage
+  #   pool = Vectra::Pool.new(size: 5, timeout: 5) { create_connection }
+  #   pool.warmup(3) # Pre-create 3 connections
+  #
+  #   pool.with_connection do |conn|
+  #     conn.execute("SELECT 1")
+  #   end
+  #
+  class Pool
+    class TimeoutError < Vectra::Error; end
+    class PoolExhaustedError < Vectra::Error; end
+    attr_reader :size, :timeout
+    # Initialize connection pool
+    #
+    # @param size [Integer] maximum pool size
+    # @param timeout [Integer] checkout timeout in seconds
+    # @yield connection factory block; called with no arguments each time a
+    #   new connection is needed
+    # @raise [ArgumentError] if no factory block is given
+    def initialize(size:, timeout: 5, &block)
+      raise ArgumentError, "Connection factory block required" unless block_given?
+      @size = size
+      @timeout = timeout
+      @factory = block
+      @pool = Concurrent::Array.new
+      @checked_out = Concurrent::AtomicFixnum.new(0)
+      @mutex = Mutex.new
+      @condition = ConditionVariable.new
+      @shutdown = false
+    end
+    # Warmup the pool by pre-creating connections
+    #
+    # Runs under the pool mutex and counts checked-out connections too, so a
+    # warmup racing with checkouts can never push the pool past +size+.
+    # Does nothing once the pool has been shut down.
+    #
+    # @param count [Integer] number of connections to create (default: pool size)
+    # @return [Integer] number of connections created
+    def warmup(count = nil)
+      count = [count || size, size].min
+      created = 0
+      @mutex.synchronize do
+        count.times do
+          break if @shutdown || total_connections >= size
+          conn = create_connection
+          next unless conn
+          @pool << conn
+          created += 1
+        end
+        # Newly idle connections may satisfy threads blocked in #checkout.
+        @condition.broadcast if created.positive?
+      end
+      created
+    end
+    # Execute block with a connection from the pool
+    #
+    # The connection is always checked back in, even if the block raises.
+    #
+    # @yield [connection] the checked out connection
+    # @return [Object] result of the block
+    def with_connection
+      conn = checkout
+      begin
+        yield conn
+      ensure
+        checkin(conn)
+      end
+    end
+    # Checkout a connection from the pool
+    #
+    # Prefers an idle connection, creates a new one while under the size
+    # limit, and otherwise waits for a checkin until the timeout elapses.
+    #
+    # @return [Object] a connection
+    # @raise [TimeoutError] if checkout times out
+    # @raise [PoolExhaustedError] if the pool has been shut down
+    def checkout
+      raise PoolExhaustedError, "Pool has been shutdown" if @shutdown
+      # Monotonic clock: the deadline must not move when the wall clock is adjusted.
+      deadline = monotonic_now + timeout
+      @mutex.synchronize do
+        loop do
+          # A waiter woken by #shutdown must not create or take a connection.
+          raise PoolExhaustedError, "Pool has been shutdown" if @shutdown
+          # Try to get an existing connection
+          conn = @pool.pop
+          if conn
+            if healthy?(conn)
+              # Count the connection only once it is actually handed out;
+              # counting before the health check would leak a slot for
+              # every stale connection that gets discarded.
+              @checked_out.increment
+              return conn
+            end
+            # Connection is unhealthy, discard and try again
+            close_connection(conn)
+            next
+          end
+          # Try to create a new connection if under limit
+          if total_connections < size
+            conn = create_connection
+            if conn
+              @checked_out.increment
+              return conn
+            end
+          end
+          # Wait for a connection to be returned
+          remaining = deadline - monotonic_now
+          raise TimeoutError, "Connection checkout timed out after #{timeout}s" if remaining <= 0
+          @condition.wait(@mutex, remaining)
+        end
+      end
+    end
+    # Return a connection to the pool
+    #
+    # Healthy connections go back on the idle list; unhealthy or surplus
+    # ones are closed. After shutdown the connection is closed instead of
+    # being dropped on the floor, and the checked-out count stays accurate.
+    #
+    # @param connection [Object] connection to return
+    def checkin(connection)
+      @mutex.synchronize do
+        @checked_out.decrement
+        if !@shutdown && healthy?(connection) && @pool.size < size
+          @pool << connection
+        else
+          close_connection(connection)
+        end
+        @condition.signal
+      end
+    end
+    # Shutdown the pool, closing all idle connections
+    #
+    # Connections that are currently checked out are closed when they are
+    # checked back in. Threads blocked in #checkout are woken so they fail
+    # immediately instead of waiting out their timeout.
+    #
+    # @return [void]
+    def shutdown
+      @shutdown = true
+      @mutex.synchronize do
+        while (conn = @pool.pop)
+          close_connection(conn)
+        end
+        @condition.broadcast
+      end
+    end
+    # Get pool statistics
+    #
+    # @return [Hash] pool stats (:size, :available, :checked_out,
+    #   :total_created, :shutdown); :total_created is the number of
+    #   connections currently alive, not a lifetime counter
+    def stats
+      {
+        size: size,
+        available: @pool.size,
+        checked_out: @checked_out.value,
+        total_created: @pool.size + @checked_out.value,
+        shutdown: @shutdown
+      }
+    end
+    # Check if pool is healthy (public method)
+    #
+    # @return [Boolean] true when the pool is not shut down and holds at
+    #   least one connection (idle or checked out)
+    def pool_healthy?
+      !@shutdown && (@pool.size + @checked_out.value).positive?
+    end
+    private
+    # Number of live connections: idle plus checked out.
+    def total_connections
+      @pool.size + @checked_out.value
+    end
+    # Current monotonic time in seconds, used for checkout deadlines.
+    def monotonic_now
+      Process.clock_gettime(Process::CLOCK_MONOTONIC)
+    end
+    # Internal health check for individual connections
+    def healthy?(conn)
+      return false if conn.nil?
+      return true unless conn.respond_to?(:status)
+      # For PG connections, check status. Otherwise assume healthy.
+      if defined?(PG::CONNECTION_OK)
+        conn.status == PG::CONNECTION_OK
+      else
+        # If PG not loaded, assume connection is healthy if it exists
+        true
+      end
+    rescue StandardError
+      false
+    end
+    # Build a connection via the factory; logs and returns nil on failure so
+    # callers can fall back to waiting.
+    def create_connection
+      @factory.call
+    rescue StandardError => e
+      Vectra.configuration.logger&.error("Pool: Failed to create connection: #{e.message}")
+      nil
+    end
+    # Close a connection if it supports #close; close errors are logged, not raised.
+    def close_connection(conn)
+      conn.close if conn.respond_to?(:close)
+    rescue StandardError => e
+      Vectra.configuration.logger&.warn("Pool: Error closing connection: #{e.message}")
+    end
+  end
+  # Mixin that gives a pgvector connection class a lazily built Pool.
+  #
+  # The including object is expected to provide +config+ (responding to
+  # +pool_size+ and +pool_timeout+) and +parse_connection_params+.
+  module PooledConnection
+    # Lazily build and memoize the pool.
+    #
+    # @return [Pool] connection pool
+    def connection_pool
+      return @connection_pool if @connection_pool
+      @connection_pool = create_pool
+    end
+    # Pre-create connections in the pool.
+    #
+    # @param count [Integer] number of connections to pre-create
+    # @return [Integer] connections created
+    def warmup_pool(count = nil)
+      connection_pool.warmup(count)
+    end
+    # Run the given block with a connection checked out of the pool.
+    #
+    # @yield [connection] database connection
+    def with_pooled_connection(&block)
+      connection_pool.with_connection(&block)
+    end
+    # Shut the pool down (if one was built) and forget it.
+    def shutdown_pool
+      @connection_pool.shutdown if @connection_pool
+      @connection_pool = nil
+    end
+    # Report pool statistics without forcing the pool to be created.
+    #
+    # @return [Hash]
+    def pool_stats
+      @connection_pool ? connection_pool.stats : { status: "not_initialized" }
+    end
+    private
+    # Build a Pool sized from the configuration, defaulting both the size
+    # and the checkout timeout to 5.
+    def create_pool
+      Pool.new(size: config.pool_size || 5, timeout: config.pool_timeout || 5) { create_raw_connection }
+    end
+    # Open a new PG connection; the pg gem is loaded on first use.
+    def create_raw_connection
+      require "pg"
+      PG.connect(parse_connection_params)
+    end
+  end
+end

data/lib/vectra/streaming.rb ADDED Viewed

@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+module Vectra
+  # Streaming query results for large datasets
+  #
+  # Provides lazy enumeration over query results, fetching them from the
+  # client page by page instead of in one large request.
+  #
+  # @example Stream through results
+  #   stream = Vectra::Streaming.new(client)
+  #   stream.query_each(index: 'my-index', vector: query_vec, total: 1000) do |match|
+  #     process(match)
+  #   end
+  #
+  # @example As lazy enumerator
+  #   results = stream.query_stream(index: 'my-index', vector: query_vec, total: 1000)
+  #   results.take(50).each { |m| puts m.id }
+  #
+  class Streaming
+    DEFAULT_PAGE_SIZE = 100
+    attr_reader :client, :page_size
+    # Initialize streaming query handler
+    #
+    # @param client [Client] the Vectra client
+    # @param page_size [Integer] results per page (default: 100); values
+    #   below 1 are raised to 1
+    def initialize(client, page_size: DEFAULT_PAGE_SIZE)
+      @client = client
+      @page_size = [page_size, 1].max
+    end
+    # Stream query results with a block
+    #
+    # @param index [String] the index name
+    # @param vector [Array<Float>] query vector
+    # @param total [Integer] total results to fetch
+    # @param namespace [String, nil] optional namespace
+    # @param filter [Hash, nil] metadata filter
+    # @yield [Match] each match result
+    # @return [Integer] total results yielded (0 when no block is given)
+    def query_each(index:, vector:, total:, namespace: nil, filter: nil, &block)
+      return 0 unless block_given?
+      count = 0
+      query_stream(index: index, vector: vector, total: total, namespace: namespace, filter: filter).each do |match|
+        block.call(match)
+        count += 1
+      end
+      count
+    end
+    # Create a lazy enumerator for streaming results
+    #
+    # The client's query call takes no offset or cursor here, so each round
+    # asks for a larger +top_k+ (results already yielded plus one page) and
+    # skips the ids that were already seen. Enumeration stops when +total+
+    # matches have been yielded, when the client returns fewer results than
+    # requested, or when a round yields nothing new.
+    #
+    # @param index [String] the index name
+    # @param vector [Array<Float>] query vector
+    # @param total [Integer] total results to fetch
+    # @param namespace [String, nil] optional namespace
+    # @param filter [Hash, nil] metadata filter
+    # @return [Enumerator::Lazy] lazy enumerator of results
+    def query_stream(index:, vector:, total:, namespace: nil, filter: nil)
+      Enumerator.new do |yielder|
+        fetched = 0
+        # Hash used as an id set; avoids depending on `require "set"`.
+        seen_ids = {}
+        while fetched < total
+          requested = fetched + [page_size, total - fetched].min
+          result = client.query(
+            index: index,
+            vector: vector,
+            top_k: requested,
+            namespace: namespace,
+            filter: filter,
+            include_metadata: true
+          )
+          break if result.empty?
+          fresh = 0
+          result.each do |match|
+            # Skip matches already yielded by an earlier, smaller request
+            next if seen_ids.key?(match.id)
+            seen_ids[match.id] = true
+            yielder << match
+            fetched += 1
+            fresh += 1
+            break if fetched >= total
+          end
+          # Stop when the index is exhausted, or when the round made no
+          # progress; without the progress check a provider that keeps
+          # returning the same page would make this loop spin forever.
+          break if fresh.zero? || result.size < requested
+        end
+      end.lazy
+    end
+    # Count the vectors in an index (provider-dependent)
+    #
+    # Placeholder implementation: it only reads +:total_vector_count+ from
+    # the client's stats and never yields to the block, because fetching
+    # the vectors themselves is provider-specific.
+    #
+    # @param index [String] the index name
+    # @param namespace [String, nil] optional namespace
+    # @param batch_size [Integer] IDs per batch (currently unused)
+    # @yield [Vector] reserved for a future per-vector scan; not called yet
+    # @return [Integer] total vector count reported by the client, or 0
+    #   when no block is given
+    # @note Not all providers support efficient scanning
+    def scan_all(index:, namespace: nil, batch_size: 1000)
+      return 0 unless block_given?
+      # One stats call is enough; batch_size stays in the signature for
+      # callers but is not needed until real scanning exists.
+      stats = client.stats(index: index, namespace: namespace)
+      [stats[:total_vector_count] || 0, 0].max
+    end
+  end
+  # Enumerable wrapper that pairs a result enumerator with a metadata hash.
+  #
+  # Iteration, +take+ and +to_a+ are delegated straight to the wrapped
+  # enumerator; every other Enumerable method is built on top of +each+.
+  class StreamingResult
+    include Enumerable
+    attr_reader :enumerator, :metadata
+    # @param enumerator [#each, #take, #to_a] source of results
+    # @param metadata [Hash] extra information carried with the results
+    def initialize(enumerator, metadata = {})
+      @enumerator = enumerator
+      @metadata = metadata
+    end
+    # Delegate iteration to the wrapped enumerator.
+    def each(&block) = enumerator.each(&block)
+    # First +n+ results, as returned by the wrapped enumerator.
+    def take(n) = enumerator.take(n)
+    # All results, as returned by the wrapped enumerator.
+    def to_a = enumerator.to_a
+  end
+end

data/lib/vectra/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Vectra
-  VERSION = "0.2.1"
+  VERSION = "0.3.0"
 end

data/lib/vectra.rb CHANGED Viewed

@@ -7,6 +7,10 @@ require_relative "vectra/vector"
 require_relative "vectra/query_result"
 require_relative "vectra/instrumentation"
 require_relative "vectra/retry"
+require_relative "vectra/batch"
+require_relative "vectra/streaming"
+require_relative "vectra/cache"
+require_relative "vectra/pool"
 require_relative "vectra/active_record"
 require_relative "vectra/providers/base"
 require_relative "vectra/providers/pinecone"

data/netlify.toml ADDED Viewed

@@ -0,0 +1,12 @@
+[build]
+  command = "cd docs && bundle install && bundle exec jekyll build"
+  publish = "docs/_site"
+[build.environment]
+  JEKYLL_ENV = "production"
+  RUBY_VERSION = "3.4.7"
+[[redirects]]
+  from = "/*"
+  to = "/index.html"
+  status = 200

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: vectra-client
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.3.0
 platform: ruby
 authors:
 - Mijo Kristo
@@ -37,6 +37,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.0'
+- !ruby/object:Gem::Dependency
+  name: concurrent-ruby
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
 - !ruby/object:Gem::Dependency
   name: activerecord
   requirement: !ruby/object:Gem::Requirement
@@ -207,16 +221,50 @@ files:
 - CHANGELOG.md
 - CODE_OF_CONDUCT.md
 - CONTRIBUTING.md
-- IMPLEMENTATION_GUIDE.md
 - LICENSE
-- NEW_FEATURES_v0.2.0.md
 - README.md
-- RELEASE_CHECKLIST_v0.2.0.md
 - Rakefile
 - SECURITY.md
-- USAGE_EXAMPLES.md
 - benchmarks/batch_operations_benchmark.rb
 - benchmarks/connection_pooling_benchmark.rb
+- docs/Gemfile
+- docs/_config.yml
+- docs/_layouts/default.html
+- docs/_layouts/home.html
+- docs/_layouts/page.html
+- docs/_site/api/overview/index.html
+- docs/_site/assets/main.css
+- docs/_site/assets/main.css.map
+- docs/_site/assets/minima-social-icons.svg
+- docs/_site/assets/style.css
+- docs/_site/community/contributing/index.html
+- docs/_site/examples/basic-usage/index.html
+- docs/_site/examples/index.html
+- docs/_site/feed.xml
+- docs/_site/guides/getting-started/index.html
+- docs/_site/guides/installation/index.html
+- docs/_site/index.html
+- docs/_site/providers/index.html
+- docs/_site/providers/pgvector/index.html
+- docs/_site/providers/pinecone/index.html
+- docs/_site/providers/qdrant/index.html
+- docs/_site/providers/weaviate/index.html
+- docs/_site/robots.txt
+- docs/_site/sitemap.xml
+- docs/api/overview.md
+- docs/assets/style.css
+- docs/community/contributing.md
+- docs/examples/basic-usage.md
+- docs/examples/index.md
+- docs/guides/getting-started.md
+- docs/guides/installation.md
+- docs/guides/performance.md
+- docs/index.md
+- docs/providers/index.md
+- docs/providers/pgvector.md
+- docs/providers/pinecone.md
+- docs/providers/qdrant.md
+- docs/providers/weaviate.md
 - examples/active_record_demo.rb
 - examples/instrumentation_demo.rb
 - lib/generators/vectra/install_generator.rb
@@ -224,12 +272,15 @@ files:
 - lib/generators/vectra/templates/vectra.rb
 - lib/vectra.rb
 - lib/vectra/active_record.rb
+- lib/vectra/batch.rb
+- lib/vectra/cache.rb
 - lib/vectra/client.rb
 - lib/vectra/configuration.rb
 - lib/vectra/errors.rb
 - lib/vectra/instrumentation.rb
 - lib/vectra/instrumentation/datadog.rb
 - lib/vectra/instrumentation/new_relic.rb
+- lib/vectra/pool.rb
 - lib/vectra/providers/base.rb
 - lib/vectra/providers/pgvector.rb
 - lib/vectra/providers/pgvector/connection.rb
@@ -240,8 +291,10 @@ files:
 - lib/vectra/providers/weaviate.rb
 - lib/vectra/query_result.rb
 - lib/vectra/retry.rb
+- lib/vectra/streaming.rb
 - lib/vectra/vector.rb
 - lib/vectra/version.rb
+- netlify.toml
 homepage: https://github.com/stokry/vectra
 licenses:
 - MIT