fastembed 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
+# frozen_string_literal: true
+
+module Fastembed
+  # Async support for embedding operations
+  #
+  # Provides Future-like objects for running embeddings in background threads.
+  # Useful for parallelizing embedding generation across multiple documents.
+  #
+  # @example Async embedding
+  #   embedding = Fastembed::TextEmbedding.new
+  #   future = embedding.embed_async(documents)
+  #   # ... do other work ...
+  #   vectors = future.value # blocks until complete
+  #
+  # @example Multiple concurrent embeddings
+  #   futures = documents.each_slice(100).map do |batch|
+  #     embedding.embed_async(batch)
+  #   end
+  #   results = futures.flat_map(&:value)
+  #
+  module Async
+    # A Future representing an async embedding operation
+    #
+    # Wraps a background thread that performs embedding, providing
+    # methods to check completion status and retrieve results.
+    #
+    class Future
+      # @return [Thread] The background thread
+      attr_reader :thread
+
+      # Create a new Future
+      #
+      # @yield Block to execute in background thread
+      # @return [Future]
+      def initialize(&block)
+        @result = nil
+        @error = nil
+        @completed = false
+        @mutex = Mutex.new
+        @condition = ConditionVariable.new
+
+        @thread = Thread.new do
+          result = block.call
+          @mutex.synchronize do
+            @result = result
+            @completed = true
+            @condition.broadcast
+          end
+        rescue StandardError => e
+          @mutex.synchronize do
+            @error = e
+            @completed = true
+            @condition.broadcast
+          end
+        end
+      end
+
+      # Check if the operation is complete
+      #
+      # @return [Boolean] True if complete (success or failure)
+      def complete?
+        @mutex.synchronize { @completed }
+      end
+
+      alias completed? complete?
+
+      # Check if the operation is still running
+      #
+      # @return [Boolean] True if still running
+      def pending?
+        !complete?
+      end
+
+      # Check if the operation completed successfully
+      #
+      # @return [Boolean] True if completed without error
+      def success?
+        @mutex.synchronize { @completed && @error.nil? }
+      end
+
+      # Check if the operation failed
+      #
+      # @return [Boolean] True if completed with error
+      def failure?
+        @mutex.synchronize { @completed && !@error.nil? }
+      end
+
+      # Get the result, blocking until complete
+      #
+      # @param timeout [Numeric, nil] Maximum seconds to wait (nil = forever)
+      # @return [Object] The result of the async operation
+      # @raise [StandardError] If the operation raised an error
+      # @raise [Timeout::Error] If timeout expires before completion
+      def value(timeout: nil)
+        wait(timeout: timeout)
+        raise @error if @error
+
+        @result
+      end
+
+      alias result value
+
+      # Wait for completion without retrieving the result
+      #
+      # @param timeout [Numeric, nil] Maximum seconds to wait (nil = forever)
+      # @return [Boolean] True if completed, false if timed out
+      def wait(timeout: nil)
+        @mutex.synchronize do
+          return true if @completed
+
+          if timeout
+            deadline = Time.now + timeout
+            until @completed
+              remaining = deadline - Time.now
+              break if remaining <= 0
+
+              @condition.wait(@mutex, remaining)
+            end
+          else
+            @condition.wait(@mutex) until @completed
+          end
+
+          @completed
+        end
+      end
+
+      # Get the error if the operation failed
+      #
+      # @return [StandardError, nil] The error, or nil if successful/pending
+      def error
+        @mutex.synchronize { @error }
+      end
+
+      # Apply a transformation to the result
+      #
+      # @yield [result] Block to transform the result
+      # @return [Future] A new Future with the transformed result
+      def then(&block)
+        Future.new do
+          block.call(value)
+        end
+      end
+
+      # Handle errors
+      #
+      # @yield [error] Block to handle errors
+      # @return [Future] A new Future that handles errors
+      def rescue(&block)
+        Future.new do
+          value
+        rescue StandardError => e
+          block.call(e)
+        end
+      end
+    end
+
+    # Run multiple futures concurrently and wait for all to complete
+    #
+    # @param futures [Array<Future>] Futures to wait for
+    # @param timeout [Numeric, nil] Maximum seconds to wait
+    # @return [Array] Results from all futures
+    # @raise [StandardError] If any future raised an error
+    def self.all(futures, timeout: nil)
+      futures.each { |f| f.wait(timeout: timeout) }
+      futures.map(&:value)
+    end
+
+    # Run multiple futures concurrently and return first completed
+    #
+    # @param futures [Array<Future>] Futures to race
+    # @param timeout [Numeric, nil] Maximum seconds to wait
+    # @return [Object] Result from first completed future
+    # @raise [Timeout::Error] If timeout expires before any future completes
+    def self.race(futures, timeout: nil)
+      raise ArgumentError, 'No futures provided' if futures.empty?
+
+      deadline = timeout ? Time.now + timeout : nil
+      sleep_time = 0.001 # Start with 1ms
+
+      loop do
+        futures.each do |future|
+          return future.value if future.complete?
+        end
+
+        raise Timeout::Error, 'No future completed within timeout' if deadline && Time.now >= deadline
+
+        sleep sleep_time
+        # Exponential backoff up to 10ms to reduce CPU usage for long waits
+        sleep_time = [sleep_time * 1.5, 0.01].min
+      end
+    end
+  end
+end
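
The hunk above adds Fastembed::Async: a thread-backed Future plus the Async.all and Async.race helpers. A minimal usage sketch, assuming TextEmbedding#embed_async returns an Async::Future and each result is an array of vectors, as the doc comments above describe:

    require 'fastembed'

    embedding = Fastembed::TextEmbedding.new
    documents = ['first text', 'second text', 'third text', 'fourth text']

    # One future per batch; each embedding call runs in a background thread.
    futures = documents.each_slice(2).map { |batch| embedding.embed_async(batch) }

    # Async.all waits for every future (optionally with a timeout) and returns
    # each future's result; flatten one level to get a single list of vectors.
    vectors = Fastembed::Async.all(futures).flatten(1)

    # Async.race polls the futures and returns the first completed result,
    # raising Timeout::Error if nothing finishes within the timeout.
    first_batch = Fastembed::Async.race(futures, timeout: 30)
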
@@ -0,0 +1,247 @@
+# frozen_string_literal: true
+
+require 'onnxruntime'
+require 'tokenizers'
+
+module Fastembed
+  # Shared functionality for model classes
+  #
+  # This module provides common initialization and utility methods used by
+  # all model types (TextEmbedding, TextCrossEncoder, TextSparseEmbedding, etc.).
+  # It handles model downloading, ONNX session creation, and tokenizer loading.
+  #
+  # @abstract Include in model classes and call {#initialize_model}
+  #
+  module BaseModel
+    # @!attribute [r] model_name
+    #   @return [String] Name of the loaded model
+    # @!attribute [r] model_info
+    #   @return [BaseModelInfo] Model metadata and configuration
+    # @!attribute [r] quantization
+    #   @return [Symbol] Current quantization type
+    attr_reader :model_name, :model_info, :quantization
+
+    private
+
+    # Common initialization logic for all model types
+    # @param model_name [String] Name of the model
+    # @param cache_dir [String, nil] Custom cache directory
+    # @param threads [Integer, nil] Number of threads for ONNX Runtime
+    # @param providers [Array<String>, nil] ONNX execution providers
+    # @param show_progress [Boolean] Whether to show download progress
+    # @param quantization [Symbol] Quantization type (:fp32, :fp16, :int8, :uint8, :q4)
+    def initialize_model(model_name:, cache_dir:, threads:, providers:, show_progress:, quantization: nil)
+      @model_name = model_name
+      @threads = threads
+      @providers = providers
+      @quantization = quantization || Quantization::DEFAULT
+
+      validate_quantization!
+
+      ModelManagement.cache_dir = cache_dir if cache_dir
+
+      @model_info = resolve_model_info(model_name)
+      @model_dir = retrieve_model(model_name, show_progress: show_progress)
+    end
+
+    # Validate that the quantization type is supported
+    # @raise [ArgumentError] If quantization type is invalid
+    def validate_quantization!
+      return if Quantization.valid?(@quantization)
+
+      valid_types = Quantization::TYPES.keys.join(', ')
+      raise ArgumentError, "Invalid quantization type: #{@quantization}. Valid types: #{valid_types}"
+    end
+
+    # Get the model file path, accounting for quantization
+    # @return [String] Path to quantized model file (or base if fp32)
+    def quantized_model_file
+      Quantization.model_file(@model_info.model_file, @quantization)
+    end
+
+    # Override in subclasses to resolve from appropriate registry
+    #
+    # @param _model_name [String] Name of the model
+    # @return [BaseModelInfo] Model information object
+    # @raise [NotImplementedError] If not overridden in subclass
+    # @abstract
+    def resolve_model_info(_model_name)
+      raise NotImplementedError, 'Subclasses must implement resolve_model_info'
+    end
+
+    # Download or retrieve cached model
+    #
+    # @param model_name [String] Name of the model
+    # @param show_progress [Boolean] Whether to show download progress
+    # @return [String] Path to model directory
+    def retrieve_model(model_name, show_progress:)
+      ModelManagement.retrieve_model(
+        model_name,
+        model_info: @model_info,
+        show_progress: show_progress
+      )
+    end
+
+    # Build ONNX session options hash
+    # @return [Hash] Options for OnnxRuntime::InferenceSession
+    def build_session_options
+      options = {}
+      options[:inter_op_num_threads] = @threads if @threads
+      options[:intra_op_num_threads] = @threads if @threads
+      options
+    end
+
+    # Load an ONNX inference session
+    #
+    # @param model_path [String] Path to ONNX model file
+    # @param providers [Array<String>, nil] Execution providers
+    # @return [OnnxRuntime::InferenceSession] The loaded session
+    # @raise [Error] If model file not found
+    def load_onnx_session(model_path, providers: nil)
+      raise Error, "Model file not found: #{model_path}" unless File.exist?(model_path)
+
+      OnnxRuntime::InferenceSession.new(
+        model_path,
+        **build_session_options,
+        providers: providers || ['CPUExecutionProvider']
+      )
+    end
+
+    # Load a HuggingFace tokenizer from file
+    #
+    # @param tokenizer_path [String] Path to tokenizer.json
+    # @param max_length [Integer] Maximum sequence length for truncation
+    # @return [Tokenizers::Tokenizer] Configured tokenizer
+    # @raise [Error] If tokenizer file not found
+    def load_tokenizer_from_file(tokenizer_path, max_length:)
+      raise Error, "Tokenizer not found: #{tokenizer_path}" unless File.exist?(tokenizer_path)
+
+      tokenizer = Tokenizers::Tokenizer.from_file(tokenizer_path)
+      tokenizer.enable_padding(pad_id: 0, pad_token: '[PAD]')
+      tokenizer.enable_truncation(max_length)
+      tokenizer
+    end
+
+    # Load both ONNX model and tokenizer using model_info paths
+    #
+    # Sets @session and @tokenizer instance variables.
+    # Uses @model_dir and @model_info which must be set first.
+    #
+    # @param model_file_override [String, nil] Override model file path
+    # @return [void]
+    def setup_model_and_tokenizer(model_file_override: nil)
+      model_file = model_file_override || @model_info.model_file
+      model_path = File.join(@model_dir, model_file)
+      @session = load_onnx_session(model_path, providers: @providers)
+
+      tokenizer_path = File.join(@model_dir, @model_info.tokenizer_file)
+      @tokenizer = load_tokenizer_from_file(tokenizer_path, max_length: @model_info.max_length)
+    end
+
+    # Get input names from the ONNX session
+    #
+    # @return [Array<String>] List of input tensor names
+    def session_input_names
+      @session_input_names ||= @session.inputs.map { |i| i[:name] }
+    end
+
+    # Prepare model inputs from tokenizer encodings
+    #
+    # @param encodings [Array<Tokenizers::Encoding>] Batch of tokenizer encodings
+    # @return [Hash] Input tensors for ONNX session
+    def prepare_model_inputs(encodings)
+      input_ids = encodings.map(&:ids)
+      attention_mask = encodings.map(&:attention_mask)
+
+      inputs = { 'input_ids' => input_ids }
+
+      # Add attention_mask/input_mask if the model expects it
+      # (SPLADE models use input_mask, most BERT models use attention_mask)
+      mask_key = if session_input_names.include?('attention_mask')
+                   'attention_mask'
+                 elsif session_input_names.include?('input_mask')
+                   'input_mask'
+                 end
+
+      inputs[mask_key] = attention_mask if mask_key
+
+      # Add token_type_ids/segment_ids if the model expects it
+      # (SPLADE models use segment_ids, BERT models use token_type_ids)
+      token_type_key = if session_input_names.include?('token_type_ids')
+                         'token_type_ids'
+                       elsif session_input_names.include?('segment_ids')
+                         'segment_ids'
+                       end
+
+      if token_type_key
+        token_type_ids = encodings.map { |e| e.type_ids || Array.new(e.ids.length, 0) }
+        inputs[token_type_key] = token_type_ids
+      end
+
+      inputs
+    end
+
+    # Tokenize texts and prepare inputs for the model
+    #
+    # @param texts [Array<String>] Texts to tokenize
+    # @return [Hash] Hash with :inputs and :attention_mask keys
+    def tokenize_and_prepare(texts)
+      encodings = @tokenizer.encode_batch(texts)
+      inputs = prepare_model_inputs(encodings)
+      { inputs: inputs, attention_mask: encodings.map(&:attention_mask) }
+    end
+
+    # Initialize model from local directory instead of downloading
+    #
+    # @param local_model_dir [String] Path to local model directory
+    # @param model_name [String] Name identifier for the model
+    # @param threads [Integer, nil] Number of threads for ONNX Runtime
+    # @param providers [Array<String>, nil] ONNX execution providers
+    # @param quantization [Symbol, nil] Quantization type
+    # @param model_file [String, nil] Override model file name
+    # @param tokenizer_file [String, nil] Override tokenizer file name
+    # @raise [ArgumentError] If local_model_dir doesn't exist
+    def initialize_from_local(local_model_dir:, model_name:, threads:, providers:, quantization:, model_file:,
+                              tokenizer_file:)
+      raise ArgumentError, "Local model directory not found: #{local_model_dir}" unless Dir.exist?(local_model_dir)
+
+      @model_name = model_name
+      @threads = threads
+      @providers = providers
+      @quantization = quantization || Quantization::DEFAULT
+      @model_dir = local_model_dir
+
+      validate_quantization!
+
+      # Try to get model info from registry, or create a minimal one
+      # Subclasses should override create_local_model_info
+      @model_info = resolve_model_info_safe(model_name) || create_local_model_info(
+        model_name: model_name,
+        model_file: model_file,
+        tokenizer_file: tokenizer_file
+      )
+    end
+
+    # Safely try to resolve model info, returning nil if not found
+    #
+    # @param model_name [String] Model name to look up
+    # @return [BaseModelInfo, nil] Model info or nil
+    def resolve_model_info_safe(model_name)
+      resolve_model_info(model_name)
+    rescue StandardError
+      nil
+    end
+
+    # Create model info for locally loaded model
+    # Subclasses should override this to create appropriate model info type
+    #
+    # @param model_name [String] Model name identifier
+    # @param model_file [String, nil] Model file path
+    # @param tokenizer_file [String, nil] Tokenizer file path
+    # @return [BaseModelInfo] Model info object
+    # @abstract
+    def create_local_model_info(model_name:, model_file:, tokenizer_file:)
+      raise NotImplementedError, 'Subclasses must implement create_local_model_info for local model loading'
+    end
+  end
+end
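
BaseModel above is private plumbing shared by the concrete model classes, which are not part of this diff. The sketch below shows how the new local-loading path (initialize_from_local plus setup_model_and_tokenizer) could be exercised by a hypothetical including class; the class name, the LocalInfo struct, the model.onnx/tokenizer.json layout, and the direct @session.run call are illustrative assumptions, not gem API:

    # Hypothetical subclass; Fastembed's real model classes are not shown here.
    class LocalOnlyEmbedding
      include Fastembed::BaseModel

      # Stand-in for a real model info object; only the attributes BaseModel
      # reads (model_file, tokenizer_file, max_length) are provided.
      LocalInfo = Struct.new(:model_name, :model_file, :tokenizer_file, :max_length, keyword_init: true)

      def initialize(dir)
        initialize_from_local(local_model_dir: dir, model_name: 'local-model',
                              threads: nil, providers: nil, quantization: nil,
                              model_file: 'model.onnx', tokenizer_file: 'tokenizer.json')
        setup_model_and_tokenizer
      end

      # Returns raw ONNX outputs; pooling/normalization is left to the caller.
      def embed_raw(texts)
        prepared = tokenize_and_prepare(texts)
        # OnnxRuntime::InferenceSession#run takes output names (nil = all) and an input hash.
        @session.run(nil, prepared[:inputs])
      end

      private

      # No registry for purely local models; returning nil makes
      # initialize_from_local fall back to create_local_model_info.
      def resolve_model_info(_model_name)
        nil
      end

      def create_local_model_info(model_name:, model_file:, tokenizer_file:)
        LocalInfo.new(model_name: model_name,
                      model_file: model_file || 'model.onnx',
                      tokenizer_file: tokenizer_file || 'tokenizer.json',
                      max_length: 512)
      end
    end
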
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module Fastembed
+  # Shared functionality for model information classes
+  #
+  # This module provides common attributes and methods for all model info types
+  # (embedding, reranking, sparse, late interaction). It handles metadata like
+  # model name, file paths, and HuggingFace source information.
+  #
+  # @abstract Include in model info classes and call {#initialize_base}
+  #
+  module BaseModelInfo
+    # Default maximum sequence length for tokenization
+    DEFAULT_MAX_LENGTH = 512
+
+    # @!attribute [r] model_name
+    #   @return [String] Full model identifier (e.g., "BAAI/bge-small-en-v1.5")
+    # @!attribute [r] description
+    #   @return [String] Human-readable model description
+    # @!attribute [r] size_in_gb
+    #   @return [Float] Approximate model size in gigabytes
+    # @!attribute [r] model_file
+    #   @return [String] Relative path to ONNX model file
+    # @!attribute [r] tokenizer_file
+    #   @return [String] Relative path to tokenizer JSON file
+    # @!attribute [r] sources
+    #   @return [Hash] Source repositories (e.g., {hf: "Xenova/model-name"})
+    # @!attribute [r] max_length
+    #   @return [Integer] Maximum token sequence length
+    attr_reader :model_name, :description, :size_in_gb, :model_file,
+                :tokenizer_file, :sources, :max_length
+
+    # Returns the HuggingFace repository ID
+    # @return [String] The HF repo ID for downloading
+    def hf_repo
+      sources[:hf]
+    end
+
+    private
+
+    # Initialize common model info attributes
+    #
+    # @param model_name [String] Full model identifier
+    # @param description [String] Human-readable description
+    # @param size_in_gb [Float] Model size in GB
+    # @param sources [Hash] Source repositories
+    # @param model_file [String] Path to ONNX model file
+    # @param tokenizer_file [String] Path to tokenizer file
+    # @param max_length [Integer] Maximum sequence length
+    def initialize_base(model_name:, description:, size_in_gb:, sources:, model_file:, tokenizer_file:,
+                        max_length: DEFAULT_MAX_LENGTH)
+      @model_name = model_name
+      @description = description
+      @size_in_gb = size_in_gb
+      @sources = sources
+      @model_file = model_file
+      @tokenizer_file = tokenizer_file
+      @max_length = max_length
+    end
+  end
+end
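
A concrete info class that mixes in BaseModelInfo only needs to forward its keyword arguments to initialize_base. The class name and sample values below are illustrative, not entries from the gem's model registry:

    class ExampleModelInfo
      include Fastembed::BaseModelInfo

      def initialize(**kwargs)
        initialize_base(**kwargs)
      end
    end

    info = ExampleModelInfo.new(
      model_name: 'BAAI/bge-small-en-v1.5',
      description: 'Small English embedding model',
      size_in_gb: 0.067,
      sources: { hf: 'Xenova/bge-small-en-v1.5' },
      model_file: 'onnx/model.onnx',
      tokenizer_file: 'tokenizer.json'
    )

    info.hf_repo    # => "Xenova/bge-small-en-v1.5"
    info.max_length # => 512, from DEFAULT_MAX_LENGTH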