rllama 1.0.0 → 1.0.1

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 108458213c9b89f4ecdc5a8874d0ac5c18a6c4a32b57439ee6839e50bd584e39
-  data.tar.gz: 9abb5f179ca54740adb0321211283089ddcf46b7eefc156501419d174f46bc57
+  metadata.gz: e3ddb5865414a3b5393e2aab0eeaf8853ddce4a21df46038faf04dfe7fd1b4cd
+  data.tar.gz: cac2dae8787473817d3ddb8bcfbb1e97829a89e1ad7556d250a1fd3ce8f65819
 SHA512:
-  metadata.gz: 624a9689bc2ddc6b0ac12c17ea94b3e1a7169907ea775d2f9f2f6706475d025dafda35e5dbfd79e7143a36687591c62d488819ea9170908843944b2dd1cff6f6
-  data.tar.gz: 7ec05d5870c58f58f160045a272a4e07a8eba96101803846cf4e4689fde3c2fb2e64f61c2b36dfbcc0e18614d08aa6bedc39174a0b22d3ff1be9bce04ea33d12
+  metadata.gz: a111915a6be3bb92f1319f7abab57b21c6efcbef86dbd4e3c235b1d9ac7a5ce548b8a8a2d9e205e9847ab966f7509076bb1e46075333036bba29f0854e4d9e88
+  data.tar.gz: 6b6dc6ab6b76908e9f479d463ec1d952c9b78913d18c5bbb3f94e05a4d054e5f50228b762fb11f07faf806d6e185f7d8d5c065f6b3048b3f3594735bcdff7583
@@ -12,12 +12,21 @@ module Rllama
 
       @ctx_params = Cpp.llama_context_default_params
 
-      @ctx_params[:n_ctx] = @n_ctx
-      @ctx_params[:n_batch] = @n_batch
+      @ctx_params[:n_ctx] = @n_ctx if @n_ctx
+      @ctx_params[:n_batch] = @n_batch if @n_batch
 
       if @embeddings
-        @ctx_params[:n_seq_max] = [@n_batch, @model.n_seq_max].min
+        seq_cap = @model.n_seq_max
+
+        if @n_batch&.positive? && seq_cap&.positive?
+          @ctx_params[:n_seq_max] = [@n_batch, seq_cap].min
+        elsif seq_cap&.positive?
+          @ctx_params[:n_seq_max] = seq_cap
+        end
+
         @ctx_params[:embeddings] = true
+        @ctx_params[:kv_unified] = true
+        @ctx_params[:n_ubatch] = @n_batch if @n_batch&.positive?
       end
 
       @pointer = Cpp.llama_init_from_model(model.pointer, @ctx_params)
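For embedding contexts, n_seq_max is now only written when the requested batch size and the model-reported sequence cap are present and positive, otherwise it falls back to the cap alone or leaves the llama.cpp default in place. A standalone sketch of how the guard resolves (the helper name is illustrative, not part of the gem):

# Illustration only; mirrors the guarded branches in the hunk above.
def resolve_n_seq_max(n_batch, seq_cap)
  if n_batch&.positive? && seq_cap&.positive?
    [n_batch, seq_cap].min
  elsif seq_cap&.positive?
    seq_cap
  end
end

resolve_n_seq_max(512, 64)  # => 64
resolve_n_seq_max(nil, 64)  # => 64
resolve_n_seq_max(512, nil) # => nil, so the llama.cpp default is kept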
@@ -141,19 +150,31 @@ module Rllama
     end
     alias message generate
 
-    def embed(strings, normalize: true, batch_size: 512)
-      is_array = strings.is_a?(Array)
+    def embed(strings_or_tokens, normalize: true, batch_size: 512)
+      is_tokens = strings_or_tokens.is_a?(Array) &&
+                  (strings_or_tokens[0].is_a?(Integer) ||
+                   (strings_or_tokens[0].is_a?(Array) && strings_or_tokens[0][0].is_a?(Integer)))
 
-      strings = Array(strings) unless is_array
+      input_is_array = is_tokens ? strings_or_tokens[0].is_a?(Array) : strings_or_tokens.is_a?(Array)
 
-      tokenized_strings = strings.map do |text|
-        max_tokens = text.bytesize + 2
-        tokens_ptr = FFI::MemoryPointer.new(:int32, max_tokens)
-        count = Cpp.llama_tokenize(@model.vocab, text, text.bytesize, tokens_ptr, max_tokens, true, false)
+      normalized_inputs = input_is_array ? strings_or_tokens : [strings_or_tokens]
+
+      tokenized_strings =
+        if is_tokens
+          input_is_array ? strings_or_tokens : [strings_or_tokens]
+        else
+          normalized_inputs.map { |text| @model.tokenize(text) }
+        end
 
-        raise Error, "Failed to tokenize text: '#{text}'" if count.negative?
+      max_tokens_in_prompt = tokenized_strings.map(&:length).max || 0
 
-        tokens_ptr.read_array_of_int32(count)
+      if max_tokens_in_prompt > batch_size
+        raise Error, "batch_size (#{batch_size}) is smaller than the longest prompt (#{max_tokens_in_prompt} tokens)."
+      end
+
+      if max_tokens_in_prompt > @n_batch
+        raise Error, "Context n_batch (#{@n_batch}) is smaller than the longest " \
+                     "prompt (#{max_tokens_in_prompt} tokens). Increase batch_size when calling embed."
       end
 
       all_embeddings = []
@@ -166,6 +187,9 @@ module Rllama
 
         batch[:n_tokens] = current_batch_token_count
 
+        memory_ptr = Cpp.llama_get_memory(@pointer)
+        Cpp.llama_memory_clear(memory_ptr, true) unless memory_ptr.null?
+
         raise Error, 'llama_decode failed' unless Cpp.llama_decode(@pointer, batch).zero?
 
         prompts_in_batch.each do |seq_id_in_batch|
@@ -179,7 +203,8 @@ module Rllama
         end
 
         current_batch_token_count = 0
-        prompts_in_batch = []
+
+        prompts_in_batch.clear
       end
 
       tokenized_strings.each do |tokens|
@@ -207,7 +232,7 @@ module Rllama
 
       Cpp.llama_batch_free(batch)
 
-      is_array ? all_embeddings : all_embeddings[0]
+      input_is_array ? all_embeddings : all_embeddings[0]
     end
 
     def embeddings?
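The reworked embed accepts either plain strings or pre-tokenized input (a flat array of Integer token ids, or an array of such arrays), delegates string tokenization to Model#tokenize, and validates the longest prompt against batch_size and the context's n_batch before decoding. A rough usage sketch; the load_model call is a placeholder, since the loading entry point is not part of this diff, and the return shapes assume Model#embed passes through the value produced here:

model = Rllama.load_model('path/to/model.gguf') # placeholder loader call

# Strings are tokenized internally via the new Model#tokenize.
single = model.embed('hello world')                  # single input -> single embedding
many   = model.embed(['first text', 'second text'])  # array input  -> array of embeddings

# Pre-tokenized input is passed through as-is.
tokens = model.tokenize('hello world')               # => [101, 2023, ...] (example ids)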
data/lib/rllama/loader.rb CHANGED
@@ -62,6 +62,8 @@ module Rllama
 
       local_path = File.join(dir, org, repo, file_path)
 
+      return local_path if File.exist?(local_path)
+
       puts "Destination: #{local_path}"
 
       download_file(url, local_path, "HuggingFace model: #{hf_path}")
@@ -74,6 +76,8 @@ module Rllama
 
      local_path = File.join(dir, filename)
 
+      return local_path if File.exist?(local_path)
+
       puts "Destination: #{local_path}"
 
       download_file(url, local_path, "URL: #{url}")
@@ -82,8 +86,6 @@ module Rllama
     def download_file(url, local_path, description)
       FileUtils.mkdir_p(File.dirname(local_path))
 
-      return local_path if File.exist?(local_path)
-
       temp_path = File.join(File.dirname(local_path), "~#{File.basename(local_path)}")
 
       existing_size = File.exist?(temp_path) ? File.size(temp_path) : 0
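Both resolve paths now check for an already-downloaded file before logging the destination or touching the network, rather than short-circuiting inside download_file after the directory has been created and the destination printed. The resulting pattern, sketched with an illustrative method name:

# Sketch of the pattern applied in both paths; `resolve` is not a method in the gem.
def resolve(url, local_path)
  return local_path if File.exist?(local_path) # cached: no output, no network I/O

  puts "Destination: #{local_path}"
  download_file(url, local_path, "URL: #{url}")
end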
data/lib/rllama/model.rb CHANGED
@@ -47,11 +47,32 @@ module Rllama
     alias message generate
 
     def embed(prompt, normalize: true, batch_size: 512, &block)
-      init_embedding_context do |ctx|
-        ctx.embed(prompt, normalize:, batch_size:, &block)
+      inputs = prompt.is_a?(Array) ? prompt : [prompt]
+
+      tokenized_inputs = inputs.map { |text| tokenize(text, max_tokens: n_ctx_train) }
+      max_token_length = tokenized_inputs.map(&:length).max || 0
+
+      effective_batch_size = [batch_size, max_token_length].max
+      effective_ctx = [n_ctx_train, max_token_length].min
+
+      init_embedding_context(n_ctx: effective_ctx, n_batch: effective_batch_size) do |ctx|
+        inputs = prompt.is_a?(Array) ? tokenized_inputs : tokenized_inputs[0]
+
+        ctx.embed(inputs, normalize:, batch_size: effective_batch_size, &block)
       end
     end
 
+    def tokenize(text, max_tokens: nil)
+      size = text.bytesize + 2
+
+      tokens_ptr = FFI::MemoryPointer.new(:int32, size)
+      count = Cpp.llama_tokenize(vocab, text, text.bytesize, tokens_ptr, size, true, false)
+
+      raise Error, "Failed to tokenize text: '#{text}'" if count.negative?
+
+      tokens_ptr.read_array_of_int32([count, max_tokens].compact.min)
+    end
+
     def close
       Cpp.llama_model_free(@pointer)
     end
@@ -70,7 +91,7 @@ module Rllama
       context
     end
 
-    def init_embedding_context(n_ctx: 2048, n_batch: 512, &)
+    def init_embedding_context(n_ctx: n_ctx_train, n_batch: 512, &)
       init_context(embeddings: true, n_ctx:, n_batch:, &)
     end
 
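Model#embed now tokenizes every input up front (capped at n_ctx_train tokens) and sizes the embedding context from the longest prompt: the batch grows to fit the prompt, and the context is no larger than either the training context or the longest prompt. A worked example with illustrative numbers:

n_ctx_train      = 8192  # training context reported by the model (example value)
batch_size       = 512   # caller-supplied default
max_token_length = 900   # longest tokenized prompt in this call

effective_batch_size = [batch_size, max_token_length].max   # => 900
effective_ctx        = [n_ctx_train, max_token_length].min  # => 900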
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Rllama
-  VERSION = '1.0.0'
+  VERSION = '1.0.1'
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rllama
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
 platform: ruby
 authors:
 - Pete Matsyburka