rllama 1.0.0-arm64-darwin → 1.0.1-arm64-darwin
This diff shows the content of publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
- checksums.yaml +4 -4
- data/lib/rllama/context.rb +39 -14
- data/lib/rllama/loader.rb +4 -2
- data/lib/rllama/model.rb +24 -3
- data/lib/rllama/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7a5a70bb24edc54a365dad60d5d906c8933ad88a285cb87f724a1b2b64b1d4c3
+  data.tar.gz: ed85fda6d1e26fef18a5c95cb9551d65b681d27fac6a1ab6c8c7feefc63a8926
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7abfdab156080240b3a45165d5ad6a10e9277affbc941c02ffa45246697ba534d25dc1a557acabb7b5ed32925fd09c9530b5df6bec529f9a813c5641593b2059
+  data.tar.gz: e18319da1fcb8fe9cac23f5c71dfafff1d3047d52081d742c7a39c10cd4b194871652bd1f04aa876cb5ebf8e8e13775bce98900fff40eefeedbe23fcd8a44889
data/lib/rllama/context.rb
CHANGED
@@ -12,12 +12,21 @@ module Rllama

       @ctx_params = Cpp.llama_context_default_params

-      @ctx_params[:n_ctx] = @n_ctx
-      @ctx_params[:n_batch] = @n_batch
+      @ctx_params[:n_ctx] = @n_ctx if @n_ctx
+      @ctx_params[:n_batch] = @n_batch if @n_batch

       if @embeddings
-
+        seq_cap = @model.n_seq_max
+
+        if @n_batch&.positive? && seq_cap&.positive?
+          @ctx_params[:n_seq_max] = [@n_batch, seq_cap].min
+        elsif seq_cap&.positive?
+          @ctx_params[:n_seq_max] = seq_cap
+        end
+
         @ctx_params[:embeddings] = true
+        @ctx_params[:kv_unified] = true
+        @ctx_params[:n_ubatch] = @n_batch if @n_batch&.positive?
       end

       @pointer = Cpp.llama_init_from_model(model.pointer, @ctx_params)

@@ -141,19 +150,31 @@ module Rllama
     end
     alias message generate

-    def embed(
-
+    def embed(strings_or_tokens, normalize: true, batch_size: 512)
+      is_tokens = strings_or_tokens.is_a?(Array) &&
+                  (strings_or_tokens[0].is_a?(Integer) ||
+                  (strings_or_tokens[0].is_a?(Array) && strings_or_tokens[0][0].is_a?(Integer)))

-
+      input_is_array = is_tokens ? strings_or_tokens[0].is_a?(Array) : strings_or_tokens.is_a?(Array)

-
-
-
-
+      normalized_inputs = input_is_array ? strings_or_tokens : [strings_or_tokens]
+
+      tokenized_strings =
+        if is_tokens
+          input_is_array ? strings_or_tokens : [strings_or_tokens]
+        else
+          normalized_inputs.map { |text| @model.tokenize(text) }
+        end

-
+      max_tokens_in_prompt = tokenized_strings.map(&:length).max || 0

-
+      if max_tokens_in_prompt > batch_size
+        raise Error, "batch_size (#{batch_size}) is smaller than the longest prompt (#{max_tokens_in_prompt} tokens)."
+      end
+
+      if max_tokens_in_prompt > @n_batch
+        raise Error, "Context n_batch (#{@n_batch}) is smaller than the longest " \
+                     "prompt (#{max_tokens_in_prompt} tokens). Increase batch_size when calling embed."
       end

       all_embeddings = []

@@ -166,6 +187,9 @@ module Rllama

        batch[:n_tokens] = current_batch_token_count

+        memory_ptr = Cpp.llama_get_memory(@pointer)
+        Cpp.llama_memory_clear(memory_ptr, true) unless memory_ptr.null?
+
        raise Error, 'llama_decode failed' unless Cpp.llama_decode(@pointer, batch).zero?

        prompts_in_batch.each do |seq_id_in_batch|

@@ -179,7 +203,8 @@ module Rllama
        end

        current_batch_token_count = 0
-
+
+        prompts_in_batch.clear
       end

       tokenized_strings.each do |tokens|

@@ -207,7 +232,7 @@ module Rllama

       Cpp.llama_batch_free(batch)

-
+      input_is_array ? all_embeddings : all_embeddings[0]
     end

     def embeddings?
data/lib/rllama/loader.rb
CHANGED
@@ -62,6 +62,8 @@ module Rllama

      local_path = File.join(dir, org, repo, file_path)

+      return local_path if File.exist?(local_path)
+
      puts "Destination: #{local_path}"

      download_file(url, local_path, "HuggingFace model: #{hf_path}")

@@ -74,6 +76,8 @@ module Rllama

      local_path = File.join(dir, filename)

+      return local_path if File.exist?(local_path)
+
      puts "Destination: #{local_path}"

      download_file(url, local_path, "URL: #{url}")

@@ -82,8 +86,6 @@ module Rllama

    def download_file(url, local_path, description)
      FileUtils.mkdir_p(File.dirname(local_path))

-      return local_path if File.exist?(local_path)
-
      temp_path = File.join(File.dirname(local_path), "~#{File.basename(local_path)}")

      existing_size = File.exist?(temp_path) ? File.size(temp_path) : 0
data/lib/rllama/model.rb
CHANGED
@@ -47,11 +47,32 @@ module Rllama
     alias message generate

     def embed(prompt, normalize: true, batch_size: 512, &block)
-
-
+      inputs = prompt.is_a?(Array) ? prompt : [prompt]
+
+      tokenized_inputs = inputs.map { |text| tokenize(text, max_tokens: n_ctx_train) }
+      max_token_length = tokenized_inputs.map(&:length).max || 0
+
+      effective_batch_size = [batch_size, max_token_length].max
+      effective_ctx = [n_ctx_train, max_token_length].min
+
+      init_embedding_context(n_ctx: effective_ctx, n_batch: effective_batch_size) do |ctx|
+        inputs = prompt.is_a?(Array) ? tokenized_inputs : tokenized_inputs[0]
+
+        ctx.embed(inputs, normalize:, batch_size: effective_batch_size, &block)
       end
     end

+    def tokenize(text, max_tokens: nil)
+      size = text.bytesize + 2
+
+      tokens_ptr = FFI::MemoryPointer.new(:int32, size)
+      count = Cpp.llama_tokenize(vocab, text, text.bytesize, tokens_ptr, size, true, false)
+
+      raise Error, "Failed to tokenize text: '#{text}'" if count.negative?
+
+      tokens_ptr.read_array_of_int32([count, max_tokens].compact.min)
+    end
+
     def close
       Cpp.llama_model_free(@pointer)
     end

@@ -70,7 +91,7 @@ module Rllama
       context
     end

-    def init_embedding_context(n_ctx:
+    def init_embedding_context(n_ctx: n_ctx_train, n_batch: 512, &)
      init_context(embeddings: true, n_ctx:, n_batch:, &)
    end

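On the model side, Model#embed now tokenizes up front with the new public tokenize helper, sizes the embedding context from the longest prompt, and hands the pre-tokenized input to Context#embed. A rough sketch of how this surfaces to callers follows; the load call and file name are assumptions for illustration, since the diff does not show how a Model instance is obtained.

# Assumed entry point, for illustration only.
model = Rllama.load_model('embedding-model.gguf')

# tokenize is now public: returns an array of Integer token ids, optionally truncated to max_tokens.
ids = model.tokenize('The quick brown fox', max_tokens: 32)

# embed accepts a single string or an array of strings; internally it picks
# n_ctx/n_batch from the longest tokenized prompt before embedding.
embedding  = model.embed('The quick brown fox')
embeddings = model.embed(['first prompt', 'second prompt'], batch_size: 1024)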
data/lib/rllama/version.rb
CHANGED