llama_cpp 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
- data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
+ metadata.gz: 87010edca1b352ae7bdd3a693451893b13dd75e9e109f9e2b42f6164cc186b08
+ data.tar.gz: ff34254b6377698903dcf771663b91c3c804111228888d96e91363bd0f29d3a6
  SHA512:
- metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
- data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
+ metadata.gz: a23aa59fa4936940b28942398bfe98bdb09574162943ebaff31cdbda19394c7690f6c780f49da31eecc4b77427718a8b7ee58e62b2adb087100e1eee66310abc
+ data.tar.gz: 5cc105e69fc81d4616d93cd036af70f809be0c99b9155a6d3e386c9900ca012123353c23417ce56a5a64a1d805108b35de2d9feb5a6265c110d9341e5a2e242b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+ ## [[0.12.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.1...v0.12.0)] - 2024-01-11
+
+ - Add `get_one` singleton method to `Batch`.
+
+ **Breaking Changes**
+
+ - Add deprecation warning to `eval`, `eval_embd`, and `sample_temperature` methods on `Context`.
+ - Change to avoid using deprecated methods on `generate` method and example scripts.
+
  ## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08

  - Fix to set the values of `@n_tokens` and `@has_evaluated` instance variables in `decode` method of `Context`.
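The breaking changes above amount to replacing the deprecated `Context#eval`, `Context#eval_embd`, and `Context#sample_temperature` calls. A minimal migration sketch, using only API names that appear later in this diff; the model path and prompt are placeholders:

```ruby
require 'llama_cpp'

# Placeholder model path, for illustration only.
model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)
tokens = context.model.tokenize(text: 'Hello', add_bos: true)

# 0.11.x style (now prints a deprecation warning):
#   context.eval(tokens: tokens, n_past: 0)
# 0.12.0 style: build a batch with the new Batch.get_one singleton method and decode it.
context.decode(LLaMACpp::Batch.get_one(tokens: tokens, n_tokens: tokens.size, pos_zero: 0, seq_id: 0))

# Likewise, sample_temperature(candidates, temperature: t) becomes sample_temp(candidates, temp: t).
```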
data/README.md CHANGED
@@ -22,14 +22,14 @@ If bundler is not being used to manage dependencies, install the gem by executin
  $ gem install llama_cpp
  ```

- There are several installation options for improving execution performance:
+ There are several installation options:

  ```sh
  # use OpenBLAS
  $ gem install llama_cpp -- --with-openblas

- # use Metal on macOS
- $ gem install llama_cpp -- --with-metal
+ # use CUDA
+ $ gem install llama_cpp -- --with-cuda
  ```

  Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
data/examples/chat.rb CHANGED
@@ -9,6 +9,7 @@
  require 'llama_cpp'
  require 'thor'
  require 'readline'
+ require 'etc'

  class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  default_command :main
@@ -30,12 +31,15 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  option :typical_p, type: :numeric, desc: 'locally typical sampling, parameter p', default: 1.0
  option :temp, type: :numeric, desc: 'temperature', default: 0.8
  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
+ option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
  def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  mdl_params = LLaMACpp::ModelParams.new
  mdl_params.n_gpu_layers = options[:n_gpu_layers]
  model = LLaMACpp::Model.new(model_path: options[:model], params: mdl_params)
  ctx_params = LLaMACpp::ContextParams.new
  ctx_params.seed = options[:seed] if options[:seed] != -1
+ ctx_params.n_threads = options[:n_threads]
+ ctx_params.n_threads_batch = options[:n_threads]
  context = LLaMACpp::Context.new(model: model, params: ctx_params)

  antiprompt = options[:reverse_prompt] || 'User:'
@@ -70,7 +74,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  0.step(embd.size - 1, options[:batch_size]) do |i|
  n_eval = [options[:batch_size], embd.size - i].min
- context.eval(tokens: embd[i...i + n_eval], n_past: n_past)
+ context.decode(LLaMACpp::Batch.get_one(tokens: embd[i...(i + n_eval)], n_tokens: n_eval, pos_zero: n_past, seq_id: 0))
  n_past += n_eval
  end
  end
@@ -95,7 +99,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  context.sample_tail_free(candidates, z: options[:tfs_z])
  context.sample_typical(candidates, prob: options[:typical_p])
  context.sample_top_p(candidates, prob: options[:top_p])
- context.sample_temperature(candidates, temperature: options[:temp])
+ context.sample_temp(candidates, temp: options[:temp])
  id = context.sample_token(candidates)

  last_n_tokens.shift
data/examples/embedding.rb CHANGED
@@ -7,6 +7,7 @@

  require 'llama_cpp'
  require 'thor'
+ require 'etc'

  class Embedding < Thor # rubocop:disable Style/Documentation
  default_command :main
@@ -15,6 +16,7 @@ class Embedding < Thor # rubocop:disable Style/Documentation
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :prompt, type: :string, aliases: '-p', desc: 'prompt to generate embedding', required: true
  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
+ option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  mdl_params = LLaMACpp::ModelParams.new
  mdl_params.n_gpu_layers = options[:n_gpu_layers]
@@ -22,13 +24,15 @@ class Embedding < Thor # rubocop:disable Style/Documentation
  ctx_params = LLaMACpp::ContextParams.new
  ctx_params.embedding = true
  ctx_params.seed = options[:seed] if options[:seed] != -1
+ ctx_params.n_threads = options[:n_threads]
+ ctx_params.n_threads_batch = options[:n_threads]
  context = LLaMACpp::Context.new(model: model, params: ctx_params)

  embd_input = context.model.tokenize(text: options[:prompt], add_bos: true)

  return unless embd_input.size.positive?

- context.eval(tokens: embd_input, n_past: 0)
+ context.decode(LLaMACpp::Batch.get_one(tokens: embd_input, n_tokens: embd_input.size, pos_zero: 0, seq_id: 0))

  context.embeddings.each { |val| print("#{val} ") }
  print("\n")
data/examples/simple.rb CHANGED
@@ -7,12 +7,14 @@

  require 'llama_cpp'
  require 'thor'
+ require 'etc'

  class Simple < Thor # rubocop:disable Style/Documentation
  default_command :main
  desc 'main', 'Simple completion'
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
+ option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  n_len = 32
  model_params = LLaMACpp::ModelParams.new
@@ -21,7 +23,8 @@ class Simple < Thor # rubocop:disable Style/Documentation
  context_params.seed = 1234
  context_params.n_ctx = 2048
  context_params.logits_all = true
- context_params.n_threads = 4
+ context_params.n_threads = options[:n_threads]
+ context_params.n_threads_batch = options[:n_threads]
  context = LLaMACpp::Context.new(model: model, params: context_params)

  tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -51,6 +51,7 @@ public:
  static void define_class(VALUE outer) {
  rb_cLLaMABatch = rb_define_class_under(outer, "Batch", rb_cObject);
  rb_define_alloc_func(rb_cLLaMABatch, llama_batch_alloc);
+ rb_define_singleton_method(rb_cLLaMABatch, "get_one", RUBY_METHOD_FUNC(_llama_batch_get_one), -1);
  rb_define_method(rb_cLLaMABatch, "initialize", RUBY_METHOD_FUNC(_llama_batch_initialize), -1);
  rb_define_method(rb_cLLaMABatch, "n_tokens=", RUBY_METHOD_FUNC(_llama_batch_set_n_tokens), 1);
  rb_define_method(rb_cLLaMABatch, "n_tokens", RUBY_METHOD_FUNC(_llama_batch_get_n_tokens), 0);
@@ -75,6 +76,48 @@ public:
  private:
  static const rb_data_type_t llama_batch_type;

+ static VALUE _llama_batch_get_one(int argc, VALUE* argv, VALUE klass) {
+ VALUE kw_args = Qnil;
+ ID kw_table[4] = { rb_intern("tokens"), rb_intern("n_tokens"), rb_intern("pos_zero"), rb_intern("seq_id") };
+ VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
+ rb_scan_args(argc, argv, ":", &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 4, 0, kw_values);
+
+ if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
+ rb_raise(rb_eArgError, "tokens must be an array");
+ return Qnil;
+ }
+ if (!RB_INTEGER_TYPE_P(kw_values[1])) {
+ rb_raise(rb_eArgError, "n_tokens must be an integer");
+ return Qnil;
+ }
+ if (!RB_INTEGER_TYPE_P(kw_values[2])) {
+ rb_raise(rb_eArgError, "pos_zero must be an integer");
+ return Qnil;
+ }
+ if (!RB_INTEGER_TYPE_P(kw_values[3])) {
+ rb_raise(rb_eArgError, "seq_id must be an integer");
+ return Qnil;
+ }
+
+ const size_t sz_array = RARRAY_LEN(kw_values[0]);
+ const int32_t n_tokens = NUM2INT(kw_values[1]);
+ const llama_pos pos_zero = NUM2INT(kw_values[2]);
+ const llama_seq_id seq_id = NUM2INT(kw_values[3]);
+
+ LLaMABatchWrapper* ptr = (LLaMABatchWrapper*)ruby_xmalloc(sizeof(LLaMABatchWrapper));
+ new (ptr) LLaMABatchWrapper();
+ ptr->batch = llama_batch_get_one(nullptr, n_tokens, pos_zero, seq_id);
+
+ ptr->batch.token = (llama_token*)malloc(sizeof(llama_token) * sz_array);
+ for (size_t i = 0; i < sz_array; i++) {
+ VALUE el = rb_ary_entry(kw_values[0], i);
+ ptr->batch.token[i] = NUM2INT(el);
+ }
+
+ return TypedData_Wrap_Struct(klass, &llama_batch_type, ptr);
+ }
+
  static VALUE _llama_batch_initialize(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[3] = { rb_intern("max_n_token"), rb_intern("n_embd"), rb_intern("max_n_seq") };
@@ -2054,6 +2097,8 @@ private:
  rb_scan_args(argc, argv, ":", &kw_args);
  rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);

+ rb_warn("eval is deprecated. Use decode instead.");
+
  if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
  rb_raise(rb_eArgError, "tokens must be an Array");
  return Qnil;
@@ -2104,6 +2149,8 @@ private:
  rb_scan_args(argc, argv, ":", &kw_args);
  rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);

+ rb_warn("eval_embd is deprecated. Use decode instead.");
+
  if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
  rb_raise(rb_eArgError, "tokens must be an Array");
  return Qnil;
@@ -2794,6 +2841,8 @@ private:
  rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
  rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);

+ rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
+
  if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
  rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
  return Qnil;
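From Ruby, the singleton method registered above takes the four required keyword arguments validated in `_llama_batch_get_one` and raises `ArgumentError` on a type mismatch. A small hedged sketch; the token ids are made up for illustration:

```ruby
require 'llama_cpp'

# Illustrative token ids only; real ids come from Model#tokenize.
batch = LLaMACpp::Batch.get_one(tokens: [1, 2, 3], n_tokens: 3, pos_zero: 0, seq_id: 0)
batch.n_tokens # expected to be 3

# A non-Array tokens value trips the RB_TYPE_P check in the binding:
#   LLaMACpp::Batch.get_one(tokens: 'oops', n_tokens: 1, pos_zero: 0, seq_id: 0)
#   #=> ArgumentError: tokens must be an array
```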
data/lib/llama_cpp/version.rb CHANGED
@@ -3,7 +3,7 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.11.1'
+ VERSION = '0.12.0'

  # The version of llama.cpp bundled with llama_cpp.rb.
  LLAMA_CPP_VERSION = 'b1768'
data/lib/llama_cpp.rb CHANGED
@@ -54,7 +54,7 @@ module LLaMACpp
  embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
  end

- context.eval(tokens: embd, n_past: n_past)
+ context.decode(LLaMACpp::Batch.get_one(tokens: embd, n_tokens: embd.size, pos_zero: n_past, seq_id: 0))
  end

  n_past += embd.size
@@ -77,7 +77,7 @@ module LLaMACpp
  context.sample_tail_free(candidates, z: tfs_z)
  context.sample_typical(candidates, prob: typical_p)
  context.sample_top_p(candidates, prob: top_p)
- context.sample_temperature(candidates, temperature: temperature)
+ context.sample_temp(candidates, temp: temperature)
  id = context.sample_token(candidates)

  last_n_tokens.shift
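The module-level `generate` helper now drives this `decode`/`sample_temp` path internally. A hedged usage sketch; the diff does not show `generate`'s full signature, so the context-plus-prompt call shape and the model path below are assumptions:

```ruby
require 'llama_cpp'

# Assumed call shape; see the gem's README for the authoritative example.
model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)
puts LLaMACpp.generate(context, 'Hello my name is')
```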
data/sig/llama_cpp.rbs CHANGED
@@ -149,6 +149,7 @@ module LLaMACpp
  class Batch
  public

+ def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
  def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
  def n_tokens=: (Integer) -> Integer
  def n_tokens: () -> Integer
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.11.1
+ version: 0.12.0
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-01-08 00:00:00.000000000 Z
+ date: 2024-01-11 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email: