llama_cpp 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +18 -2
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +22 -8
- data/ext/llama_cpp/src/ggml-alloc.c +541 -0
- data/ext/llama_cpp/src/ggml-alloc.h +22 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +2090 -438
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +17 -16
- data/ext/llama_cpp/src/ggml-metal.metal +4 -1
- data/ext/llama_cpp/src/ggml.c +49 -26
- data/ext/llama_cpp/src/ggml.h +12 -1
- data/ext/llama_cpp/src/k_quants.c +32 -30
- data/ext/llama_cpp/src/llama.cpp +199 -68
- data/ext/llama_cpp/src/llama.h +1 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 545786d4c9308ffe0f7e214a12427beaea0b26bec915ff84b16eed25ef1932a4
+  data.tar.gz: aaa0d4fc1710b13a26163306c8b51e423233c2f7e4b3d6127f94c9b6c4846f9c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 12b3ac122fd7ea59b51e2d6ff905ed1a71cf8a8b3650a269d4a3793ae32a0149f6836a792c8f216d0fdb0c39aeb3b47914e73ffc74b574bbe686660e6be84ea1
+  data.tar.gz: 5056b95552f3434692a6c19653810d77bb28ddf9b28abd78712ccfb4ee4f7d836a5d54e283513fcfc617cc79ffa7bb9257d4ac2b6d96ec89158bf94acd4cec86
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
+## [[0.3.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.5...v0.3.6)] - 2023-08-04
+
+- Bump bundled llama.cpp from master-1a94186 to master-468ea24.
+- Add `mul_mat_q` option to ContextParams.
+
 ## [[0.3.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.4...v0.3.5)] - 2023-07-29
 
 - Bump bundled llama.cpp from master-d924522 to master-1a94186.
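For context, `mul_mat_q` corresponds to llama.cpp's quantized matrix-multiplication kernel switch. A minimal usage sketch of the new accessor; the model path is a placeholder, and the surrounding Model/Context calls follow this gem's 0.3.x API:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.mul_mat_q = true   # setter coerces any Ruby value to a C bool
params.mul_mat_q          # => true

# Placeholder model path, shown only to situate the option.
model = LLaMACpp::Model.new(model_path: '/path/to/quantized-model.bin', params: params)
context = LLaMACpp::Context.new(model: model)
```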
data/README.md
CHANGED
@@ -12,11 +12,27 @@ This gem is still under development and may undergo many changes in the future.
 
 Install the gem and add to the application's Gemfile by executing:
 
-
+```sh
+$ bundle add llama_cpp
+```
 
 If bundler is not being used to manage dependencies, install the gem by executing:
 
-
+```sh
+$ gem install llama_cpp
+```
+
+There are several installation options for improving execution performance:
+
+```sh
+# use OpenBLAS
+$ gem install llama_cpp -- --with-openblas
+
+# use Metal on macOS
+$ gem install llama_cpp -- --with-metal
+```
+
+Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
 
 ## Usage
 
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
 
 abort 'libstdc++ is not found.' unless have_library('stdc++')
 
-$srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
+$srcs = %w[ggml.c ggml-alloc.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
 $srcs << 'ggml-mpi.c' if with_config('mpi')
 $CFLAGS << ' -w -DNDEBUG'
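The new `ggml-alloc.c` is simply appended to the compiled source list. For readers unfamiliar with mkmf, the `--with-*` install flags from the README reach this file through `with_config`; the sketch below is a condensed, hypothetical rendering of that pattern (the openblas branch is illustrative shorthand, not the verbatim extconf.rb):

```ruby
require 'mkmf'

# Base sources; 0.3.6 adds ggml-alloc.c here.
$srcs = %w[ggml.c ggml-alloc.c llama.cpp llama_cpp.cpp]

# `gem install llama_cpp -- --with-openblas` makes with_config('openblas') truthy,
# so optional backends can be toggled at install time (hypothetical condensed branch):
if with_config('openblas')
  abort 'libopenblas is not found.' unless have_library('openblas')
  $CFLAGS << ' -DGGML_USE_OPENBLAS'
end

create_makefile('llama_cpp/llama_cpp')
```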
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -414,6 +414,8 @@ public:
 rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
 rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
 rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
+rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
+rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
 rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
 rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
 rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -527,7 +529,7 @@ private:
 // low_vram
 static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.low_vram = low_vram
+ptr->params.low_vram = RTEST(low_vram) ? true : false;
 return ptr->params.low_vram ? Qtrue : Qfalse;
 }
 
@@ -536,6 +538,18 @@ private:
 return ptr->params.low_vram ? Qtrue : Qfalse;
 }
 
+// mul_mat_q
+static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
+LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
+return ptr->params.mul_mat_q ? Qtrue : Qfalse;
+}
+
+static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
+LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+return ptr->params.mul_mat_q ? Qtrue : Qfalse;
+}
+
 // seed
 static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -555,7 +569,7 @@ private:
 // f16_kv
 static VALUE _llama_context_params_set_f16_kv(VALUE self, VALUE f16_kv) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.f16_kv = f16_kv
+ptr->params.f16_kv = RTEST(f16_kv) ? true : false;
 return ptr->params.f16_kv ? Qtrue : Qfalse;
 }
 
@@ -567,7 +581,7 @@ private:
 // logits_all
 static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.logits_all = logits_all
+ptr->params.logits_all = RTEST(logits_all) ? true : false;
 return ptr->params.logits_all ? Qtrue : Qfalse;
 }
 
@@ -579,7 +593,7 @@ private:
 // vocab_only
 static VALUE _llama_context_params_set_vocab_only(VALUE self, VALUE vocab_only) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.vocab_only = vocab_only
+ptr->params.vocab_only = RTEST(vocab_only) ? true : false;
 return ptr->params.vocab_only ? Qtrue : Qfalse;
 }
 
@@ -591,7 +605,7 @@ private:
 // use_mmap
 static VALUE _llama_context_params_set_use_mmap(VALUE self, VALUE use_mmap) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.use_mmap = use_mmap
+ptr->params.use_mmap = RTEST(use_mmap) ? true : false;
 return ptr->params.use_mmap ? Qtrue : Qfalse;
 }
 
@@ -603,7 +617,7 @@ private:
 // use_mlock
 static VALUE _llama_context_params_set_use_mlock(VALUE self, VALUE use_mlock) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.use_mlock = use_mlock
+ptr->params.use_mlock = RTEST(use_mlock) ? true : false;
 return ptr->params.use_mlock ? Qtrue : Qfalse;
 }
 
@@ -615,7 +629,7 @@ private:
 // embedding
 static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.embedding = embedding
+ptr->params.embedding = RTEST(embedding) ? true : false;
 return ptr->params.embedding ? Qtrue : Qfalse;
 }
 
@@ -2408,7 +2422,7 @@ static VALUE rb_llama_llama_backend_init(int argc, VALUE* argv, VALUE self) {
 rb_scan_args(argc, argv, ":", &kw_args);
 rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
 
-const bool numa = kw_values[0] == Qundef ? false : (RTEST ? true : false);
+const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
 llama_backend_init(numa);
 
 return Qnil;
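The recurring fix in these hunks replaces direct assignment of a Ruby VALUE to a C bool field with an `RTEST(...)` coercion, so ordinary Ruby truthiness (everything except nil and false counts as true) now carries over to the C side. A small sketch of the observable behavior, with illustrative values:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.use_mmap = nil    # nil is falsy -> stored as C false
params.use_mmap          # => false
params.f16_kv = 1        # any non-nil/false object is truthy -> stored as C true
params.f16_kv            # => true

# The :numa keyword of backend_init goes through the same RTEST coercion.
LLaMACpp.backend_init(numa: false)
```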