llama_cpp 0.5.1 → 0.5.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -3
- data/examples/prompt_jp.txt +1 -1
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +30 -0
- data/ext/llama_cpp/src/ggml-alloc.c +0 -5
- data/ext/llama_cpp/src/ggml-cuda.cu +1011 -655
- data/ext/llama_cpp/src/ggml-metal.m +57 -15
- data/ext/llama_cpp/src/ggml-metal.metal +271 -137
- data/ext/llama_cpp/src/ggml.c +7 -3
- data/ext/llama_cpp/src/ggml.h +1 -1
- data/ext/llama_cpp/src/k_quants.c +4 -1
- data/ext/llama_cpp/src/llama.cpp +617 -141
- data/ext/llama_cpp/src/llama.h +8 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9e38c82f6ce7404a78b3ecdbc9574ae860322e6945499f0c4a905956bcbd2be7
+  data.tar.gz: 4a5effb6fcf3182baad091717bc510176eb127ccd660342ce0cc46bf2d392b4a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c471bd6c6afee142945d03da1c4908355fe900a5f0c259583b7b65f97d495d07c5397d1b551da888a5970170944596959ddef73d2df803acf001b8d079d0affb
+  data.tar.gz: 99cbb2d978723f9814d8ac7163f03c642a1ac6cabbd6cf09d003f563c629563a920d909ab797729f1e233f30d5776bf9f70f4c473919e5bf101d3e3f5fd6e938
data/CHANGELOG.md
CHANGED
@@ -1,11 +1,19 @@
+## [[0.5.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.1...v0.5.2)] - 2023-09-16
+
+- Bump bundled llama.cpp from b1198 to b1.
+- Add `n_ctx_train` method to Model and Context.
+- Add nvcc option to avoid link error ([#8](https://github.com/yoshoku/llama_cpp.rb/pull/8)).
+- Set encoding on output of `generate` module function to avoid encoding error ([#9](https://github.com/yoshoku/llama_cpp.rb/pull/9)).
+- Add `only_copy` option to ModelQuantizeParams.
+
 ## [[0.5.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.0...v0.5.1)] - 2023-09-08
 
-- Bump bundled llama.cpp from
+- Bump bundled llama.cpp from b1140 to b1198.
 
 ## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02
 
 **Breaking Changes**
-- Bump bundled llama.cpp from
+- Bump bundled llama.cpp from b1060 to b1140.
 - Rename `token_to_str` method on Context to `token_to_piece` method.
 - Rename `token_to_str` method on Model to `token_to_piece` method.
 - Rename `type` method on Model to `desc` method.
@@ -14,7 +22,7 @@
 ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
 
 **Breaking Changes**
-- Bump bundled llama.cpp from master-097e121 to
+- Bump bundled llama.cpp from master-097e121 to b1060.
 - Support new file format GGUF.
   - You should re-convert / re-quantize your model files.
 - Remove vocab methods.
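Of the 0.5.2 changes above, the `generate` encoding fix is the easiest to see from plain usage. A minimal sketch, assuming a local GGUF model at `./model.gguf` (the `ContextParams` / `Model` / `Context` / `generate` calls follow the gem's documented 0.5.x API):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: './model.gguf', params: params)
context = LLaMACpp::Context.new(model: model)

# As of 0.5.2 the returned string carries an explicit encoding (see #9),
# so multi-byte output such as the Japanese prompt example no longer
# raises encoding errors when printed.
puts LLaMACpp.generate(context, 'Hello, World.')
```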
data/examples/prompt_jp.txt
CHANGED
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -112,7 +112,7 @@ create_makefile('llama_cpp/llama_cpp')
 if with_config('cublas')
   File.open('Makefile', 'a') do |f|
     f.puts 'ggml-cuda.o: ggml-cuda.cu ggml-cuda.h'
-    f.puts "\tnvcc -arch=native -c -o $@ $<"
+    f.puts "\tnvcc -shared -Xcompiler -fPIC -arch=native -c -o $@ $<"
   end
 end
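The added `-shared -Xcompiler -fPIC` flags make nvcc pass `-fPIC` to the host compiler, so `ggml-cuda.o` is built as position-independent code; without it, linking the object into the `llama_cpp` shared library can fail with a relocation error (PR #8). A sketch of how this path gets exercised at install time, assuming the `--with-cublas` configure flag that matches the `with_config('cublas')` check above:

```ruby
# Hypothetical install helper: forward --with-cublas to extconf.rb so the
# Makefile gains the patched nvcc rule shown in the diff above.
system('gem', 'install', 'llama_cpp', '--', '--with-cublas') || abort('gem install failed')
```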
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -692,6 +692,8 @@ public:
     rb_define_method(rb_cLLaMAModelQuantizeParams, "allow_requantize", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_allow_requantize), 0);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_quantize_output_tensor), 1);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
   }
 
 private:
@@ -752,6 +754,18 @@ private:
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.quantize_output_tensor ? Qtrue : Qfalse;
   }
+
+  // only_copy
+  static VALUE _llama_model_quantize_params_set_only_copy(VALUE self, VALUE only_copy) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    ptr->params.only_copy = RTEST(only_copy) ? true : false;
+    return ptr->params.only_copy ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_quantize_params_get_only_copy(VALUE self) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    return ptr->params.only_copy ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
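These bindings surface the `only_copy` field of llama.cpp's `llama_model_quantize_params`: when set, quantization copies tensors verbatim instead of re-quantizing them. A minimal usage sketch, assuming `LLaMACpp.model_quantize` keeps its existing keyword arguments (file paths are placeholders):

```ruby
require 'llama_cpp'

params = LLaMACpp::ModelQuantizeParams.new
params.only_copy = true # new in 0.5.2: copy tensors as-is instead of re-quantizing

LLaMACpp.model_quantize(input_path: 'in.gguf', output_path: 'out.gguf', params: params)
```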
@@ -810,6 +824,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
     rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
@@ -971,6 +986,11 @@ private:
     return INT2NUM(llama_model_n_ctx(ptr->model));
   }
 
+  static VALUE _llama_model_get_model_n_ctx_train(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_model_n_ctx_train(ptr->model));
+  }
+
   static VALUE _llama_model_get_model_n_embd(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return INT2NUM(llama_model_n_embd(ptr->model));
@@ -1341,6 +1361,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
+    rb_define_method(rb_cLLaMAContext, "n_ctx_train", RUBY_METHOD_FUNC(_llama_context_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
@@ -1733,6 +1754,15 @@ private:
     return INT2NUM(llama_n_ctx(ptr->ctx));
   }
 
+  static VALUE _llama_context_n_ctx_train(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_n_ctx_train(ptr->ctx));
+  }
+
   static VALUE _llama_context_n_embd(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {