llama_cpp 0.5.1 → 0.5.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: fd67587510fff74b8b1d55e2e5861711709dfb5d8c44cf40b3bf762276e57d5b
- data.tar.gz: 5cb5319136e538eb2ec9a6406caaaacdabdb2dceec5cade43769eda1b02de9c5
+ metadata.gz: 9e38c82f6ce7404a78b3ecdbc9574ae860322e6945499f0c4a905956bcbd2be7
+ data.tar.gz: 4a5effb6fcf3182baad091717bc510176eb127ccd660342ce0cc46bf2d392b4a
  SHA512:
- metadata.gz: c2ab28fe9bf5674976ff2e676ea4d76157bd2ebf24b92ca2f959a6cdf2c19de94fe95d76ab21ca313d9017f835387b0f9ad616cb3700024fc5394fa1e9984fda
- data.tar.gz: 0ce0be3db250eb7d35f3784bd7a3bd54e7ab8833378745417da3504f69bc31910d4fec459d29ad28218fce2614e8321462e9873c96ed1c3793eb5f9bbe5a9eac
+ metadata.gz: c471bd6c6afee142945d03da1c4908355fe900a5f0c259583b7b65f97d495d07c5397d1b551da888a5970170944596959ddef73d2df803acf001b8d079d0affb
+ data.tar.gz: 99cbb2d978723f9814d8ac7163f03c642a1ac6cabbd6cf09d003f563c629563a920d909ab797729f1e233f30d5776bf9f70f4c473919e5bf101d3e3f5fd6e938
data/CHANGELOG.md CHANGED
@@ -1,11 +1,19 @@
+ ## [[0.5.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.1...v0.5.2)] - 2023-09-16
+
+ - Bump bundled llama.cpp from b1198 to b1.
+ - Add `n_ctx_train` method to Model and Context.
+ - Add nvcc option to avoid link error ([#8](https://github.com/yoshoku/llama_cpp.rb/pull/8)).
+ - Set encoding on output of `generate` module function to avoid encoding error ([#9](https://github.com/yoshoku/llama_cpp.rb/pull/9)).
+ - Add `only_copy` option to ModelQuantizeParams.
+
  ## [[0.5.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.0...v0.5.1)] - 2023-09-08

- - Bump bundled llama.cpp from master-b1140 to master-b1198.
+ - Bump bundled llama.cpp from b1140 to b1198.

  ## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02

  **Breaking Changes**
- - Bump bundled llama.cpp from master-b1060 to master-b1140.
+ - Bump bundled llama.cpp from b1060 to b1140.
  - Rename `token_to_str` method on Context to `token_to_piece` method.
  - Rename `token_to_str` method on Model to `token_to_piece` method.
  - Rename `type` method on Model to `desc` method.
@@ -14,7 +22,7 @@
  ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26

  **Breaking Changes**
- - Bump bundled llama.cpp from master-097e121 to master-b1060.
+ - Bump bundled llama.cpp from master-097e121 to b1060.
  - Support new file format GGUF.
  - You should re-convert / re-quantize your model files.
  - Remove vocab methods.
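For orientation, the snippet below is a minimal usage sketch against the 0.5.2 API described in the changelog above; it is not taken from this diff. The model path is a placeholder and the `Model`/`Context` constructor keywords are assumed from the gem's README, while the encoding behavior of `generate` is the change this release makes.

```ruby
require 'llama_cpp'

# Placeholder path and assumed constructor keywords (not part of this diff).
params = LLaMACpp::ContextParams.new
params.seed = 42
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: params)
context = LLaMACpp::Context.new(model: model)

# 0.5.2 sets the encoding of the string returned by the `generate` module
# function, so multibyte output no longer raises an encoding error (#9).
puts LLaMACpp.generate(context, 'Hello, World.')
```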
@@ -1,5 +1,5 @@
  UserがTaroという名前のアシスタントと対話するダイアログのトランスクリプト。
- Taroは親切で、親切で、正直で、文章を書くのが上手で、ユーザーのリクエストに即座に正確に答えることを怠りません。
+ Taroは親切で、正直で、文章を書くのが上手で、ユーザーのリクエストに即座に正確に答えることを怠りません。

  User: こんにちには、Taro。
  Taro: こんにちは、今日はどのような要件ですか?
@@ -112,7 +112,7 @@ create_makefile('llama_cpp/llama_cpp')
  if with_config('cublas')
  File.open('Makefile', 'a') do |f|
  f.puts 'ggml-cuda.o: ggml-cuda.cu ggml-cuda.h'
- f.puts "\tnvcc -arch=native -c -o $@ $<"
+ f.puts "\tnvcc -shared -Xcompiler -fPIC -arch=native -c -o $@ $<"
  end
  end
@@ -692,6 +692,8 @@ public:
  rb_define_method(rb_cLLaMAModelQuantizeParams, "allow_requantize", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_allow_requantize), 0);
  rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_quantize_output_tensor), 1);
  rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
+ rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
+ rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
  }

  private:
@@ -752,6 +754,18 @@ private:
  LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
  return ptr->params.quantize_output_tensor ? Qtrue : Qfalse;
  }
+
+ // only_copy
+ static VALUE _llama_model_quantize_params_set_only_copy(VALUE self, VALUE only_copy) {
+ LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+ ptr->params.only_copy = RTEST(only_copy) ? true : false;
+ return ptr->params.only_copy ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_quantize_params_get_only_copy(VALUE self) {
+ LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+ return ptr->params.only_copy ? Qtrue : Qfalse;
+ }
  };

  const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
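These bindings expose the new flag to Ruby as `only_copy` / `only_copy=` on ModelQuantizeParams. A hedged usage sketch follows; the keyword signature of `LLaMACpp.model_quantize` is assumed from earlier releases of the gem and the file paths are placeholders.

```ruby
require 'llama_cpp'

params = LLaMACpp::ModelQuantizeParams.new
params.only_copy = true   # new in 0.5.2: copy tensors as-is instead of re-quantizing
puts params.only_copy     # => true

# Assumed signature (not shown in this diff): module-level quantize helper.
LLaMACpp.model_quantize(input_path: 'in.gguf', output_path: 'out.gguf', params: params)
```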
@@ -810,6 +824,7 @@ public:
  rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
  rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
  rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+ rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
  rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
@@ -971,6 +986,11 @@ private:
  return INT2NUM(llama_model_n_ctx(ptr->model));
  }

+ static VALUE _llama_model_get_model_n_ctx_train(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_model_n_ctx_train(ptr->model));
+ }
+
  static VALUE _llama_model_get_model_n_embd(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
  return INT2NUM(llama_model_n_embd(ptr->model));
@@ -1341,6 +1361,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
  rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
+ rb_define_method(rb_cLLaMAContext, "n_ctx_train", RUBY_METHOD_FUNC(_llama_context_n_ctx_train), 0);
  rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
  rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
@@ -1733,6 +1754,15 @@ private:
  return INT2NUM(llama_n_ctx(ptr->ctx));
  }

+ static VALUE _llama_context_n_ctx_train(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_n_ctx_train(ptr->ctx));
+ }
+
  static VALUE _llama_context_n_embd(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
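With both bindings in place, the training-time context length is readable from either object. A minimal Ruby sketch follows; the constructor keywords and the model path are assumptions, only the `n_ctx_train` readers come from this release.

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: params)
context = LLaMACpp::Context.new(model: model)

puts model.n_ctx_train    # context length the model was trained with (new in 0.5.2)
puts context.n_ctx        # context length configured for this session
puts context.n_ctx_train  # same training-time value, via the Context binding
```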
@@ -1,8 +1,3 @@
- // defines MAP_ANONYMOUS
- #ifndef _GNU_SOURCE
- #define _GNU_SOURCE
- #endif
-
  #include "ggml-alloc.h"
  #include "ggml.h"
  #include <assert.h>