llama_cpp 0.5.1 → 0.5.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -3
- data/examples/prompt_jp.txt +1 -1
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +30 -0
- data/ext/llama_cpp/src/ggml-alloc.c +0 -5
- data/ext/llama_cpp/src/ggml-cuda.cu +1011 -655
- data/ext/llama_cpp/src/ggml-metal.m +57 -15
- data/ext/llama_cpp/src/ggml-metal.metal +271 -137
- data/ext/llama_cpp/src/ggml.c +7 -3
- data/ext/llama_cpp/src/ggml.h +1 -1
- data/ext/llama_cpp/src/k_quants.c +4 -1
- data/ext/llama_cpp/src/llama.cpp +617 -141
- data/ext/llama_cpp/src/llama.h +8 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9e38c82f6ce7404a78b3ecdbc9574ae860322e6945499f0c4a905956bcbd2be7
+  data.tar.gz: 4a5effb6fcf3182baad091717bc510176eb127ccd660342ce0cc46bf2d392b4a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c471bd6c6afee142945d03da1c4908355fe900a5f0c259583b7b65f97d495d07c5397d1b551da888a5970170944596959ddef73d2df803acf001b8d079d0affb
+  data.tar.gz: 99cbb2d978723f9814d8ac7163f03c642a1ac6cabbd6cf09d003f563c629563a920d909ab797729f1e233f30d5776bf9f70f4c473919e5bf101d3e3f5fd6e938
data/CHANGELOG.md
CHANGED
@@ -1,11 +1,19 @@
+## [[0.5.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.1...v0.5.2)] - 2023-09-16
+
+- Bump bundled llama.cpp from b1198 to b1.
+- Add `n_ctx_train` method to Model and Context.
+- Add nvcc option to avoid link error ([#8](https://github.com/yoshoku/llama_cpp.rb/pull/8)).
+- Set encoding on output of `generate` module function to avoid encoding error ([#9](https://github.com/yoshoku/llama_cpp.rb/pull/9)).
+- Add `only_copy` option to ModelQuantizeParams.
+
 ## [[0.5.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.0...v0.5.1)] - 2023-09-08
 
-- Bump bundled llama.cpp from
+- Bump bundled llama.cpp from b1140 to b1198.
 
 ## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02
 
 **Breaking Changes**
-- Bump bundled llama.cpp from
+- Bump bundled llama.cpp from b1060 to b1140.
 - Rename `token_to_str` method on Context to `token_to_piece` method.
 - Rename `token_to_str` method on Model to `token_to_piece` method.
 - Rename `type` method on Model to `desc` method.
@@ -14,7 +22,7 @@
 ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
 
 **Breaking Changes**
-- Bump bundled llama.cpp from master-097e121 to
+- Bump bundled llama.cpp from master-097e121 to b1060.
 - Support new file format GGUF.
   - You should re-convert / re-quantize your model files.
 - Remove vocab methods.
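Of the 0.5.2 changes above, the `generate` encoding fix is the easiest to see from plain usage. A minimal sketch, assuming a local GGUF model at `./model.gguf` (the `ContextParams` / `Model` / `Context` / `generate` calls follow the gem's documented 0.5.x API):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: './model.gguf', params: params)
context = LLaMACpp::Context.new(model: model)

# As of 0.5.2 the returned string carries an explicit encoding (see #9),
# so multi-byte output such as the Japanese prompt example no longer
# raises encoding errors when printed.
puts LLaMACpp.generate(context, 'Hello, World.')
```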
data/examples/prompt_jp.txt
CHANGED
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -112,7 +112,7 @@ create_makefile('llama_cpp/llama_cpp')
 if with_config('cublas')
   File.open('Makefile', 'a') do |f|
     f.puts 'ggml-cuda.o: ggml-cuda.cu ggml-cuda.h'
-    f.puts "\tnvcc -arch=native -c -o $@ $<"
+    f.puts "\tnvcc -shared -Xcompiler -fPIC -arch=native -c -o $@ $<"
   end
 end
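The added `-shared -Xcompiler -fPIC` flags make nvcc pass `-fPIC` to the host compiler, so `ggml-cuda.o` is built as position-independent code; without it, linking the object into the `llama_cpp` shared library can fail with a relocation error (PR #8). A sketch of how this path gets exercised at install time, assuming the `--with-cublas` configure flag that matches the `with_config('cublas')` check above:

```ruby
# Hypothetical install helper: forward --with-cublas to extconf.rb so the
# Makefile gains the patched nvcc rule shown in the diff above.
system('gem', 'install', 'llama_cpp', '--', '--with-cublas') || abort('gem install failed')
```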
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -692,6 +692,8 @@ public:
     rb_define_method(rb_cLLaMAModelQuantizeParams, "allow_requantize", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_allow_requantize), 0);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_quantize_output_tensor), 1);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
   }
 
 private:
@@ -752,6 +754,18 @@ private:
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.quantize_output_tensor ? Qtrue : Qfalse;
   }
+
+  // only_copy
+  static VALUE _llama_model_quantize_params_set_only_copy(VALUE self, VALUE only_copy) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    ptr->params.only_copy = RTEST(only_copy) ? true : false;
+    return ptr->params.only_copy ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_quantize_params_get_only_copy(VALUE self) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    return ptr->params.only_copy ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
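These bindings surface the `only_copy` field of llama.cpp's `llama_model_quantize_params`: when set, quantization copies tensors verbatim instead of re-quantizing them. A minimal usage sketch, assuming `LLaMACpp.model_quantize` keeps its existing keyword arguments (file paths are placeholders):

```ruby
require 'llama_cpp'

params = LLaMACpp::ModelQuantizeParams.new
params.only_copy = true # new in 0.5.2: copy tensors as-is instead of re-quantizing

LLaMACpp.model_quantize(input_path: 'in.gguf', output_path: 'out.gguf', params: params)
```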
@@ -810,6 +824,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
     rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
@@ -971,6 +986,11 @@ private:
     return INT2NUM(llama_model_n_ctx(ptr->model));
   }
 
+  static VALUE _llama_model_get_model_n_ctx_train(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_model_n_ctx_train(ptr->model));
+  }
+
   static VALUE _llama_model_get_model_n_embd(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return INT2NUM(llama_model_n_embd(ptr->model));
@@ -1341,6 +1361,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
+    rb_define_method(rb_cLLaMAContext, "n_ctx_train", RUBY_METHOD_FUNC(_llama_context_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
@@ -1733,6 +1754,15 @@ private:
     return INT2NUM(llama_n_ctx(ptr->ctx));
   }
 
+  static VALUE _llama_context_n_ctx_train(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_n_ctx_train(ptr->ctx));
+  }
+
   static VALUE _llama_context_n_embd(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {