llama_cpp 0.5.1 → 0.5.2
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -3
- data/examples/prompt_jp.txt +1 -1
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +30 -0
- data/ext/llama_cpp/src/ggml-alloc.c +0 -5
- data/ext/llama_cpp/src/ggml-cuda.cu +1011 -655
- data/ext/llama_cpp/src/ggml-metal.m +57 -15
- data/ext/llama_cpp/src/ggml-metal.metal +271 -137
- data/ext/llama_cpp/src/ggml.c +7 -3
- data/ext/llama_cpp/src/ggml.h +1 -1
- data/ext/llama_cpp/src/k_quants.c +4 -1
- data/ext/llama_cpp/src/llama.cpp +617 -141
- data/ext/llama_cpp/src/llama.h +8 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9e38c82f6ce7404a78b3ecdbc9574ae860322e6945499f0c4a905956bcbd2be7
+  data.tar.gz: 4a5effb6fcf3182baad091717bc510176eb127ccd660342ce0cc46bf2d392b4a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c471bd6c6afee142945d03da1c4908355fe900a5f0c259583b7b65f97d495d07c5397d1b551da888a5970170944596959ddef73d2df803acf001b8d079d0affb
+  data.tar.gz: 99cbb2d978723f9814d8ac7163f03c642a1ac6cabbd6cf09d003f563c629563a920d909ab797729f1e233f30d5776bf9f70f4c473919e5bf101d3e3f5fd6e938
data/CHANGELOG.md
CHANGED
@@ -1,11 +1,19 @@
+## [[0.5.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.1...v0.5.2)] - 2023-09-16
+
+- Bump bundled llama.cpp from b1198 to b1.
+- Add `n_ctx_train` method to Model and Context.
+- Add nvcc option to avoid link error ([#8](https://github.com/yoshoku/llama_cpp.rb/pull/8)).
+- Set encoding on output of `generate` module function to avoid encoding error ([#9](https://github.com/yoshoku/llama_cpp.rb/pull/9)).
+- Add `only_copy` option to ModelQuantizeParams.
+
 ## [[0.5.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.0...v0.5.1)] - 2023-09-08
 
-- Bump bundled llama.cpp from
+- Bump bundled llama.cpp from b1140 to b1198.
 
 ## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02
 
 **Breaking Changes**
-- Bump bundled llama.cpp from
+- Bump bundled llama.cpp from b1060 to b1140.
 - Rename `token_to_str` method on Context to `token_to_piece` method.
 - Rename `token_to_str` method on Model to `token_to_piece` method.
 - Rename `type` method on Model to `desc` method.
@@ -14,7 +22,7 @@
 ## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
 
 **Breaking Changes**
-- Bump bundled llama.cpp from master-097e121 to
+- Bump bundled llama.cpp from master-097e121 to b1060.
 - Support new file format GGUF.
 - You should re-convert / re-quantize your model files.
 - Remove vocab methods.
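Of the 0.5.2 entries above, `n_ctx_train` and the `generate` encoding fix are the most visible from Ruby. A minimal sketch of how they surface (the model path is a placeholder, and the constructor and `generate` calls follow the gem's README-style 0.5.x API, so treat the exact signatures as an assumption):

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: params)
    context = LLaMACpp::Context.new(model: model)

    # n_ctx_train (new in 0.5.2): the context length the model was trained
    # with, exposed on both Model and Context.
    puts model.n_ctx_train
    puts context.n_ctx_train

    # generate now sets the encoding of its output string, so prompts with
    # multibyte characters (e.g. examples/prompt_jp.txt) no longer trip the
    # encoding error referenced in #9 when the result is printed.
    puts LLaMACpp.generate(context, 'Hello, World.')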
data/examples/prompt_jp.txt
CHANGED
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -112,7 +112,7 @@ create_makefile('llama_cpp/llama_cpp')
 if with_config('cublas')
   File.open('Makefile', 'a') do |f|
     f.puts 'ggml-cuda.o: ggml-cuda.cu ggml-cuda.h'
-    f.puts "\tnvcc -arch=native -c -o $@ $<"
+    f.puts "\tnvcc -shared -Xcompiler -fPIC -arch=native -c -o $@ $<"
   end
 end
 
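The added flags address the link error from #8: mkmf builds the extension as a shared object, so every object file linked into it must be position-independent, and plain `nvcc -c` does not emit PIC by default. An annotated sketch of the same rule (the comments are mine, not in the generated Makefile; `--with-cublas` is the usual mkmf-style switch, e.g. `gem install llama_cpp -- --with-cublas`):

    if with_config('cublas') # opt-in cuBLAS build
      File.open('Makefile', 'a') do |f|
        f.puts 'ggml-cuda.o: ggml-cuda.cu ggml-cuda.h'
        # -Xcompiler -fPIC forwards -fPIC to nvcc's host compiler so that
        # ggml-cuda.o can be linked into the shared llama_cpp extension;
        # -arch=native targets the GPU present at build time.
        f.puts "\tnvcc -shared -Xcompiler -fPIC -arch=native -c -o $@ $<"
      end
    end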
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -692,6 +692,8 @@ public:
     rb_define_method(rb_cLLaMAModelQuantizeParams, "allow_requantize", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_allow_requantize), 0);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_quantize_output_tensor), 1);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
   }
 
 private:
@@ -752,6 +754,18 @@ private:
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.quantize_output_tensor ? Qtrue : Qfalse;
   }
+
+  // only_copy
+  static VALUE _llama_model_quantize_params_set_only_copy(VALUE self, VALUE only_copy) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    ptr->params.only_copy = RTEST(only_copy) ? true : false;
+    return ptr->params.only_copy ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_quantize_params_get_only_copy(VALUE self) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    return ptr->params.only_copy ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
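Both accessors follow the wrapper's usual pattern: the setter coerces any Ruby value to a C bool via RTEST, and both return Qtrue/Qfalse. From Ruby, `only_copy` therefore reads as a plain boolean attribute on ModelQuantizeParams. A sketch of quantize-time use (the `model_quantize` keyword signature is assumed from the gem's 0.5.x API, and the file names are placeholders):

    require 'llama_cpp'

    params = LLaMACpp::ModelQuantizeParams.new
    params.only_copy = true # copy tensors verbatim instead of re-quantizing them
    LLaMACpp.model_quantize(input_path: 'in.gguf', output_path: 'out.gguf', params: params)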
@@ -810,6 +824,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
     rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
@@ -971,6 +986,11 @@ private:
     return INT2NUM(llama_model_n_ctx(ptr->model));
   }
 
+  static VALUE _llama_model_get_model_n_ctx_train(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_model_n_ctx_train(ptr->model));
+  }
+
   static VALUE _llama_model_get_model_n_embd(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return INT2NUM(llama_model_n_embd(ptr->model));
@@ -1341,6 +1361,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
+    rb_define_method(rb_cLLaMAContext, "n_ctx_train", RUBY_METHOD_FUNC(_llama_context_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
@@ -1733,6 +1754,15 @@ private:
     return INT2NUM(llama_n_ctx(ptr->ctx));
   }
 
+  static VALUE _llama_context_n_ctx_train(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_n_ctx_train(ptr->ctx));
+  }
+
   static VALUE _llama_context_n_embd(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {