llama_cpp 0.3.8 → 0.5.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +1 -1
- data/examples/chat.rb +4 -6
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +129 -124
- data/ext/llama_cpp/src/ggml-alloc.c +90 -113
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +350 -77
- data/ext/llama_cpp/src/ggml-cuda.h +13 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +226 -121
- data/ext/llama_cpp/src/ggml-metal.metal +157 -35
- data/ext/llama_cpp/src/ggml.c +2724 -584
- data/ext/llama_cpp/src/ggml.h +282 -31
- data/ext/llama_cpp/src/k_quants.c +112 -56
- data/ext/llama_cpp/src/llama.cpp +4857 -2986
- data/ext/llama_cpp/src/llama.h +180 -126
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -2
- data/sig/llama_cpp.rbs +12 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 715eab98a76ed825d66da6e4fcc84154dca8eed76f6cf6625d210a1ffb702958
+  data.tar.gz: 3ceafc312354d245e485b664d71450cd9c27bcd89f5faec91af6cdf1221c251f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ebe959d9380c9d981156606fdd8a6bcea9b88914923e693b400cfcd605b8c216bdfdcc807c0e72a21fe5fc6d7d623118fc7246524d7f59acdb8bc0064d736bc
+  data.tar.gz: c6d428234d866c09d227b5c308a573e9721454ded3f7fdd36880706e7c47c72c67e6fed119c75d6898c6a1149cde853e5dbb59e3a390ef3d370aab4f0d6be548
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
+## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02
+
+**Breaking Changes**
+- Bump bundled llama.cpp from master-b1060 to master-b1140.
+- Rename `token_to_str` method on Context to `token_to_piece` method.
+- Rename `token_to_str` method on Model to `token_to_piece` method.
+- Rename `type` method on Model to `desc` method.
+- Add `size` and `n_params` methods to Model.
+
+## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
+
+**Breaking Changes**
+- Bump bundled llama.cpp from master-097e121 to master-b1060.
+- Support new file format GGUF.
+  - You should re-convert / re-quantize your model files.
+- Remove vocab methods.
+- Move token_bos, token_eos, and token_nl methods to Context.
+- Add text, score, and type methods to Context.
+
 ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
 
 - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
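To make the 0.5.0 renames concrete, here is a minimal Ruby sketch of the updated Model API. The model path is illustrative, and the constructor keywords follow the gem's documented `Model.new(model_path:, params:)` usage:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)

puts model.desc              # formerly Model#type
puts model.size              # added in 0.5.0: model size in bytes
puts model.n_params          # added in 0.5.0: number of parameters
puts model.token_to_piece(1) # formerly Model#token_to_str
```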
data/README.md
CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
 $ cd ../
 $ python3 convert.py models/open_llama_7b
 $ make
-$ ./quantize ./models/open_llama_7b/ggml-model-f16.
+$ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
 ```
 
 An example of Ruby code that generates sentences with the quantization model is as follows:
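The generation example itself is unchanged by this hunk and not shown in the diff; it follows roughly this shape (a sketch based on the gem's documented usage, with an illustrative path and seed):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 42

model = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

puts LLaMACpp.generate(context, 'Hello, World.')
```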
data/examples/chat.rb
CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
     n_keep = options[:keep]
     n_keep = embd_input.size if n_keep > embd_input.size
 
-    token_newline = context.tokenize(text: "\n", add_bos: false)
-
     last_n_tokens = [0] * n_ctx
     interactive = true
     is_interacting = false
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
         last_n_tokens.shift
         last_n_tokens.push(id)
 
-        if id ==
-          id =
+        if id == context.token_eos
+          id = context.token_nl
           unless antiprompt.empty?
             first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
             embd_input.concat(first_antiprompt)
@@ -124,7 +122,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
 
       if input_echo
         output = []
-        embd.each { |token| output << context.
+        embd.each { |token| output << context.token_to_piece(token) }
         output_str = output.join
         output_str.chomp!(antiprompt) if first_input
         print(output_str)
@@ -133,7 +131,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
       if embd_input.size <= n_consumed
         if antiprompt.size.positive?
           last_output = []
-          last_n_tokens.each { |token| last_output << context.
+          last_n_tokens.each { |token| last_output << context.token_to_piece(token) }
           last_output_str = last_output.join
 
           search_start_pos = last_output_str.size > antiprompt.size ? last_output_str.size - antiprompt.size : 0
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -50,10 +50,10 @@ if with_config('accelerate')
 end
 
 if with_config('metal')
-  $CFLAGS << ' -DGGML_USE_METAL
+  $CFLAGS << ' -DGGML_USE_METAL'
   $CXXFLAGS << ' -DGGML_USE_METAL'
   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
-  $objs = %w[ggml.o ggml-alloc.o
+  $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
@@ -61,7 +61,7 @@ if with_config('cublas')
   $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml-
+  $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
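These `with_config` branches correspond to install-time flags, so building the gem with Metal or cuBLAS support is typically a matter of `gem install llama_cpp -- --with-metal` or `gem install llama_cpp -- --with-cublas`; the flag names mirror the checks above.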
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -808,13 +808,14 @@ public:
     rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
     rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
-    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "
-    rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
+    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
+    rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
-    rb_define_method(rb_cLLaMAModel, "
+    rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
+    rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
+    rb_define_method(rb_cLLaMAModel, "n_params", RUBY_METHOD_FUNC(_llama_model_get_model_n_params), 0);
   }
 
 private:
@@ -960,64 +961,42 @@ private:
     return Qnil;
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_vocab(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_vocab(ptr->model));
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_ctx(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_ctx(ptr->model));
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_embd(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_embd(ptr->model));
   }
 
-  static VALUE
-    VALUE kw_args = Qnil;
-    ID kw_table[1] = { rb_intern("capacity") };
-    VALUE kw_values[1] = { Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "capacity must be an integer");
-      return Qnil;
-    }
-
-    const int capacity = NUM2INT(kw_values[0]);
-
-    LLaMAModelWrapper* ptr = get_llama_model(self);
-    const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
-    const char** vocabs = ALLOCA_N(const char*, n);
-    float* scores = ALLOCA_N(float, n);
-
-    llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
-
-    VALUE vocabs_ary = rb_ary_new();
-    VALUE scores_ary = rb_ary_new();
-
-    for (int i = 0; i < n; i++) {
-      rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
-      rb_ary_push(scores_ary, DBL2NUM(scores[i]));
-    }
-
-    VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
-
-    return ret;
-  }
-
-  static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
+  static VALUE _llama_model_token_to_piece_with_model(VALUE self, VALUE token_) {
     if (!RB_INTEGER_TYPE_P(token_)) {
       rb_raise(rb_eArgError, "token must be an integer");
       return Qnil;
     }
     const llama_token token = NUM2INT(token_);
     LLaMAModelWrapper* ptr = get_llama_model(self);
-
-
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_piece_with_model(ptr->model, token, result.data(), result.size());
+    if (n_tokens < 0) {
+      result.resize(-n_tokens);
+      const int check = llama_token_to_piece_with_model(ptr->model, token, result.data(), result.size());
+      if (check != -n_tokens) {
+        rb_raise(rb_eRuntimeError, "failed to convert");
+        return Qnil;
+      }
+    } else {
+      result.resize(n_tokens);
+    }
+    std::string ret(result.data(), result.size());
+    return rb_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
@@ -1063,12 +1042,22 @@ private:
     return ret;
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_desc(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
     char buf[128];
-
+    llama_model_desc(ptr->model, buf, sizeof(buf));
     return rb_str_new_cstr(buf);
   }
+
+  static VALUE _llama_model_get_model_size(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return UINT2NUM(llama_model_size(ptr->model));
+  }
+
+  static VALUE _llama_model_get_model_n_params(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return UINT2NUM(llama_model_n_params(ptr->model));
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -1343,8 +1332,13 @@ public:
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
-    rb_define_method(rb_cLLaMAContext, "
-    rb_define_method(rb_cLLaMAContext, "
+    rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
+    rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
+    rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
+    rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
+    rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
+    rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
+    rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
@@ -1585,15 +1579,27 @@ private:
     return output;
   }
 
-  static VALUE
+  static VALUE _llama_context_token_to_piece(VALUE self, VALUE token_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
     const llama_token token = NUM2INT(token_);
-
-
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_piece(ptr->ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+      result.resize(-n_tokens);
+      const int check = llama_token_to_piece(ptr->ctx, token, result.data(), result.size());
+      if (check != -n_tokens) {
+        rb_raise(rb_eRuntimeError, "failed to convert");
+        return Qnil;
+      }
+    } else {
+      result.resize(n_tokens);
+    }
+    std::string ret(result.data(), result.size());
+    return rb_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_context_logits(VALUE self) {
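Both `token_to_piece` bindings follow llama.cpp's two-call convention: when the initial 8-byte buffer is too small, the call returns a negative value whose magnitude is the required size, so the wrapper resizes and retries once before building the Ruby string.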
@@ -1649,41 +1655,64 @@ private:
     return output;
   }
 
-  static VALUE
-
-
-
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "capacity must be an integer");
-      return Qnil;
-    }
-  }
+  static VALUE _llama_context_text(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const char* text = llama_token_get_text(ptr->ctx, token);
+    return rb_str_new_cstr(text);
+  }
+
+  static VALUE _llama_context_score(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const float score = llama_token_get_score(ptr->ctx, token);
+    return DBL2NUM(score);
+  }
+
+  static VALUE _llama_context_type(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const int type = llama_token_get_type(ptr->ctx, token);
+    return INT2NUM(type);
+  }
+
+  static VALUE _llama_context_token_bos(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_bos(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_eos(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_eos(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_nl(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_nl(ptr->ctx));
+  }
 
   static VALUE _llama_context_n_vocab(VALUE self) {
@@ -2474,23 +2503,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   return Qnil;
 }
 
-static VALUE rb_llama_token_bos(VALUE self) {
-  return INT2NUM(llama_token_bos());
-}
-
-static VALUE rb_llama_token_eos(VALUE self) {
-  return INT2NUM(llama_token_eos());
-}
-
-static VALUE rb_llama_token_nl(VALUE self) {
-  return INT2NUM(llama_token_nl());
-}
-
 static VALUE rb_llama_print_system_info(VALUE self) {
   const char* result = llama_print_system_info();
   return rb_utf8_str_new_cstr(result);
 }
 
+static VALUE rb_llama_time_us(VALUE self) {
+  return LONG2NUM(llama_time_us());
+}
+
 static VALUE rb_llama_mmap_supported(VALUE self) {
   return llama_mmap_supported() ? Qtrue : Qfalse;
 }
@@ -2519,16 +2540,29 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
   rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
-  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
-  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
-  rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+  rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
   rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
@@ -2547,6 +2581,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
@@ -2556,39 +2592,9 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
   std::stringstream ss_magic;
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
 
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
-
   ss_magic.str("");
   ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
@@ -2599,6 +2605,5 @@ extern "C" void Init_llama_cpp(void) {
   ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
   rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
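On the Ruby side, the Context now owns the token helpers that were previously module-level functions, alongside the per-token vocabulary accessors added in 0.4.0. A short sketch (model path illustrative):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

bos = context.token_bos          # formerly LLaMACpp.token_bos
puts context.token_to_piece(bos) # formerly Context#token_to_str
puts context.text(bos)           # vocabulary text for the token
puts context.score(bos)          # vocabulary score
puts context.type(bos)           # token type, e.g. LLaMACpp::LLAMA_TOKEN_TYPE_CONTROL
```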