llama_cpp 0.3.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +1 -1
- data/examples/chat.rb +4 -6
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +129 -124
- data/ext/llama_cpp/src/ggml-alloc.c +90 -113
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +350 -77
- data/ext/llama_cpp/src/ggml-cuda.h +13 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +226 -121
- data/ext/llama_cpp/src/ggml-metal.metal +157 -35
- data/ext/llama_cpp/src/ggml.c +2724 -584
- data/ext/llama_cpp/src/ggml.h +282 -31
- data/ext/llama_cpp/src/k_quants.c +112 -56
- data/ext/llama_cpp/src/llama.cpp +4857 -2986
- data/ext/llama_cpp/src/llama.h +180 -126
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -2
- data/sig/llama_cpp.rbs +12 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 715eab98a76ed825d66da6e4fcc84154dca8eed76f6cf6625d210a1ffb702958
+  data.tar.gz: 3ceafc312354d245e485b664d71450cd9c27bcd89f5faec91af6cdf1221c251f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ebe959d9380c9d981156606fdd8a6bcea9b88914923e693b400cfcd605b8c216bdfdcc807c0e72a21fe5fc6d7d623118fc7246524d7f59acdb8bc0064d736bc
+  data.tar.gz: c6d428234d866c09d227b5c308a573e9721454ded3f7fdd36880706e7c47c72c67e6fed119c75d6898c6a1149cde853e5dbb59e3a390ef3d370aab4f0d6be548
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
+## [[0.5.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.4.0...v0.5.0)] - 2023-09-02
+
+**Breaking Changes**
+- Bump bundled llama.cpp from master-b1060 to master-b1140.
+- Rename `token_to_str` method on Context to `token_to_piece` method.
+- Rename `token_to_str` method on Model to `token_to_piece` method.
+- Rename `type` method on Model to `desc` method.
+- Add `size` and `n_params` methods to Model.
+
+## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
+
+**Breaking Changes**
+- Bump bundled llama.cpp from master-097e121 to master-b1060.
+- Support new file format GGUF.
+  - You should re-convert / re-quantize your model files.
+- Remove vocab methods.
+- Move token_bos, token_eos, and token_nl methods to Context.
+- Add text, score, and type methods to Context.
+
 ## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
 
 - Bump bundled llama.cpp from master-9ca4abe to master-097e121.
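The renames in 0.4.0 and 0.5.0 break existing callers. A minimal migration sketch in Ruby; only the renamed and added method names come from this diff, while the constructor keywords are assumptions based on the gem's README of this era:

```ruby
require 'llama_cpp'

# Hypothetical setup; the ContextParams/Model/Context keywords are assumptions.
params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

# 0.3.x name            -> 0.5.0 name (from this diff)
# model.token_to_str    -> model.token_to_piece
# context.token_to_str  -> context.token_to_piece
# model.type            -> model.desc
piece = context.token_to_piece(context.token_bos)

puts model.desc      # short model description string
puts model.size      # new in 0.5.0: model size in bytes
puts model.n_params  # new in 0.5.0: parameter count
```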
data/README.md
CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
 $ cd ../
 $ python3 convert.py models/open_llama_7b
 $ make
-$ ./quantize ./models/open_llama_7b/ggml-model-f16.
+$ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
 ```
 
 An example of Ruby code that generates sentences with the quantization model is as follows:
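A hedged sketch of the kind of snippet the README goes on to show; `LLaMACpp.generate` and the constructor keywords are assumptions based on the gem's documented API, not part of this diff:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 42
model = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

puts LLaMACpp.generate(context, 'Hello, World.')
```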
data/examples/chat.rb
CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
     n_keep = options[:keep]
     n_keep = embd_input.size if n_keep > embd_input.size
 
-    token_newline = context.tokenize(text: "\n", add_bos: false)
-
     last_n_tokens = [0] * n_ctx
     interactive = true
     is_interacting = false
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
         last_n_tokens.shift
         last_n_tokens.push(id)
 
-        if id ==
-          id =
+        if id == context.token_eos
+          id = context.token_nl
           unless antiprompt.empty?
             first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
             embd_input.concat(first_antiprompt)
@@ -124,7 +122,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
 
       if input_echo
         output = []
-        embd.each { |token| output << context.
+        embd.each { |token| output << context.token_to_piece(token) }
        output_str = output.join
         output_str.chomp!(antiprompt) if first_input
         print(output_str)
@@ -133,7 +131,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
       if embd_input.size <= n_consumed
         if antiprompt.size.positive?
           last_output = []
-          last_n_tokens.each { |token| last_output << context.
+          last_n_tokens.each { |token| last_output << context.token_to_piece(token) }
           last_output_str = last_output.join
 
           search_start_pos = last_output_str.size > antiprompt.size ? last_output_str.size - antiprompt.size : 0
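The net effect of the chat.rb changes, condensed (variable names as in the example; the surrounding generation loop is elided): the pre-tokenized newline is gone, end-of-sequence handling goes through the Context-level token helpers added in 0.4.0, and decoding uses the renamed token_to_piece:

```ruby
# On end-of-sequence, emit a newline token and re-inject the antiprompt
# so the chat loop hands control back to the user.
if id == context.token_eos
  id = context.token_nl
  unless antiprompt.empty?
    first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
    embd_input.concat(first_antiprompt)
  end
end

# token_to_str is gone; decode token ids via token_to_piece.
output = embd.map { |token| context.token_to_piece(token) }.join
```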
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -50,10 +50,10 @@ if with_config('accelerate')
 end
 
 if with_config('metal')
-  $CFLAGS << ' -DGGML_USE_METAL
+  $CFLAGS << ' -DGGML_USE_METAL'
   $CXXFLAGS << ' -DGGML_USE_METAL'
   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
-  $objs = %w[ggml.o ggml-alloc.o
+  $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
@@ -61,7 +61,7 @@ if with_config('cublas')
   $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml-
+  $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -808,13 +808,14 @@ public:
     rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
     rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
-    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "
-    rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
+    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
+    rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
-    rb_define_method(rb_cLLaMAModel, "
+    rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
+    rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
+    rb_define_method(rb_cLLaMAModel, "n_params", RUBY_METHOD_FUNC(_llama_model_get_model_n_params), 0);
   }
 
 private:
@@ -960,64 +961,42 @@ private:
     return Qnil;
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_vocab(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_vocab(ptr->model));
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_ctx(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_ctx(ptr->model));
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_embd(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_embd(ptr->model));
  }
 
-  static VALUE
-    VALUE kw_args = Qnil;
-    ID kw_table[1] = { rb_intern("capacity") };
-    VALUE kw_values[1] = { Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "capacity must be an integer");
-      return Qnil;
-    }
-
-    const int capacity = NUM2INT(kw_values[0]);
-
-    LLaMAModelWrapper* ptr = get_llama_model(self);
-    const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
-    const char** vocabs = ALLOCA_N(const char*, n);
-    float* scores = ALLOCA_N(float, n);
-
-    llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
-
-    VALUE vocabs_ary = rb_ary_new();
-    VALUE scores_ary = rb_ary_new();
-
-    for (int i = 0; i < n; i++) {
-      rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
-      rb_ary_push(scores_ary, DBL2NUM(scores[i]));
-    }
-
-    VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
-
-    return ret;
-  }
-
-  static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
+  static VALUE _llama_model_token_to_piece_with_model(VALUE self, VALUE token_) {
     if (!RB_INTEGER_TYPE_P(token_)) {
       rb_raise(rb_eArgError, "token must be an integer");
       return Qnil;
     }
     const llama_token token = NUM2INT(token_);
     LLaMAModelWrapper* ptr = get_llama_model(self);
-
-
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_piece_with_model(ptr->model, token, result.data(), result.size());
+    if (n_tokens < 0) {
+      result.resize(-n_tokens);
+      const int check = llama_token_to_piece_with_model(ptr->model, token, result.data(), result.size());
+      if (check != -n_tokens) {
+        rb_raise(rb_eRuntimeError, "failed to convert");
+        return Qnil;
+      }
+    } else {
+      result.resize(n_tokens);
+    }
+    std::string ret(result.data(), result.size());
+    return rb_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
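The new wrapper follows llama.cpp's two-call convention: the C function returns the number of bytes written, or the negative of the required size when the 8-byte scratch buffer is too small, in which case the wrapper resizes and calls again. From Ruby the retry is invisible; the token value below is only illustrative:

```ruby
piece = model.token_to_piece(123)  # => a String fragment such as " the"
```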
@@ -1063,12 +1042,22 @@ private:
     return ret;
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_desc(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
     char buf[128];
-
+    llama_model_desc(ptr->model, buf, sizeof(buf));
     return rb_str_new_cstr(buf);
   }
+
+  static VALUE _llama_model_get_model_size(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return UINT2NUM(llama_model_size(ptr->model));
+  }
+
+  static VALUE _llama_model_get_model_n_params(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return UINT2NUM(llama_model_n_params(ptr->model));
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -1343,8 +1332,13 @@ public:
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
-    rb_define_method(rb_cLLaMAContext, "
-    rb_define_method(rb_cLLaMAContext, "
+    rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
+    rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
+    rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
+    rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
+    rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
+    rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
+    rb_define_method(rb_cLLaMAContext, "token_to_piece", RUBY_METHOD_FUNC(_llama_context_token_to_piece), 1);
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
@@ -1585,15 +1579,27 @@ private:
     return output;
   }
 
-  static VALUE
+  static VALUE _llama_context_token_to_piece(VALUE self, VALUE token_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
     const llama_token token = NUM2INT(token_);
-
-
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_piece(ptr->ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+      result.resize(-n_tokens);
+      const int check = llama_token_to_piece(ptr->ctx, token, result.data(), result.size());
+      if (check != -n_tokens) {
+        rb_raise(rb_eRuntimeError, "failed to convert");
+        return Qnil;
+      }
+    } else {
+      result.resize(n_tokens);
+    }
+    std::string ret(result.data(), result.size());
+    return rb_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_context_logits(VALUE self) {
@@ -1649,41 +1655,64 @@ private:
     return output;
   }
 
-  static VALUE
-
-
-
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "capacity must be an integer");
+  static VALUE _llama_context_text(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    const llama_token token = NUM2INT(token_);
+    const char* text = llama_token_get_text(ptr->ctx, token);
+    return rb_str_new_cstr(text);
+  }
 
+  static VALUE _llama_context_score(VALUE self, VALUE token_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    const llama_token token = NUM2INT(token_);
+    const float score = llama_token_get_score(ptr->ctx, token);
+    return DBL2NUM(score);
+  }
 
-
-
-
-
-
-
+  static VALUE _llama_context_type(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const int type = llama_token_get_type(ptr->ctx, token);
+    return INT2NUM(type);
+  }
 
-
+  static VALUE _llama_context_token_bos(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_bos(ptr->ctx));
+  }
 
-
-
-
-
-
+  static VALUE _llama_context_token_eos(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
     }
+    return INT2NUM(llama_token_eos(ptr->ctx));
+  }
 
-
+  static VALUE _llama_context_token_nl(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_nl(ptr->ctx));
   }
 
   static VALUE _llama_context_n_vocab(VALUE self) {
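Context now exposes per-token vocabulary metadata directly. A sketch that dumps a few entries; the loop and formatting are illustrative, and only the accessor names and the LLAMA_TOKEN_TYPE_* constants come from this diff:

```ruby
5.times do |id|
  text  = context.text(id)   # raw vocabulary text of the token
  score = context.score(id)  # sentencepiece-style score
  type  = context.type(id)   # integer, cf. LLaMACpp::LLAMA_TOKEN_TYPE_NORMAL
  puts format('%5d %-12s score=%.3f type=%d', id, text.inspect, score, type)
end
```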
@@ -2474,23 +2503,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   return Qnil;
 }
 
-static VALUE rb_llama_token_bos(VALUE self) {
-  return INT2NUM(llama_token_bos());
-}
-
-static VALUE rb_llama_token_eos(VALUE self) {
-  return INT2NUM(llama_token_eos());
-}
-
-static VALUE rb_llama_token_nl(VALUE self) {
-  return INT2NUM(llama_token_nl());
-}
-
 static VALUE rb_llama_print_system_info(VALUE self) {
   const char* result = llama_print_system_info();
   return rb_utf8_str_new_cstr(result);
 }
 
+static VALUE rb_llama_time_us(VALUE self) {
+  return LONG2NUM(llama_time_us());
+}
+
 static VALUE rb_llama_mmap_supported(VALUE self) {
   return llama_mmap_supported() ? Qtrue : Qfalse;
 }
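time_us exposes llama.cpp's microsecond clock to Ruby, handy for rough timing; the eval call below is a hypothetical placeholder, and only LLaMACpp.time_us comes from this diff:

```ruby
t0 = LLaMACpp.time_us
context.eval(tokens: tokens, n_past: 0)  # hypothetical workload
puts "eval took #{(LLaMACpp.time_us - t0) / 1000.0} ms"
```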
@@ -2519,16 +2540,29 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
   rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
-  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
-  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
-  rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+  rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
   rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
@@ -2547,6 +2581,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
@@ -2556,39 +2592,9 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
   std::stringstream ss_magic;
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
 
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
-
   ss_magic.str("");
   ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
@@ -2599,6 +2605,5 @@ extern "C" void Init_llama_cpp(void) {
   ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
   rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }