llama_cpp 0.3.7 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +1 -1
- data/examples/chat.rb +2 -4
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +118 -117
- data/ext/llama_cpp/src/ggml-alloc.c +97 -53
- data/ext/llama_cpp/src/ggml-alloc.h +4 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1010 -497
- data/ext/llama_cpp/src/ggml-cuda.h +32 -23
- data/ext/llama_cpp/src/ggml-metal.h +9 -3
- data/ext/llama_cpp/src/ggml-metal.m +142 -161
- data/ext/llama_cpp/src/ggml-metal.metal +577 -500
- data/ext/llama_cpp/src/ggml.c +2064 -233
- data/ext/llama_cpp/src/ggml.h +238 -13
- data/ext/llama_cpp/src/k_quants.c +110 -54
- data/ext/llama_cpp/src/llama-util.h +10 -8
- data/ext/llama_cpp/src/llama.cpp +4544 -2890
- data/ext/llama_cpp/src/llama.h +133 -123
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +8 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af3a0e01bc9f3cfad4cee3f21144dd354640e1d4558125be36d4b499fa3b4c24
|
4
|
+
data.tar.gz: 042a3b0491d98fa6a093c684e6ab751152f37c8438a3b4a7b19cb2d8c7ab95a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ed85bd8438ee3b3adab884795c4aecb5b0d72ad57b7e02bc281b62c3b1d669efab62a020e03b09defe3084ecd8afacc4220303e99167d04d668650768c7392b
|
7
|
+
data.tar.gz: b705a0ccd2c7c1e15aed6383acb9d5a3d79d0a0c882a74c42b9099df9a27aff88ba08a2f06aa4d195382e8f41c1b16c0014a2047d1923369f275ca481d52bb21
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
|
2
|
+
|
3
|
+
**Breaking Changes**
|
4
|
+
- Bump bundled llama.cpp from master-097e121 to master-b1060.
|
5
|
+
- Support new file format GGUF.
|
6
|
+
- You should re-convert / re-quantize your model files.
|
7
|
+
- Remove vocab methods.
|
8
|
+
- Move token_bos, token_eos, and token_nl methods to Context.
|
9
|
+
- Add text, score, and type methods to Context.
|
10
|
+
|
11
|
+
## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
|
12
|
+
|
13
|
+
- Bump bundled llama.cpp from master-9ca4abe to master-097e121.
|
14
|
+
- Add `type` method to Model.
|
15
|
+
- Revert pull request #2592 in llama.cpp.
|
16
|
+
It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
|
17
|
+
|
1
18
|
## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12
|
2
19
|
|
3
20
|
- Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
|
data/README.md
CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
|
|
51
51
|
$ cd ../
|
52
52
|
$ python3 convert.py models/open_llama_7b
|
53
53
|
$ make
|
54
|
-
$ ./quantize ./models/open_llama_7b/ggml-model-f16.bin ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
|
54
|
+
$ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
|
55
55
|
```
|
56
56
|
|
57
57
|
An example of Ruby code that generates sentences with the quantization model is as follows:
|
data/examples/chat.rb
CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
|
|
49
49
|
n_keep = options[:keep]
|
50
50
|
n_keep = embd_input.size if n_keep > embd_input.size
|
51
51
|
|
52
|
-
token_newline = context.tokenize(text: "\n", add_bos: false)
|
53
|
-
|
54
52
|
last_n_tokens = [0] * n_ctx
|
55
53
|
interactive = true
|
56
54
|
is_interacting = false
|
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
|
|
101
99
|
last_n_tokens.shift
|
102
100
|
last_n_tokens.push(id)
|
103
101
|
|
104
|
-
if id ==
|
105
|
-
id =
|
102
|
+
if id == context.token_eos
|
103
|
+
id = context.token_nl
|
106
104
|
unless antiprompt.empty?
|
107
105
|
first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
|
108
106
|
embd_input.concat(first_antiprompt)
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -52,8 +52,8 @@ end
|
|
52
52
|
if with_config('metal')
|
53
53
|
$CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
|
54
54
|
$CXXFLAGS << ' -DGGML_USE_METAL'
|
55
|
-
$LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit
|
56
|
-
$objs = %w[ggml.o
|
55
|
+
$LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
56
|
+
$objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
|
57
57
|
$objs << 'k_quants.o' unless with_config('no_k_quants')
|
58
58
|
end
|
59
59
|
|
@@ -61,7 +61,7 @@ if with_config('cublas')
|
|
61
61
|
$CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
|
62
62
|
$CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
|
63
63
|
$LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
|
64
|
-
$objs = %w[ggml-
|
64
|
+
$objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
|
65
65
|
$objs << 'k_quants.o' unless with_config('no_k_quants')
|
66
66
|
end
|
67
67
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -808,12 +808,12 @@ public:
|
|
808
808
|
rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
|
809
809
|
rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
|
810
810
|
rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
|
811
|
-
rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(
|
812
|
-
rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(
|
813
|
-
rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(
|
814
|
-
rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
|
811
|
+
rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
|
812
|
+
rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
|
813
|
+
rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
|
815
814
|
rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
|
816
815
|
rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
|
816
|
+
rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
|
817
817
|
}
|
818
818
|
|
819
819
|
private:
|
@@ -959,53 +959,19 @@ private:
|
|
959
959
|
return Qnil;
|
960
960
|
}
|
961
961
|
|
962
|
-
static VALUE
|
962
|
+
static VALUE _llama_model_get_model_n_vocab(VALUE self) {
|
963
963
|
LLaMAModelWrapper* ptr = get_llama_model(self);
|
964
|
-
return INT2NUM(
|
964
|
+
return INT2NUM(llama_model_n_vocab(ptr->model));
|
965
965
|
}
|
966
966
|
|
967
|
-
static VALUE
|
967
|
+
static VALUE _llama_model_get_model_n_ctx(VALUE self) {
|
968
968
|
LLaMAModelWrapper* ptr = get_llama_model(self);
|
969
|
-
return INT2NUM(
|
969
|
+
return INT2NUM(llama_model_n_ctx(ptr->model));
|
970
970
|
}
|
971
971
|
|
972
|
-
static VALUE
|
972
|
+
static VALUE _llama_model_get_model_n_embd(VALUE self) {
|
973
973
|
LLaMAModelWrapper* ptr = get_llama_model(self);
|
974
|
-
return INT2NUM(
|
975
|
-
}
|
976
|
-
|
977
|
-
static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
|
978
|
-
VALUE kw_args = Qnil;
|
979
|
-
ID kw_table[1] = { rb_intern("capacity") };
|
980
|
-
VALUE kw_values[1] = { Qundef };
|
981
|
-
rb_scan_args(argc, argv, ":", &kw_args);
|
982
|
-
rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
|
983
|
-
|
984
|
-
if (!RB_INTEGER_TYPE_P(kw_values[0])) {
|
985
|
-
rb_raise(rb_eArgError, "capacity must be an integer");
|
986
|
-
return Qnil;
|
987
|
-
}
|
988
|
-
|
989
|
-
const int capacity = NUM2INT(kw_values[0]);
|
990
|
-
|
991
|
-
LLaMAModelWrapper* ptr = get_llama_model(self);
|
992
|
-
const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
|
993
|
-
const char** vocabs = ALLOCA_N(const char*, n);
|
994
|
-
float* scores = ALLOCA_N(float, n);
|
995
|
-
|
996
|
-
llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
|
997
|
-
|
998
|
-
VALUE vocabs_ary = rb_ary_new();
|
999
|
-
VALUE scores_ary = rb_ary_new();
|
1000
|
-
|
1001
|
-
for (int i = 0; i < n; i++) {
|
1002
|
-
rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
|
1003
|
-
rb_ary_push(scores_ary, DBL2NUM(scores[i]));
|
1004
|
-
}
|
1005
|
-
|
1006
|
-
VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
|
1007
|
-
|
1008
|
-
return ret;
|
974
|
+
return INT2NUM(llama_model_n_embd(ptr->model));
|
1009
975
|
}
|
1010
976
|
|
1011
977
|
static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
|
@@ -1015,8 +981,20 @@ private:
|
|
1015
981
|
}
|
1016
982
|
const llama_token token = NUM2INT(token_);
|
1017
983
|
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1018
|
-
|
1019
|
-
|
984
|
+
std::vector<char> result(8, 0);
|
985
|
+
const int n_tokens = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
|
986
|
+
if (n_tokens < 0) {
|
987
|
+
result.resize(-n_tokens);
|
988
|
+
const int check = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
|
989
|
+
if (check != -n_tokens) {
|
990
|
+
rb_raise(rb_eRuntimeError, "failed to convert");
|
991
|
+
return Qnil;
|
992
|
+
}
|
993
|
+
} else {
|
994
|
+
result.resize(n_tokens);
|
995
|
+
}
|
996
|
+
std::string ret(result.data(), result.size());
|
997
|
+
return rb_str_new_cstr(ret.c_str());
|
1020
998
|
}
|
1021
999
|
|
1022
1000
|
static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
|
@@ -1061,6 +1039,13 @@ private:
|
|
1061
1039
|
RB_GC_GUARD(text_);
|
1062
1040
|
return ret;
|
1063
1041
|
}
|
1042
|
+
|
1043
|
+
static VALUE _llama_model_get_model_type(VALUE self) {
|
1044
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1045
|
+
char buf[128];
|
1046
|
+
::llama_model_type(ptr->model, buf, sizeof(buf));
|
1047
|
+
return rb_str_new_cstr(buf);
|
1048
|
+
}
|
1064
1049
|
};
|
1065
1050
|
|
1066
1051
|
const rb_data_type_t RbLLaMAModel::llama_model_type = {
|
@@ -1335,7 +1320,12 @@ public:
|
|
1335
1320
|
rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
|
1336
1321
|
rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
|
1337
1322
|
rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
|
1338
|
-
rb_define_method(rb_cLLaMAContext, "
|
1323
|
+
rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
|
1324
|
+
rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
|
1325
|
+
rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
|
1326
|
+
rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
|
1327
|
+
rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
|
1328
|
+
rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
|
1339
1329
|
rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
|
1340
1330
|
rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
|
1341
1331
|
rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
|
@@ -1584,8 +1574,20 @@ private:
|
|
1584
1574
|
return Qnil;
|
1585
1575
|
}
|
1586
1576
|
const llama_token token = NUM2INT(token_);
|
1587
|
-
|
1588
|
-
|
1577
|
+
std::vector<char> result(8, 0);
|
1578
|
+
const int n_tokens = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
|
1579
|
+
if (n_tokens < 0) {
|
1580
|
+
result.resize(-n_tokens);
|
1581
|
+
const int check = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
|
1582
|
+
if (check != -n_tokens) {
|
1583
|
+
rb_raise(rb_eRuntimeError, "failed to convert");
|
1584
|
+
return Qnil;
|
1585
|
+
}
|
1586
|
+
} else {
|
1587
|
+
result.resize(n_tokens);
|
1588
|
+
}
|
1589
|
+
std::string ret(result.data(), result.size());
|
1590
|
+
return rb_str_new_cstr(ret.c_str());
|
1589
1591
|
}
|
1590
1592
|
|
1591
1593
|
static VALUE _llama_context_logits(VALUE self) {
|
@@ -1641,41 +1643,64 @@ private:
|
|
1641
1643
|
return output;
|
1642
1644
|
}
|
1643
1645
|
|
1644
|
-
static VALUE
|
1645
|
-
|
1646
|
-
|
1647
|
-
|
1648
|
-
rb_scan_args(argc, argv, ":", &kw_args);
|
1649
|
-
rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
|
1650
|
-
|
1651
|
-
if (!RB_INTEGER_TYPE_P(kw_values[0])) {
|
1652
|
-
rb_raise(rb_eArgError, "capacity must be an integer");
|
1646
|
+
static VALUE _llama_context_text(VALUE self, VALUE token_) {
|
1647
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1648
|
+
if (ptr->ctx == NULL) {
|
1649
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1653
1650
|
return Qnil;
|
1654
1651
|
}
|
1652
|
+
const llama_token token = NUM2INT(token_);
|
1653
|
+
const char* text = llama_token_get_text(ptr->ctx, token);
|
1654
|
+
return rb_str_new_cstr(text);
|
1655
|
+
}
|
1655
1656
|
|
1657
|
+
static VALUE _llama_context_score(VALUE self, VALUE token_) {
|
1656
1658
|
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1657
1659
|
if (ptr->ctx == NULL) {
|
1658
1660
|
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1659
1661
|
return Qnil;
|
1660
1662
|
}
|
1663
|
+
const llama_token token = NUM2INT(token_);
|
1664
|
+
const float score = llama_token_get_score(ptr->ctx, token);
|
1665
|
+
return DBL2NUM(score);
|
1666
|
+
}
|
1661
1667
|
|
1662
|
-
|
1663
|
-
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
1668
|
+
static VALUE _llama_context_type(VALUE self, VALUE token_) {
|
1669
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1670
|
+
if (ptr->ctx == NULL) {
|
1671
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1672
|
+
return Qnil;
|
1673
|
+
}
|
1674
|
+
const llama_token token = NUM2INT(token_);
|
1675
|
+
const int type = llama_token_get_type(ptr->ctx, token);
|
1676
|
+
return INT2NUM(type);
|
1677
|
+
}
|
1668
1678
|
|
1669
|
-
|
1679
|
+
static VALUE _llama_context_token_bos(VALUE self) {
|
1680
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1681
|
+
if (ptr->ctx == NULL) {
|
1682
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1683
|
+
return Qnil;
|
1684
|
+
}
|
1685
|
+
return INT2NUM(llama_token_bos(ptr->ctx));
|
1686
|
+
}
|
1670
1687
|
|
1671
|
-
|
1672
|
-
|
1673
|
-
|
1674
|
-
|
1675
|
-
|
1688
|
+
static VALUE _llama_context_token_eos(VALUE self) {
|
1689
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1690
|
+
if (ptr->ctx == NULL) {
|
1691
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1692
|
+
return Qnil;
|
1676
1693
|
}
|
1694
|
+
return INT2NUM(llama_token_eos(ptr->ctx));
|
1695
|
+
}
|
1677
1696
|
|
1678
|
-
|
1697
|
+
static VALUE _llama_context_token_nl(VALUE self) {
|
1698
|
+
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1699
|
+
if (ptr->ctx == NULL) {
|
1700
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1701
|
+
return Qnil;
|
1702
|
+
}
|
1703
|
+
return INT2NUM(llama_token_nl(ptr->ctx));
|
1679
1704
|
}
|
1680
1705
|
|
1681
1706
|
static VALUE _llama_context_n_vocab(VALUE self) {
|
@@ -2466,23 +2491,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
|
|
2466
2491
|
return Qnil;
|
2467
2492
|
}
|
2468
2493
|
|
2469
|
-
static VALUE rb_llama_token_bos(VALUE self) {
|
2470
|
-
return INT2NUM(llama_token_bos());
|
2471
|
-
}
|
2472
|
-
|
2473
|
-
static VALUE rb_llama_token_eos(VALUE self) {
|
2474
|
-
return INT2NUM(llama_token_eos());
|
2475
|
-
}
|
2476
|
-
|
2477
|
-
static VALUE rb_llama_token_nl(VALUE self) {
|
2478
|
-
return INT2NUM(llama_token_nl());
|
2479
|
-
}
|
2480
|
-
|
2481
2494
|
static VALUE rb_llama_print_system_info(VALUE self) {
|
2482
2495
|
const char* result = llama_print_system_info();
|
2483
2496
|
return rb_utf8_str_new_cstr(result);
|
2484
2497
|
}
|
2485
2498
|
|
2499
|
+
static VALUE rb_llama_time_us(VALUE self) {
|
2500
|
+
return LONG2NUM(llama_time_us());
|
2501
|
+
}
|
2502
|
+
|
2486
2503
|
static VALUE rb_llama_mmap_supported(VALUE self) {
|
2487
2504
|
return llama_mmap_supported() ? Qtrue : Qfalse;
|
2488
2505
|
}
|
@@ -2511,16 +2528,29 @@ extern "C" void Init_llama_cpp(void) {
|
|
2511
2528
|
rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
|
2512
2529
|
rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
|
2513
2530
|
rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
|
2514
|
-
rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
|
2515
|
-
rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
|
2516
|
-
rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
|
2517
2531
|
rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
|
2532
|
+
rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
|
2518
2533
|
rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
|
2519
2534
|
rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
|
2520
2535
|
rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
|
2521
2536
|
|
2522
2537
|
rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
|
2523
2538
|
|
2539
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
|
2540
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
|
2541
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
|
2542
|
+
|
2543
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
|
2544
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
|
2545
|
+
|
2546
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
|
2547
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
|
2548
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
|
2549
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
|
2550
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
|
2551
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
|
2552
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
|
2553
|
+
|
2524
2554
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
|
2525
2555
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
|
2526
2556
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
|
@@ -2539,6 +2569,8 @@ extern "C" void Init_llama_cpp(void) {
|
|
2539
2569
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
|
2540
2570
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));
|
2541
2571
|
|
2572
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
|
2573
|
+
|
2542
2574
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
|
2543
2575
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
|
2544
2576
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
|
@@ -2548,39 +2580,9 @@ extern "C" void Init_llama_cpp(void) {
|
|
2548
2580
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
|
2549
2581
|
|
2550
2582
|
std::stringstream ss_magic;
|
2551
|
-
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
|
2552
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
|
2553
|
-
|
2554
|
-
ss_magic.str("");
|
2555
|
-
ss_magic.clear(std::stringstream::goodbit);
|
2556
|
-
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
|
2557
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
|
2558
|
-
|
2559
|
-
ss_magic.str("");
|
2560
|
-
ss_magic.clear(std::stringstream::goodbit);
|
2561
|
-
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
|
2562
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
|
2563
|
-
|
2564
|
-
ss_magic.str("");
|
2565
|
-
ss_magic.clear(std::stringstream::goodbit);
|
2566
|
-
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
|
2567
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
|
2568
|
-
|
2569
|
-
ss_magic.str("");
|
2570
|
-
ss_magic.clear(std::stringstream::goodbit);
|
2571
2583
|
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
|
2572
2584
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
|
2573
2585
|
|
2574
|
-
ss_magic.str("");
|
2575
|
-
ss_magic.clear(std::stringstream::goodbit);
|
2576
|
-
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
|
2577
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
|
2578
|
-
|
2579
|
-
ss_magic.str("");
|
2580
|
-
ss_magic.clear(std::stringstream::goodbit);
|
2581
|
-
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
|
2582
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
|
2583
|
-
|
2584
2586
|
ss_magic.str("");
|
2585
2587
|
ss_magic.clear(std::stringstream::goodbit);
|
2586
2588
|
ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
|
@@ -2591,6 +2593,5 @@ extern "C" void Init_llama_cpp(void) {
|
|
2591
2593
|
ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
|
2592
2594
|
rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));
|
2593
2595
|
|
2594
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
|
2595
2596
|
rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
|
2596
2597
|
}
|