llama_cpp 0.3.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +1 -1
- data/examples/chat.rb +2 -4
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +118 -117
- data/ext/llama_cpp/src/ggml-alloc.c +97 -53
- data/ext/llama_cpp/src/ggml-alloc.h +4 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1010 -497
- data/ext/llama_cpp/src/ggml-cuda.h +32 -23
- data/ext/llama_cpp/src/ggml-metal.h +9 -3
- data/ext/llama_cpp/src/ggml-metal.m +142 -161
- data/ext/llama_cpp/src/ggml-metal.metal +577 -500
- data/ext/llama_cpp/src/ggml.c +2064 -233
- data/ext/llama_cpp/src/ggml.h +238 -13
- data/ext/llama_cpp/src/k_quants.c +110 -54
- data/ext/llama_cpp/src/llama-util.h +10 -8
- data/ext/llama_cpp/src/llama.cpp +4544 -2890
- data/ext/llama_cpp/src/llama.h +133 -123
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +8 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: af3a0e01bc9f3cfad4cee3f21144dd354640e1d4558125be36d4b499fa3b4c24
+  data.tar.gz: 042a3b0491d98fa6a093c684e6ab751152f37c8438a3b4a7b19cb2d8c7ab95a7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7ed85bd8438ee3b3adab884795c4aecb5b0d72ad57b7e02bc281b62c3b1d669efab62a020e03b09defe3084ecd8afacc4220303e99167d04d668650768c7392b
+  data.tar.gz: b705a0ccd2c7c1e15aed6383acb9d5a3d79d0a0c882a74c42b9099df9a27aff88ba08a2f06aa4d195382e8f41c1b16c0014a2047d1923369f275ca481d52bb21
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
+## [[0.4.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.8...v0.4.0)] - 2023-08-26
+
+**Breaking Changes**
+- Bump bundled llama.cpp from master-097e121 to master-b1060.
+  - Support new file format GGUF.
+    - You should re-convert / re-quantize your model files.
+- Remove vocab methods.
+- Move token_bos, token_eos, and token_nl methods to Context.
+- Add text, score, and type methods to Context.
+
+## [[0.3.8](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.7...v0.3.8)] - 2023-08-19
+
+- Bump bundled llama.cpp from master-9ca4abe to master-097e121.
+- Add `type` method to Model.
+- Revert pull request #2592 in llama.cpp.
+  - It seems that PWIN32_MEMORY_RANGE_ENTRY and WIN32_MEMORY_RANGE_ENTRY do not exist in mingw.
+
 ## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12
 
 - Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
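In practice, the breaking part of 0.4.0 is that the special-token helpers move from module functions to Context instance methods. A minimal migration sketch, assuming an already-initialized `context` (the new `text`/`score`/`type` accessors appear in the binding diff further below):

    # 0.3.x (removed in 0.4.0): module-level helpers
    #   bos = LLaMACpp.token_bos
    # 0.4.0: ask the context instead
    bos = context.token_bos
    eos = context.token_eos
    nl  = context.token_nl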
data/README.md
CHANGED
@@ -51,7 +51,7 @@ $ git clone https://huggingface.co/openlm-research/open_llama_7b
 $ cd ../
 $ python3 convert.py models/open_llama_7b
 $ make
-$ ./quantize ./models/open_llama_7b/ggml-model-f16.
+$ ./quantize ./models/open_llama_7b/ggml-model-f16.gguf ./models/open_llama_7b/ggml-model-q4_0.bin q4_0
 ```
 
 An example of Ruby code that generates sentences with the quantization model is as follows:
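For reference, generating text with the quantized model follows this shape; a sketch assuming the gem's `Model`/`Context` keyword arguments and top-level `generate` helper (the path is illustrative):

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    model = LLaMACpp::Model.new(model_path: './models/open_llama_7b/ggml-model-q4_0.bin', params: params)
    context = LLaMACpp::Context.new(model: model)

    puts LLaMACpp.generate(context, 'Hello, World.')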
data/examples/chat.rb
CHANGED
@@ -49,8 +49,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
     n_keep = options[:keep]
     n_keep = embd_input.size if n_keep > embd_input.size
 
-    token_newline = context.tokenize(text: "\n", add_bos: false)
-
     last_n_tokens = [0] * n_ctx
     interactive = true
     is_interacting = false
@@ -101,8 +99,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
           last_n_tokens.shift
           last_n_tokens.push(id)
 
-          if id ==
-            id =
+          if id == context.token_eos
+            id = context.token_nl
             unless antiprompt.empty?
               first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
               embd_input.concat(first_antiprompt)
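The example no longer pre-tokenizes "\n"; when sampling produces end-of-sequence in interactive mode, it swaps in the context's newline token so the session keeps going. The pattern in isolation (a sketch, assuming `context` and a freshly sampled token `id`):

    # Keep an interactive session alive: replace EOS with a newline token.
    id = context.token_nl if id == context.token_eos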
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -52,8 +52,8 @@ end
 if with_config('metal')
   $CFLAGS << ' -DGGML_USE_METAL -DGGML_METAL_NDEBUG'
   $CXXFLAGS << ' -DGGML_USE_METAL'
-  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit
-  $objs = %w[ggml.o
+  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
+  $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
@@ -61,7 +61,7 @@ if with_config('cublas')
   $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml-
+  $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
   $objs << 'k_quants.o' unless with_config('no_k_quants')
 end
 
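Both accelerated builds now compile the new ggml-alloc.o into the extension. The branches are driven by extconf's `with_config` checks, so they are opted into at install time; for illustration (flag names inferred from the `with_config` calls above, not quoted from the source):

    $ gem install llama_cpp -- --with-metal
    $ gem install llama_cpp -- --with-cublas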
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -808,12 +808,12 @@ public:
     rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
     rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
-    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(
-    rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
+    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx), 0);
+    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
     rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
+    rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_model_type), 0);
   }
 
 private:
@@ -959,53 +959,19 @@ private:
     return Qnil;
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_vocab(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_vocab(ptr->model));
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_ctx(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
+    return INT2NUM(llama_model_n_ctx(ptr->model));
   }
 
-  static VALUE
+  static VALUE _llama_model_get_model_n_embd(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    return INT2NUM(
-  }
-
-  static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[1] = { rb_intern("capacity") };
-    VALUE kw_values[1] = { Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "capacity must be an integer");
-      return Qnil;
-    }
-
-    const int capacity = NUM2INT(kw_values[0]);
-
-    LLaMAModelWrapper* ptr = get_llama_model(self);
-    const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
-    const char** vocabs = ALLOCA_N(const char*, n);
-    float* scores = ALLOCA_N(float, n);
-
-    llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
-
-    VALUE vocabs_ary = rb_ary_new();
-    VALUE scores_ary = rb_ary_new();
-
-    for (int i = 0; i < n; i++) {
-      rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
-      rb_ary_push(scores_ary, DBL2NUM(scores[i]));
-    }
-
-    VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
-
-    return ret;
+    return INT2NUM(llama_model_n_embd(ptr->model));
   }
 
   static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
@@ -1015,8 +981,20 @@ private:
     }
     const llama_token token = NUM2INT(token_);
     LLaMAModelWrapper* ptr = get_llama_model(self);
-
-
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
+    if (n_tokens < 0) {
+      result.resize(-n_tokens);
+      const int check = llama_token_to_str_with_model(ptr->model, token, result.data(), result.size());
+      if (check != -n_tokens) {
+        rb_raise(rb_eRuntimeError, "failed to convert");
+        return Qnil;
+      }
+    } else {
+      result.resize(n_tokens);
+    }
+    std::string ret(result.data(), result.size());
+    return rb_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
@@ -1061,6 +1039,13 @@ private:
     RB_GC_GUARD(text_);
     return ret;
   }
+
+  static VALUE _llama_model_get_model_type(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    char buf[128];
+    ::llama_model_type(ptr->model, buf, sizeof(buf));
+    return rb_str_new_cstr(buf);
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -1335,7 +1320,12 @@ public:
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
-    rb_define_method(rb_cLLaMAContext, "
+    rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
+    rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
+    rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
+    rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
+    rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
+    rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
     rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
@@ -1584,8 +1574,20 @@ private:
       return Qnil;
     }
     const llama_token token = NUM2INT(token_);
-
-
+    std::vector<char> result(8, 0);
+    const int n_tokens = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
+    if (n_tokens < 0) {
+      result.resize(-n_tokens);
+      const int check = llama_token_to_str(ptr->ctx, token, result.data(), result.size());
+      if (check != -n_tokens) {
+        rb_raise(rb_eRuntimeError, "failed to convert");
+        return Qnil;
+      }
+    } else {
+      result.resize(n_tokens);
+    }
+    std::string ret(result.data(), result.size());
+    return rb_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_context_logits(VALUE self) {
@@ -1641,41 +1643,64 @@ private:
     return output;
   }
 
-  static VALUE
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "capacity must be an integer");
-      return Qnil;
-    }
+  static VALUE _llama_context_text(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const char* text = llama_token_get_text(ptr->ctx, token);
+    return rb_str_new_cstr(text);
+  }
+
+  static VALUE _llama_context_score(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const float score = llama_token_get_score(ptr->ctx, token);
+    return DBL2NUM(score);
+  }
+
+  static VALUE _llama_context_type(VALUE self, VALUE token_) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    const int type = llama_token_get_type(ptr->ctx, token);
+    return INT2NUM(type);
+  }
+
+  static VALUE _llama_context_token_bos(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_bos(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_eos(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_eos(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_nl(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_nl(ptr->ctx));
+  }
 
   static VALUE _llama_context_n_vocab(VALUE self) {
@@ -2466,23 +2491,15 @@ static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   return Qnil;
 }
 
-static VALUE rb_llama_token_bos(VALUE self) {
-  return INT2NUM(llama_token_bos());
-}
-
-static VALUE rb_llama_token_eos(VALUE self) {
-  return INT2NUM(llama_token_eos());
-}
-
-static VALUE rb_llama_token_nl(VALUE self) {
-  return INT2NUM(llama_token_nl());
-}
-
 static VALUE rb_llama_print_system_info(VALUE self) {
   const char* result = llama_print_system_info();
   return rb_utf8_str_new_cstr(result);
 }
 
+static VALUE rb_llama_time_us(VALUE self) {
+  return LONG2NUM(llama_time_us());
+}
+
 static VALUE rb_llama_mmap_supported(VALUE self) {
   return llama_mmap_supported() ? Qtrue : Qfalse;
 }
@@ -2511,16 +2528,29 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
   rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
-  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
-  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
-  rb_define_module_function(rb_mLLaMACpp, "token_nl", rb_llama_token_nl, 0);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
+  rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
   rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_ERROR", INT2NUM(LLAMA_LOG_LEVEL_ERROR));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_WARN", INT2NUM(LLAMA_LOG_LEVEL_WARN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_LOG_LEVEL_INFO", INT2NUM(LLAMA_LOG_LEVEL_INFO));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNDEFINED", INT2NUM(LLAMA_TOKEN_TYPE_UNDEFINED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_NORMAL", INT2NUM(LLAMA_TOKEN_TYPE_NORMAL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNKNOWN", INT2NUM(LLAMA_TOKEN_TYPE_UNKNOWN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_CONTROL", INT2NUM(LLAMA_TOKEN_TYPE_CONTROL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_USER_DEFINED", INT2NUM(LLAMA_TOKEN_TYPE_USER_DEFINED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_UNUSED", INT2NUM(LLAMA_TOKEN_TYPE_UNUSED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_TOKEN_TYPE_BYTE", INT2NUM(LLAMA_TOKEN_TYPE_BYTE));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_ALL_F32", INT2NUM(LLAMA_FTYPE_ALL_F32));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_F16));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
@@ -2539,6 +2569,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_K_M", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_K_M));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q6_K", INT2NUM(LLAMA_FTYPE_MOSTLY_Q6_K));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_RULE_REF", INT2NUM(LLAMA_GRETYPE_RULE_REF));
@@ -2548,39 +2580,9 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
   std::stringstream ss_magic;
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
 
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
-  ss_magic.str("");
-  ss_magic.clear(std::stringstream::goodbit);
-  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
-
   ss_magic.str("");
   ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
@@ -2591,6 +2593,5 @@ extern "C" void Init_llama_cpp(void) {
   ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
   rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
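On the Ruby side, the rewritten bindings surface the new vocabulary metadata; a usage sketch assuming an initialized `context`:

    # token_to_str now sizes its buffer from the C API's return value,
    # so multi-byte token pieces round-trip correctly.
    id = context.token_bos
    piece = context.token_to_str(id)

    if context.type(id) == LLaMACpp::LLAMA_TOKEN_TYPE_CONTROL
      puts "#{piece.inspect} is a control token (score: #{context.score(id)})"
    end

    t_us = LLaMACpp.time_us # new module function wrapping llama_time_us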