llama_cpp 0.12.6 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/ext/llama_cpp/llama_cpp.cpp +90 -269
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +28 -23
- data/vendor/tmp/llama.cpp/Makefile +51 -15
- data/vendor/tmp/llama.cpp/ggml-alloc.c +73 -43
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +32 -11
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +560 -346
- data/vendor/tmp/llama.cpp/ggml-impl.h +20 -7
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +7 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +191 -22
- data/vendor/tmp/llama.cpp/ggml-metal.metal +2472 -862
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +25 -25
- data/vendor/tmp/llama.cpp/ggml-quants.c +3176 -667
- data/vendor/tmp/llama.cpp/ggml-quants.h +77 -2
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +373 -424
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +186 -102
- data/vendor/tmp/llama.cpp/ggml.c +1266 -699
- data/vendor/tmp/llama.cpp/ggml.h +59 -30
- data/vendor/tmp/llama.cpp/llama.cpp +1517 -717
- data/vendor/tmp/llama.cpp/llama.h +87 -63
- data/vendor/tmp/llama.cpp/scripts/get-flags.mk +1 -1
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8e8d23f3abceeea388895f198a3906b7a24d692cba97e46934a14567450fc3a2
+  data.tar.gz: 9d1385671b76ea826fbc000910e102fbbb951970f77b7511fdf2653adbc97334
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 24746b8aaaa749b4058ddb64f6b07952356a6947ef1f40bc8bf7010a37b8b476e71632452ce28b6e61b11c66249a9d4fb6573de31e66e750bdb4391ce8f3286c
+  data.tar.gz: 56f79812ecdeecfc2dce6f68a73fc72d4495c6a51cc1d2ea7ccfeeb3e1ac9b6e72e78cbed019108e05987e431c4634bbfa1029f380f813a7fb6e009b5f6ec4e3
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
+## [[0.13.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.7...v0.13.0)] - 2024-03-02
+
+- Bump bundled llama.cpp from b2143 to b2303.
+- Remove deprecated methods:
+  - `map_supported?`, `mlock_supported?`, `apply_lora_from_file`, `eval`, `eval_embd`, `sample_classifier_free_guidance`, `sample_temperature`, and `mul_mat_q`.
+- Rename some constants.
+- Rename `kv_cache_seq_shift` method to `kv_cache_seq_add`.
+- Add `defrag_thold` accessor to `ContextParams`.
+- Add `vocab_type` and `rope_type` methods to `Model`.
+- Add `kv_cache_seq_pos_max`, `kv_cache_defrag`, and `kv_cache_update` methods to `Context`.
+
+## [[0.12.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.6...v0.12.7)] - 2024-02-24
+
+- Bump bundled llama.cpp from b2106 to b2143.
+- Add constants for file type: `LLAMA_FTYPE_MOSTLY_IQ1_S` and `LLAMA_FTYPE_MOSTLY_IQ4_NL`.
+- Add constants for pooling type: `LLAMA_POOLING_NONE`, `LLAMA_POOLING_MEAN`, and `LLAMA_POOLING_CLS`.
+- Add `numa_init` module function to `LLaMACpp`.
+- Remove unnecessary argument from `backend_init`.
+
+Implementation of llama_chat_apply_template binding has been postponed for the time being.
+
 ## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17

 - Bump bundled llama.cpp from b2106 to b2143.
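Taken together, the 0.13.0 entry amounts to a small API migration. A minimal before/after sketch of the new surface; the `Model`/`Context` constructor keywords (`model_path:`, `params:`, `model:`) follow the gem's README and are not part of this diff:

```ruby
require 'llama_cpp'

LLaMACpp.backend_init                    # 0.12.x: backend_init(numa: true); the keyword argument is gone
LLaMACpp.numa_init(1)                    # NUMA strategy is now its own call (integer ggml_numa_strategy value)

model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: LLaMACpp::ModelParams.new)
puts model.vocab_type                    # new in 0.13.0
puts model.rope_type                     # new in 0.13.0

ctx_params = LLaMACpp::ContextParams.new
ctx_params.defrag_thold = 0.1            # new accessor in 0.13.0
context = LLaMACpp::Context.new(model: model, params: ctx_params)

context.kv_cache_seq_add(0, 0, 16, 4)    # renamed from kv_cache_seq_shift
LLaMACpp.backend_free
```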
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -966,12 +966,12 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
     rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
     rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
+    rb_define_method(rb_cLLaMAContextParams, "defrag_thold=", RUBY_METHOD_FUNC(_llama_context_params_set_defrag_thold), 1);
+    rb_define_method(rb_cLLaMAContextParams, "defrag_thold", RUBY_METHOD_FUNC(_llama_context_params_get_defrag_thold), 0);
     rb_define_method(rb_cLLaMAContextParams, "type_k=", RUBY_METHOD_FUNC(_llama_context_params_set_type_k), 1);
     rb_define_method(rb_cLLaMAContextParams, "type_k", RUBY_METHOD_FUNC(_llama_context_params_get_type_k), 0);
     rb_define_method(rb_cLLaMAContextParams, "type_v=", RUBY_METHOD_FUNC(_llama_context_params_set_type_v), 1);
     rb_define_method(rb_cLLaMAContextParams, "type_v", RUBY_METHOD_FUNC(_llama_context_params_get_type_v), 0);
-    rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
-    rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
     rb_define_method(rb_cLLaMAContextParams, "logits_all=", RUBY_METHOD_FUNC(_llama_context_params_set_logits_all), 1);
     rb_define_method(rb_cLLaMAContextParams, "logits_all", RUBY_METHOD_FUNC(_llama_context_params_get_logits_all), 0);
     rb_define_method(rb_cLLaMAContextParams, "embedding=", RUBY_METHOD_FUNC(_llama_context_params_set_embedding), 1);
@@ -1146,6 +1146,18 @@ private:
     return UINT2NUM(ptr->params.yarn_orig_ctx);
   }

+  // defrag_thold
+  static VALUE _llama_context_params_set_defrag_thold(VALUE self, VALUE defrag_thold) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.defrag_thold = NUM2DBL(defrag_thold);
+    return DBL2NUM(ptr->params.defrag_thold);
+  }
+
+  static VALUE _llama_context_params_get_defrag_thold(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.defrag_thold);
+  }
+
   static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return UINT2NUM(ptr->params.yarn_orig_ctx);
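The new accessor simply round-trips a C float on `llama_context_params`. A short sketch of the Ruby side; the meaning of the threshold (defragment the KV cache once the fraction of wasted cells exceeds it, negative disables) comes from upstream llama.cpp rather than this hunk:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.defrag_thold = 0.1   # defragment above ~10% wasted KV-cache cells; < 0 disables (upstream semantics)
puts params.defrag_thold    # value passes through NUM2DBL/DBL2NUM, so minor float precision loss is possible
```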
@@ -1175,18 +1187,6 @@ private:
     return INT2NUM(ptr->params.type_v);
   }

-  // mul_mat_q
-  static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
-    return ptr->params.mul_mat_q ? Qtrue : Qfalse;
-  }
-
-  static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    return ptr->params.mul_mat_q ? Qtrue : Qfalse;
-  }
-
   // logits_all
   static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1433,7 +1433,8 @@ public:
     rb_define_method(rb_cLLaMAModel, "empty?", RUBY_METHOD_FUNC(_llama_model_empty), 0);
     rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
     rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
-    rb_define_method(rb_cLLaMAModel, "
+    rb_define_method(rb_cLLaMAModel, "vocab_type", RUBY_METHOD_FUNC(_llama_model_get_model_vocab_type), 0);
+    rb_define_method(rb_cLLaMAModel, "rope_type", RUBY_METHOD_FUNC(_llama_model_get_model_rope_type), 0);
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
@@ -1559,41 +1560,14 @@ private:
     return Qnil;
   }

-  static VALUE
-
-
-
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 3, kw_values);
-
-    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
-      rb_raise(rb_eArgError, "lora_path must be a string");
-      return Qnil;
-    }
-    if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
-      rb_raise(rb_eArgError, "base_model_path must be a string");
-      return Qnil;
-    }
-    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "n_threads must be an integer");
-      return Qnil;
-    }
-    if (kw_values[3] != Qundef && !RB_FLOAT_TYPE_P(kw_values[3])) {
-      rb_raise(rb_eArgError, "scale must be a float");
-      return Qnil;
-    }
-
-    const char* lora_path = StringValueCStr(kw_values[0]);
-    const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
-    const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
-    const float scale = kw_values[3] == Qundef ? 1.0 : NUM2DBL(kw_values[3]);
+  static VALUE _llama_model_get_model_vocab_type(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_vocab_type(ptr->model));
+  }

+  static VALUE _llama_model_get_model_rope_type(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-
-      rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
-      return Qnil;
-    }
-    return Qnil;
+    return INT2NUM(llama_rope_type(ptr->model));
   }

   static VALUE _llama_model_get_model_n_vocab(VALUE self) {
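Both new readers delegate straight to `llama_vocab_type` and `llama_rope_type` and return plain integers. A hedged usage sketch; the `Model.new` keywords are taken from the gem's README, not from this hunk:

```ruby
require 'llama_cpp'

model_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf', params: model_params)

puts model.vocab_type # integer llama_vocab_type value (SPM, BPE, ... on the C side)
puts model.rope_type  # integer llama_rope_type value; -1 on the C side means the model uses no RoPE
```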
@@ -2038,8 +2012,6 @@ public:
     rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
     rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
-    rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
-    rb_define_method(rb_cLLaMAContext, "eval_embd", RUBY_METHOD_FUNC(_llama_context_eval_embd), -1);
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2054,14 +2026,16 @@ public:
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
-    rb_define_method(rb_cLLaMAContext, "
+    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_add", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_add), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_pos_max", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_pos_max), 1);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_defrag", RUBY_METHOD_FUNC(_llama_context_kv_cache_defrag), 0);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_update", RUBY_METHOD_FUNC(_llama_context_kv_cache_update), 0);
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
     rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
     rb_define_method(rb_cLLaMAContext, "sample_apply_guidance", RUBY_METHOD_FUNC(_llama_context_sample_apply_guidance), -1);
-    rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
     rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
@@ -2070,7 +2044,6 @@ public:
     rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
     rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
     rb_define_method(rb_cLLaMAContext, "sample_entropy", RUBY_METHOD_FUNC(_llama_context_sample_entropy), -1);
-    rb_define_method(rb_cLLaMAContext, "sample_temperature", RUBY_METHOD_FUNC(_llama_context_sample_temperature), -1);
     rb_define_method(rb_cLLaMAContext, "sample_token_mirostat", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat), -1);
     rb_define_method(rb_cLLaMAContext, "sample_token_mirostat_v2", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat_v2), -1);
     rb_define_method(rb_cLLaMAContext, "sample_token_greedy", RUBY_METHOD_FUNC(_llama_context_sample_token_greedy), 1);
@@ -2122,110 +2095,6 @@ private:
     return Qnil;
   }

-  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[3] = { rb_intern("tokens"), rb_intern("n_past"), rb_intern("n_tokens") };
-    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
-    rb_warn("eval is deprecated. Use decode instead.");
-
-    if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
-      rb_raise(rb_eArgError, "tokens must be an Array");
-      return Qnil;
-    }
-    if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "n_past must be an integer");
-      return Qnil;
-    }
-    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "n_tokens must be an integer");
-      return Qnil;
-    }
-
-    const size_t tokens_len = RARRAY_LEN(kw_values[0]);
-    std::vector<llama_token> embd(tokens_len);
-    for (size_t i = 0; i < tokens_len; i++) {
-      VALUE token = rb_ary_entry(kw_values[0], i);
-      if (!RB_INTEGER_TYPE_P(token)) {
-        rb_raise(rb_eArgError, "tokens must be an array of integers");
-        return Qnil;
-      }
-      embd[i] = NUM2INT(token);
-    }
-
-    const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
-    const int n_past = NUM2INT(kw_values[1]);
-
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
-      rb_raise(rb_eRuntimeError, "Failed to evaluate");
-      return Qnil;
-    }
-
-    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
-    rb_iv_set(self, "@has_evaluated", Qtrue);
-
-    return Qnil;
-  }
-
-  static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[3] = { rb_intern("embd"), rb_intern("n_past"), rb_intern("n_tokens") };
-    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
-    rb_warn("eval_embd is deprecated. Use decode instead.");
-
-    if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
-      rb_raise(rb_eArgError, "tokens must be an Array");
-      return Qnil;
-    }
-    if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "n_past must be an integer");
-      return Qnil;
-    }
-    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "n_tokens must be an integer");
-      return Qnil;
-    }
-
-    const size_t tokens_len = RARRAY_LEN(kw_values[0]);
-    std::vector<float> embd(tokens_len);
-    for (size_t i = 0; i < tokens_len; i++) {
-      VALUE el = rb_ary_entry(kw_values[0], i);
-      if (!RB_FLOAT_TYPE_P(el)) {
-        rb_raise(rb_eArgError, "embd must be an array of floats");
-        return Qnil;
-      }
-      embd[i] = NUM2DBL(el);
-    }
-
-    const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
-    const int n_past = NUM2INT(kw_values[1]);
-
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    if (llama_eval_embd(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
-      rb_raise(rb_eRuntimeError, "Failed to evaluate");
-      return Qnil;
-    }
-
-    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
-    rb_iv_set(self, "@has_evaluated", Qtrue);
-
-    return Qnil;
-  }
-
   static VALUE _llama_context_decode(VALUE self, VALUE batch) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
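With `eval` and `eval_embd` gone, token feeding goes through `Context#decode` and a `LLaMACpp::Batch`. A rough sketch of the replacement path; the `Batch` constructor keywords and setter names below are assumptions based on the gem's existing batch binding, which is not part of this diff:

```ruby
# tokens: an Array of Integer token ids for sequence 0; context: an initialized LLaMACpp::Context.
batch = LLaMACpp::Batch.new(max_n_token: tokens.size, n_embd: 0, max_n_seq: 1) # keywords assumed
tokens.each_with_index do |token, i|
  batch.set_token(i, token)                  # hypothetical setter names, see lead-in
  batch.set_pos(i, i)
  batch.set_n_seq_id(i, 1)
  batch.set_seq_id(i, 0, 0)
  batch.set_logits(i, i == tokens.size - 1)  # only request logits at the final position
end
batch.n_tokens = tokens.size
context.decode(batch)                        # replaces context.eval(tokens: tokens, n_past: 0)
```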
@@ -2430,13 +2299,13 @@ private:
     return Qnil;
   }

-  static VALUE
+  static VALUE _llama_context_kv_cache_seq_add(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eArgError, "LLaMA context is not initialized");
       return Qnil;
     }
-
+    llama_kv_cache_seq_add(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
     return Qnil;
   }

@@ -2450,6 +2319,35 @@ private:
     return Qnil;
   }

+  static VALUE _llama_context_kv_cache_seq_pos_max(VALUE self, VALUE seq_id) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eArgError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_kv_cache_seq_pos_max(ptr->ctx, NUM2INT(seq_id)));
+  }
+
+  static VALUE _llama_context_kv_cache_defrag(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_kv_cache_defrag(ptr->ctx);
+    return Qnil;
+  }
+
+  static VALUE _llama_context_kv_cache_update(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_kv_cache_update(ptr->ctx);
+    return Qnil;
+  }
+
   static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
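The new `Context` helpers map one-to-one onto `llama_kv_cache_seq_pos_max`, `llama_kv_cache_defrag`, and `llama_kv_cache_update`. A small sketch against an already-initialized `context`; note that the registration hunk earlier binds the defrag/update helpers under the names `kv_cache_kv_cache_defrag` and `kv_cache_kv_cache_update`, whereas the CHANGELOG lists them without the doubled prefix:

```ruby
seq_id = 0

# Renamed in 0.13.0: kv_cache_seq_shift -> kv_cache_seq_add.
context.kv_cache_seq_add(seq_id, 0, 32, -8)   # shift cached positions in [0, 32) back by 8

puts context.kv_cache_seq_pos_max(seq_id)     # largest position currently stored for the sequence

context.kv_cache_kv_cache_defrag              # name as registered in this release
context.kv_cache_kv_cache_update              # apply pending KV-cache updates
```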
@@ -2659,46 +2557,6 @@ private:
     return Qnil;
   }

-  static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
-    VALUE kw_values[2] = { Qundef, Qundef };
-    VALUE candidates = Qnil;
-    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
-
-    if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
-      rb_raise(rb_eArgError, "guidance must be a Context");
-      return Qnil;
-    }
-    if (!RB_FLOAT_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "scale must be a float");
-      return Qnil;
-    }
-
-    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    if (ctx_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
-    if (cnd_ptr->array.data == nullptr) {
-      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-      return Qnil;
-    }
-
-    LLaMAContextWrapper* guidance_ptr = get_llama_context(kw_values[0]);
-    if (guidance_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "guidance context is not initialized");
-      return Qnil;
-    }
-    const float scale = NUM2DBL(kw_values[1]);
-
-    llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);
-
-    return Qnil;
-  }
-
   static VALUE _llama_context_sample_softmax(VALUE self, VALUE candidates) {
     if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
       rb_raise(rb_eArgError, "argument must be a TokenDataArray");
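Classifier-free guidance now goes through the remaining `sample_apply_guidance` binding, which works on logit arrays instead of a second guidance `Context`. The removed call below is reconstructed from the deleted code above; the keyword names on the replacement are assumptions and are not shown in this diff:

```ruby
# 0.12.x (removed):
#   context.sample_classifier_free_guidance(candidates, guidance: guidance_context, scale: 1.5)
#
# 0.13.0 (keyword names assumed, see lead-in):
context.sample_apply_guidance(logits: logits, logits_guidance: guidance_logits, scale: 1.5)
```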
@@ -2994,42 +2852,6 @@ private:
     return Qnil;
   }

-  static VALUE _llama_context_sample_temperature(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[1] = { rb_intern("temperature") };
-    VALUE kw_values[1] = { Qundef };
-    VALUE candidates = Qnil;
-    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
-
-    if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
-      rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
-      return Qnil;
-    }
-    if (!RB_FLOAT_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "temperature must be a float");
-      return Qnil;
-    }
-
-    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    if (ctx_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
-    if (cnd_ptr->array.data == nullptr) {
-      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-      return Qnil;
-    }
-    const float temperature = NUM2DBL(kw_values[0]);
-
-    llama_sample_temperature(ctx_ptr->ctx, &(cnd_ptr->array), temperature);
-
-    return Qnil;
-  }
-
   static VALUE _llama_context_sample_token_mirostat(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[4] = { rb_intern("tau"), rb_intern("eta"), rb_intern("m"), rb_intern("mu") };
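`sample_temperature` had only warned and forwarded for several releases ("Use sample_temp instead."); 0.13.0 drops it entirely. A one-line migration; the `temp:` keyword on the surviving method is an assumption, since only the removed method's `temperature:` keyword is visible in this hunk:

```ruby
# 0.12.x (removed): context.sample_temperature(candidates, temperature: 0.8)
context.sample_temp(candidates, temp: 0.8)   # temp: keyword assumed, see lead-in
```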
@@ -3243,15 +3065,8 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {

 // module functions

-static VALUE rb_llama_llama_backend_init(
-
-  ID kw_table[1] = { rb_intern("numa") };
-  VALUE kw_values[1] = { Qundef };
-  rb_scan_args(argc, argv, ":", &kw_args);
-  rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
-
-  const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
-  llama_backend_init(numa);
+static VALUE rb_llama_llama_backend_init(VALUE self) {
+  llama_backend_init();

   return Qnil;
 }
@@ -3262,6 +3077,17 @@ static VALUE rb_llama_llama_backend_free(VALUE self) {
   return Qnil;
 }

+static VALUE rb_llama_llama_numa_init(VALUE self, VALUE strategy) {
+  if (!RB_INTEGER_TYPE_P(strategy)) {
+    rb_raise(rb_eArgError, "strategy must be an integer");
+    return Qnil;
+  }
+
+  llama_numa_init(static_cast<enum ggml_numa_strategy>(NUM2INT(strategy)));
+
+  return Qnil;
+}
+
 static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   VALUE kw_args = Qnil;
   ID kw_table[3] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("params") };
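`backend_init` no longer takes the `numa` flag; NUMA setup moved into the new `numa_init`, which expects the integer value of a `ggml_numa_strategy`. A minimal sketch (treating 1 as the "distribute" strategy reflects upstream ggml's enum ordering at this revision and is an assumption here, not something this diff states):

```ruby
require 'llama_cpp'

LLaMACpp.backend_init        # 0.12.x accepted backend_init(numa: true); the keyword is gone
LLaMACpp.numa_init(1)        # integer ggml_numa_strategy value (assumed: 1 == distribute)

at_exit { LLaMACpp.backend_free }
```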
@@ -3303,16 +3129,6 @@ static VALUE rb_llama_time_us(VALUE self) {
   return LONG2NUM(llama_time_us());
 }

-static VALUE rb_llama_mmap_supported(VALUE self) {
-  rb_warn("mmap_supported? is deprecated. Use supports_mmap? instead.");
-  return llama_mmap_supported() ? Qtrue : Qfalse;
-}
-
-static VALUE rb_llama_mlock_supported(VALUE self) {
-  rb_warn("mlock_supported? is deprecated. Use supports_mlock? instead.");
-  return llama_mlock_supported() ? Qtrue : Qfalse;
-}
-
 static VALUE rb_llama_max_devices(VALUE self) {
   return SIZET2NUM(llama_max_devices());
 }
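Only the deprecated question-mark aliases disappear; the `supports_*` module functions registered a few lines further down remain the ones to call:

```ruby
puts LLaMACpp.supports_mmap?    # replaces mmap_supported?
puts LLaMACpp.supports_mlock?   # replaces mlock_supported?
puts LLaMACpp.max_devices
```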
@@ -3345,13 +3161,12 @@ extern "C" void Init_llama_cpp(void) {
   RbLLaMAGrammarElement::define_class(rb_mLLaMACpp);
   RbLLaMAGrammar::define_class(rb_mLLaMACpp);

-  rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init,
+  rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, 0);
   rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
+  rb_define_module_function(rb_mLLaMACpp, "numa_init", rb_llama_llama_numa_init, 1);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
   rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
-  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
-  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
   rb_define_module_function(rb_mLLaMACpp, "supports_mmap?", rb_llama_supports_mmap, 0);
   rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
@@ -3389,14 +3204,16 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
-  rb_define_const(rb_mLLaMACpp, "
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ1_S", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ4_NL", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL));

   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));

-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
+  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_INT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL));

   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
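The new file-type constants are ordinary integers on the module, so they can be fed straight into `model_quantize`, whose `input_path:`/`output_path:`/`params:` keywords are visible earlier in this diff. The `ModelQuantizeParams#ftype=` accessor is assumed from the gem's existing quantize API, not from this hunk:

```ruby
qparams = LLaMACpp::ModelQuantizeParams.new
qparams.ftype = LLaMACpp::LLAMA_FTYPE_MOSTLY_IQ4_NL   # ftype= assumed, see lead-in
LLaMACpp.model_quantize(input_path: 'model-f16.gguf', output_path: 'model-iq4_nl.gguf', params: qparams)
```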
@@ -3406,15 +3223,19 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_YARN", INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));

-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
-  rb_define_const(rb_mLLaMACpp, "
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));

   std::stringstream ss_magic;
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
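These are the "Rename some constants" items from the CHANGELOG: the rope-scaling, pooling, and split constants now carry the upstream `*_TYPE_*` / `*_MODE_*` spellings. A short sketch of where they are used; the `ModelParams#split_mode=` and `ContextParams#rope_scaling_type=` accessors are assumptions from the gem's existing params classes and are not part of this hunk:

```ruby
model_params = LLaMACpp::ModelParams.new
model_params.split_mode = LLaMACpp::LLAMA_SPLIT_MODE_LAYER                 # accessor assumed, see lead-in

context_params = LLaMACpp::ContextParams.new
context_params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_TYPE_YARN  # accessor assumed, see lead-in
```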
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.
+  VERSION = '0.13.0'

   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b2303'
 end
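The version constants after the bump, as exposed by the updated file:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.13.0"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "b2303"
```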