llama_cpp 0.12.7 → 0.14.0
This diff compares the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/ext/llama_cpp/llama_cpp.cpp +131 -288
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +29 -29
- data/vendor/tmp/llama.cpp/Makefile +10 -6
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +32 -23
- data/vendor/tmp/llama.cpp/ggml-backend.h +17 -16
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +949 -168
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
- data/vendor/tmp/llama.cpp/ggml-metal.m +159 -22
- data/vendor/tmp/llama.cpp/ggml-metal.metal +1195 -139
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +27 -27
- data/vendor/tmp/llama.cpp/ggml-quants.c +1971 -271
- data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +3586 -1201
- data/vendor/tmp/llama.cpp/ggml-sycl.h +5 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +39336 -43461
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1391 -825
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +1 -0
- data/vendor/tmp/llama.cpp/ggml.c +545 -210
- data/vendor/tmp/llama.cpp/ggml.h +65 -23
- data/vendor/tmp/llama.cpp/llama.cpp +1458 -763
- data/vendor/tmp/llama.cpp/llama.h +81 -75
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/ext/llama_cpp/llama_cpp.cpp
CHANGED

@@ -952,6 +952,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
+ rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
+ rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -966,20 +968,18 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
  rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
  rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
+ rb_define_method(rb_cLLaMAContextParams, "defrag_thold=", RUBY_METHOD_FUNC(_llama_context_params_set_defrag_thold), 1);
+ rb_define_method(rb_cLLaMAContextParams, "defrag_thold", RUBY_METHOD_FUNC(_llama_context_params_get_defrag_thold), 0);
  rb_define_method(rb_cLLaMAContextParams, "type_k=", RUBY_METHOD_FUNC(_llama_context_params_set_type_k), 1);
  rb_define_method(rb_cLLaMAContextParams, "type_k", RUBY_METHOD_FUNC(_llama_context_params_get_type_k), 0);
  rb_define_method(rb_cLLaMAContextParams, "type_v=", RUBY_METHOD_FUNC(_llama_context_params_set_type_v), 1);
  rb_define_method(rb_cLLaMAContextParams, "type_v", RUBY_METHOD_FUNC(_llama_context_params_get_type_v), 0);
- rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
- rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
  rb_define_method(rb_cLLaMAContextParams, "logits_all=", RUBY_METHOD_FUNC(_llama_context_params_set_logits_all), 1);
  rb_define_method(rb_cLLaMAContextParams, "logits_all", RUBY_METHOD_FUNC(_llama_context_params_get_logits_all), 0);
- rb_define_method(rb_cLLaMAContextParams, "
- rb_define_method(rb_cLLaMAContextParams, "
+ rb_define_method(rb_cLLaMAContextParams, "embeddings=", RUBY_METHOD_FUNC(_llama_context_params_set_embeddings), 1);
+ rb_define_method(rb_cLLaMAContextParams, "embeddings", RUBY_METHOD_FUNC(_llama_context_params_get_embeddings), 0);
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
- rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
- rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
 }

 private:
@@ -1058,7 +1058,7 @@ private:
  // rope_scaling_type
  static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   ptr->params.rope_scaling_type = NUM2INT(scaling_type);
+   ptr->params.rope_scaling_type = static_cast<enum llama_rope_scaling_type>(NUM2INT(scaling_type));
    return INT2NUM(ptr->params.rope_scaling_type);
  }

@@ -1067,6 +1067,18 @@ private:
    return INT2NUM(ptr->params.rope_scaling_type);
  }

+ // pooling_type
+ static VALUE _llama_context_params_set_pooling_type(VALUE self, VALUE scaling_type) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   ptr->params.pooling_type = static_cast<enum llama_pooling_type>(NUM2INT(scaling_type));
+   return INT2NUM(ptr->params.pooling_type);
+ }
+
+ static VALUE _llama_context_params_get_pooling_type(VALUE self) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   return INT2NUM(ptr->params.pooling_type);
+ }
+
  // rope_freq_base
  static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1146,6 +1158,18 @@ private:
    return UINT2NUM(ptr->params.yarn_orig_ctx);
  }

+ // defrag_thold
+ static VALUE _llama_context_params_set_defrag_thold(VALUE self, VALUE defrag_thold) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   ptr->params.defrag_thold = NUM2DBL(defrag_thold);
+   return DBL2NUM(ptr->params.defrag_thold);
+ }
+
+ static VALUE _llama_context_params_get_defrag_thold(VALUE self) {
+   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+   return DBL2NUM(ptr->params.defrag_thold);
+ }
+
  static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
    return UINT2NUM(ptr->params.yarn_orig_ctx);
@@ -1175,18 +1199,6 @@ private:
    return INT2NUM(ptr->params.type_v);
  }

- // mul_mat_q
- static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
-   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
-   return ptr->params.mul_mat_q ? Qtrue : Qfalse;
- }
-
- static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
-   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   return ptr->params.mul_mat_q ? Qtrue : Qfalse;
- }
-
  // logits_all
  static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1199,16 +1211,16 @@ private:
    return ptr->params.logits_all ? Qtrue : Qfalse;
  }

- //
- static VALUE
+ // embeddings
+ static VALUE _llama_context_params_set_embeddings(VALUE self, VALUE embeddings) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   ptr->params.
-   return ptr->params.
+   ptr->params.embeddings = RTEST(embeddings) ? true : false;
+   return ptr->params.embeddings ? Qtrue : Qfalse;
  }

- static VALUE
+ static VALUE _llama_context_params_get_embeddings(VALUE self) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   return ptr->params.
+   return ptr->params.embeddings ? Qtrue : Qfalse;
  }

  // offload_kqv
@@ -1222,18 +1234,6 @@ private:
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
    return ptr->params.offload_kqv ? Qtrue : Qfalse;
  }
-
- // do_pooling
- static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
-   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
-   return ptr->params.do_pooling ? Qtrue : Qfalse;
- }
-
- static VALUE _llama_context_params_get_do_pooling(VALUE self) {
-   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   return ptr->params.do_pooling ? Qtrue : Qfalse;
- }
 };

 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
@@ -1433,7 +1433,8 @@ public:
  rb_define_method(rb_cLLaMAModel, "empty?", RUBY_METHOD_FUNC(_llama_model_empty), 0);
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
- rb_define_method(rb_cLLaMAModel, "
+ rb_define_method(rb_cLLaMAModel, "vocab_type", RUBY_METHOD_FUNC(_llama_model_get_model_vocab_type), 0);
+ rb_define_method(rb_cLLaMAModel, "rope_type", RUBY_METHOD_FUNC(_llama_model_get_model_rope_type), 0);
  rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
  rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
@@ -1559,41 +1560,14 @@ private:
    return Qnil;
  }

- static VALUE
-
-
-
-   rb_scan_args(argc, argv, ":", &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 1, 3, kw_values);
-
-   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
-     rb_raise(rb_eArgError, "lora_path must be a string");
-     return Qnil;
-   }
-   if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
-     rb_raise(rb_eArgError, "base_model_path must be a string");
-     return Qnil;
-   }
-   if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-     rb_raise(rb_eArgError, "n_threads must be an integer");
-     return Qnil;
-   }
-   if (kw_values[3] != Qundef && !RB_FLOAT_TYPE_P(kw_values[3])) {
-     rb_raise(rb_eArgError, "scale must be a float");
-     return Qnil;
-   }
-
-   const char* lora_path = StringValueCStr(kw_values[0]);
-   const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
-   const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
-   const float scale = kw_values[3] == Qundef ? 1.0 : NUM2DBL(kw_values[3]);
+ static VALUE _llama_model_get_model_vocab_type(VALUE self) {
+   LLaMAModelWrapper* ptr = get_llama_model(self);
+   return INT2NUM(llama_vocab_type(ptr->model));
+ }

+ static VALUE _llama_model_get_model_rope_type(VALUE self) {
    LLaMAModelWrapper* ptr = get_llama_model(self);
-
-     rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
-     return Qnil;
-   }
-   return Qnil;
+   return INT2NUM(llama_rope_type(ptr->model));
  }

  static VALUE _llama_model_get_model_n_vocab(VALUE self) {
@@ -2038,12 +2012,11 @@ public:
  rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
  rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
- rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
- rb_define_method(rb_cLLaMAContext, "eval_embd", RUBY_METHOD_FUNC(_llama_context_eval_embd), -1);
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
+ rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
  rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2054,14 +2027,16 @@ public:
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
- rb_define_method(rb_cLLaMAContext, "
+ rb_define_method(rb_cLLaMAContext, "kv_cache_seq_add", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_add), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_seq_pos_max", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_pos_max), 1);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_defrag", RUBY_METHOD_FUNC(_llama_context_kv_cache_defrag), 0);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_update", RUBY_METHOD_FUNC(_llama_context_kv_cache_update), 0);
  rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
  rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
  rb_define_method(rb_cLLaMAContext, "sample_apply_guidance", RUBY_METHOD_FUNC(_llama_context_sample_apply_guidance), -1);
- rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
  rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
@@ -2070,7 +2045,6 @@ public:
  rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
  rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
  rb_define_method(rb_cLLaMAContext, "sample_entropy", RUBY_METHOD_FUNC(_llama_context_sample_entropy), -1);
- rb_define_method(rb_cLLaMAContext, "sample_temperature", RUBY_METHOD_FUNC(_llama_context_sample_temperature), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_mirostat", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_mirostat_v2", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat_v2), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_greedy", RUBY_METHOD_FUNC(_llama_context_sample_token_greedy), 1);
@@ -2122,110 +2096,6 @@ private:
    return Qnil;
  }

- static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
-   VALUE kw_args = Qnil;
-   ID kw_table[3] = { rb_intern("tokens"), rb_intern("n_past"), rb_intern("n_tokens") };
-   VALUE kw_values[3] = { Qundef, Qundef, Qundef };
-   rb_scan_args(argc, argv, ":", &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
-   rb_warn("eval is deprecated. Use decode instead.");
-
-   if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
-     rb_raise(rb_eArgError, "tokens must be an Array");
-     return Qnil;
-   }
-   if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-     rb_raise(rb_eArgError, "n_past must be an integer");
-     return Qnil;
-   }
-   if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-     rb_raise(rb_eArgError, "n_tokens must be an integer");
-     return Qnil;
-   }
-
-   const size_t tokens_len = RARRAY_LEN(kw_values[0]);
-   std::vector<llama_token> embd(tokens_len);
-   for (size_t i = 0; i < tokens_len; i++) {
-     VALUE token = rb_ary_entry(kw_values[0], i);
-     if (!RB_INTEGER_TYPE_P(token)) {
-       rb_raise(rb_eArgError, "tokens must be an array of integers");
-       return Qnil;
-     }
-     embd[i] = NUM2INT(token);
-   }
-
-   const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
-   const int n_past = NUM2INT(kw_values[1]);
-
-   LLaMAContextWrapper* ptr = get_llama_context(self);
-   if (ptr->ctx == NULL) {
-     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-     return Qnil;
-   }
-   if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
-     rb_raise(rb_eRuntimeError, "Failed to evaluate");
-     return Qnil;
-   }
-
-   rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
-   rb_iv_set(self, "@has_evaluated", Qtrue);
-
-   return Qnil;
- }
-
- static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
-   VALUE kw_args = Qnil;
-   ID kw_table[3] = { rb_intern("embd"), rb_intern("n_past"), rb_intern("n_tokens") };
-   VALUE kw_values[3] = { Qundef, Qundef, Qundef };
-   rb_scan_args(argc, argv, ":", &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
-   rb_warn("eval_embd is deprecated. Use decode instead.");
-
-   if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
-     rb_raise(rb_eArgError, "tokens must be an Array");
-     return Qnil;
-   }
-   if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-     rb_raise(rb_eArgError, "n_past must be an integer");
-     return Qnil;
-   }
-   if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-     rb_raise(rb_eArgError, "n_tokens must be an integer");
-     return Qnil;
-   }
-
-   const size_t tokens_len = RARRAY_LEN(kw_values[0]);
-   std::vector<float> embd(tokens_len);
-   for (size_t i = 0; i < tokens_len; i++) {
-     VALUE el = rb_ary_entry(kw_values[0], i);
-     if (!RB_FLOAT_TYPE_P(el)) {
-       rb_raise(rb_eArgError, "embd must be an array of floats");
-       return Qnil;
-     }
-     embd[i] = NUM2DBL(el);
-   }
-
-   const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
-   const int n_past = NUM2INT(kw_values[1]);
-
-   LLaMAContextWrapper* ptr = get_llama_context(self);
-   if (ptr->ctx == NULL) {
-     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-     return Qnil;
-   }
-   if (llama_eval_embd(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
-     rb_raise(rb_eRuntimeError, "Failed to evaluate");
-     return Qnil;
-   }
-
-   rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
-   rb_iv_set(self, "@has_evaluated", Qtrue);
-
-   return Qnil;
- }
-
  static VALUE _llama_context_decode(VALUE self, VALUE batch) {
    LLaMAContextWrapper* ptr = get_llama_context(self);
    if (ptr->ctx == NULL) {
@@ -2282,7 +2152,7 @@ private:
    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
    VALUE params = rb_iv_get(self, "@params");
    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
-   if (!prms_ptr->params.
+   if (!prms_ptr->params.embeddings) {
      rb_raise(rb_eRuntimeError, "embedding parameter is false");
      return Qnil;
    }
@@ -2291,10 +2161,11 @@ private:
      return Qnil;
    }

+   const int n_tokens = NUM2INT(rb_iv_get(self, "@n_tokens"));
    const int n_embd = llama_n_embd(model_ptr->model);
    const float* embd = llama_get_embeddings(ptr->ctx);
    VALUE output = rb_ary_new();
-   for (int i = 0; i < n_embd; i++) {
+   for (int i = 0; i < n_tokens * n_embd; i++) {
      rb_ary_push(output, DBL2NUM((double)(embd[i])));
    }

@@ -2313,7 +2184,7 @@ private:
    }
    VALUE params = rb_iv_get(self, "@params");
    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
-   if (!prms_ptr->params.
+   if (!prms_ptr->params.embeddings) {
      rb_raise(rb_eRuntimeError, "embedding parameter is false");
      return Qnil;
    }
@@ -2331,6 +2202,36 @@ private:
    return output;
  }

+ static VALUE _llama_context_embeddings_seq(VALUE self, VALUE seq_id) {
+   if (!RB_INTEGER_TYPE_P(seq_id)) {
+     rb_raise(rb_eArgError, "seq_id must be an integer");
+     return Qnil;
+   }
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   VALUE params = rb_iv_get(self, "@params");
+   LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+   if (!prms_ptr->params.embeddings) {
+     rb_raise(rb_eRuntimeError, "embedding parameter is false");
+     return Qnil;
+   }
+
+   VALUE model = rb_iv_get(self, "@model");
+   LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+   const int n_embd = llama_n_embd(model_ptr->model);
+
+   VALUE output = rb_ary_new();
+   const float* embd = llama_get_embeddings_seq(ptr->ctx, NUM2INT(seq_id));
+   for (int i = 0; i < n_embd; i++) {
+     rb_ary_push(output, DBL2NUM((double)(embd[i])));
+   }
+
+   return output;
+ }
+
  static VALUE _llama_context_n_ctx(VALUE self) {
    LLaMAContextWrapper* ptr = get_llama_context(self);
    if (ptr->ctx == NULL) {
@@ -2430,13 +2331,13 @@ private:
    return Qnil;
  }

- static VALUE
+ static VALUE _llama_context_kv_cache_seq_add(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
    LLaMAContextWrapper* ptr = get_llama_context(self);
    if (ptr->ctx == NULL) {
      rb_raise(rb_eArgError, "LLaMA context is not initialized");
      return Qnil;
    }
-
+   llama_kv_cache_seq_add(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
    return Qnil;
  }

@@ -2450,6 +2351,35 @@ private:
    return Qnil;
  }

+ static VALUE _llama_context_kv_cache_seq_pos_max(VALUE self, VALUE seq_id) {
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eArgError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   return INT2NUM(llama_kv_cache_seq_pos_max(ptr->ctx, NUM2INT(seq_id)));
+ }
+
+ static VALUE _llama_context_kv_cache_defrag(VALUE self) {
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   llama_kv_cache_defrag(ptr->ctx);
+   return Qnil;
+ }
+
+ static VALUE _llama_context_kv_cache_update(VALUE self) {
+   LLaMAContextWrapper* ptr = get_llama_context(self);
+   if (ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+   llama_kv_cache_update(ptr->ctx);
+   return Qnil;
+ }
+
  static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
    LLaMAContextWrapper* ptr = get_llama_context(self);
    if (ptr->ctx == NULL) {
@@ -2659,46 +2589,6 @@ private:
    return Qnil;
  }

- static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
-   VALUE kw_args = Qnil;
-   ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
-   VALUE kw_values[2] = { Qundef, Qundef };
-   VALUE candidates = Qnil;
-   rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
-
-   if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
-     rb_raise(rb_eArgError, "guidance must be a Context");
-     return Qnil;
-   }
-   if (!RB_FLOAT_TYPE_P(kw_values[1])) {
-     rb_raise(rb_eArgError, "scale must be a float");
-     return Qnil;
-   }
-
-   LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-   if (ctx_ptr->ctx == NULL) {
-     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-     return Qnil;
-   }
-   LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
-   if (cnd_ptr->array.data == nullptr) {
-     rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-     return Qnil;
-   }
-
-   LLaMAContextWrapper* guidance_ptr = get_llama_context(kw_values[0]);
-   if (guidance_ptr->ctx == NULL) {
-     rb_raise(rb_eRuntimeError, "guidance context is not initialized");
-     return Qnil;
-   }
-   const float scale = NUM2DBL(kw_values[1]);
-
-   llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);
-
-   return Qnil;
- }
-
  static VALUE _llama_context_sample_softmax(VALUE self, VALUE candidates) {
    if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
      rb_raise(rb_eArgError, "argument must be a TokenDataArray");
@@ -2994,42 +2884,6 @@ private:
    return Qnil;
  }

- static VALUE _llama_context_sample_temperature(int argc, VALUE* argv, VALUE self) {
-   VALUE kw_args = Qnil;
-   ID kw_table[1] = { rb_intern("temperature") };
-   VALUE kw_values[1] = { Qundef };
-   VALUE candidates = Qnil;
-   rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-   rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-   rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
-
-   if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
-     rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
-     return Qnil;
-   }
-   if (!RB_FLOAT_TYPE_P(kw_values[0])) {
-     rb_raise(rb_eArgError, "temperature must be a float");
-     return Qnil;
-   }
-
-   LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-   if (ctx_ptr->ctx == NULL) {
-     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-     return Qnil;
-   }
-   LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
-   if (cnd_ptr->array.data == nullptr) {
-     rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-     return Qnil;
-   }
-   const float temperature = NUM2DBL(kw_values[0]);
-
-   llama_sample_temperature(ctx_ptr->ctx, &(cnd_ptr->array), temperature);
-
-   return Qnil;
- }
-
  static VALUE _llama_context_sample_token_mirostat(int argc, VALUE* argv, VALUE self) {
    VALUE kw_args = Qnil;
    ID kw_table[4] = { rb_intern("tau"), rb_intern("eta"), rb_intern("m"), rb_intern("mu") };
@@ -3307,16 +3161,6 @@ static VALUE rb_llama_time_us(VALUE self) {
  return LONG2NUM(llama_time_us());
}

- static VALUE rb_llama_mmap_supported(VALUE self) {
-   rb_warn("mmap_supported? is deprecated. Use supports_mmap? instead.");
-   return llama_mmap_supported() ? Qtrue : Qfalse;
- }
-
- static VALUE rb_llama_mlock_supported(VALUE self) {
-   rb_warn("mlock_supported? is deprecated. Use supports_mlock? instead.");
-   return llama_mlock_supported() ? Qtrue : Qfalse;
- }
-
static VALUE rb_llama_max_devices(VALUE self) {
  return SIZET2NUM(llama_max_devices());
}
@@ -3355,8 +3199,6 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
  rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
- rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
- rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
  rb_define_module_function(rb_mLLaMACpp, "supports_mmap?", rb_llama_supports_mmap, 0);
  rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
@@ -3394,16 +3236,16 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
- rb_define_const(rb_mLLaMACpp, "
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ1_S", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ4_NL", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL));

  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));

- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_INT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL));

  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
@@ -3413,19 +3255,20 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_YARN", INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE));

- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_POOLING_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));

- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
- rb_define_const(rb_mLLaMACpp, "
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));

  std::stringstream ss_magic;
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
data/lib/llama_cpp/version.rb
CHANGED

@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.7'
+  VERSION = '0.14.0'

   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b2361'
 end
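After upgrading, both constants can be read at runtime to confirm which gem and bundled llama.cpp revision were picked up (a trivial check; the expected output reflects the values in this diff):

  require 'llama_cpp'

  puts LLaMACpp::VERSION            # => 0.14.0
  puts LLaMACpp::LLAMA_CPP_VERSION  # => b2361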