llama_cpp 0.12.7 → 0.14.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/ext/llama_cpp/llama_cpp.cpp +131 -288
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +29 -29
- data/vendor/tmp/llama.cpp/Makefile +10 -6
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +32 -23
- data/vendor/tmp/llama.cpp/ggml-backend.h +17 -16
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +949 -168
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
- data/vendor/tmp/llama.cpp/ggml-metal.m +159 -22
- data/vendor/tmp/llama.cpp/ggml-metal.metal +1195 -139
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +27 -27
- data/vendor/tmp/llama.cpp/ggml-quants.c +1971 -271
- data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +3586 -1201
- data/vendor/tmp/llama.cpp/ggml-sycl.h +5 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +39336 -43461
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1391 -825
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +1 -0
- data/vendor/tmp/llama.cpp/ggml.c +545 -210
- data/vendor/tmp/llama.cpp/ggml.h +65 -23
- data/vendor/tmp/llama.cpp/llama.cpp +1458 -763
- data/vendor/tmp/llama.cpp/llama.h +81 -75
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -952,6 +952,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
+    rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
+    rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
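The new `pooling_type` accessors follow the same pattern as the surrounding enum-backed params. A minimal usage sketch (the enum value shown assumes llama.cpp's mean pooling = 1 in this revision; the constant itself is defined further down in this diff):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.pooling_type = LLaMACpp::LLAMA_POOLING_TYPE_MEAN  # constant registered below
puts params.pooling_type                                 # => 1 (mean pooling)
```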
@@ -966,20 +968,18 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
     rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
     rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
+    rb_define_method(rb_cLLaMAContextParams, "defrag_thold=", RUBY_METHOD_FUNC(_llama_context_params_set_defrag_thold), 1);
+    rb_define_method(rb_cLLaMAContextParams, "defrag_thold", RUBY_METHOD_FUNC(_llama_context_params_get_defrag_thold), 0);
     rb_define_method(rb_cLLaMAContextParams, "type_k=", RUBY_METHOD_FUNC(_llama_context_params_set_type_k), 1);
     rb_define_method(rb_cLLaMAContextParams, "type_k", RUBY_METHOD_FUNC(_llama_context_params_get_type_k), 0);
     rb_define_method(rb_cLLaMAContextParams, "type_v=", RUBY_METHOD_FUNC(_llama_context_params_set_type_v), 1);
     rb_define_method(rb_cLLaMAContextParams, "type_v", RUBY_METHOD_FUNC(_llama_context_params_get_type_v), 0);
-    rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
-    rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
     rb_define_method(rb_cLLaMAContextParams, "logits_all=", RUBY_METHOD_FUNC(_llama_context_params_set_logits_all), 1);
     rb_define_method(rb_cLLaMAContextParams, "logits_all", RUBY_METHOD_FUNC(_llama_context_params_get_logits_all), 0);
-    rb_define_method(rb_cLLaMAContextParams, "embedding=", RUBY_METHOD_FUNC(_llama_context_params_set_embedding), 1);
-    rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
+    rb_define_method(rb_cLLaMAContextParams, "embeddings=", RUBY_METHOD_FUNC(_llama_context_params_set_embeddings), 1);
+    rb_define_method(rb_cLLaMAContextParams, "embeddings", RUBY_METHOD_FUNC(_llama_context_params_get_embeddings), 0);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
     rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
-    rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
-    rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
   }
 
 private:
@@ -1058,7 +1058,7 @@ private:
   // rope_scaling_type
   static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    ptr->params.rope_scaling_type = NUM2INT(scaling_type);
+    ptr->params.rope_scaling_type = static_cast<enum llama_rope_scaling_type>(NUM2INT(scaling_type));
     return INT2NUM(ptr->params.rope_scaling_type);
   }
 
@@ -1067,6 +1067,18 @@ private:
     return INT2NUM(ptr->params.rope_scaling_type);
   }
 
+  // pooling_type
+  static VALUE _llama_context_params_set_pooling_type(VALUE self, VALUE scaling_type) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.pooling_type = static_cast<enum llama_pooling_type>(NUM2INT(scaling_type));
+    return INT2NUM(ptr->params.pooling_type);
+  }
+
+  static VALUE _llama_context_params_get_pooling_type(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.pooling_type);
+  }
+
   // rope_freq_base
   static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1146,6 +1158,18 @@ private:
     return UINT2NUM(ptr->params.yarn_orig_ctx);
   }
 
+  // defrag_thold
+  static VALUE _llama_context_params_set_defrag_thold(VALUE self, VALUE defrag_thold) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.defrag_thold = NUM2DBL(defrag_thold);
+    return DBL2NUM(ptr->params.defrag_thold);
+  }
+
+  static VALUE _llama_context_params_get_defrag_thold(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.defrag_thold);
+  }
+
   static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return UINT2NUM(ptr->params.yarn_orig_ctx);
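`defrag_thold` maps llama.cpp's KV-cache defragmentation threshold through NUM2DBL/DBL2NUM, so it round-trips as a Float; in llama.cpp a negative value disables automatic defragmentation. A sketch:

```ruby
params = LLaMACpp::ContextParams.new
params.defrag_thold = 0.1   # defragment once cache fragmentation exceeds roughly 10%
puts params.defrag_thold    # stored as a C float, so expect float rounding on read-back
params.defrag_thold = -1.0  # negative disables automatic defragmentation
```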
@@ -1175,18 +1199,6 @@ private:
     return INT2NUM(ptr->params.type_v);
   }
 
-  // mul_mat_q
-  static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
-    return ptr->params.mul_mat_q ? Qtrue : Qfalse;
-  }
-
-  static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    return ptr->params.mul_mat_q ? Qtrue : Qfalse;
-  }
-
   // logits_all
   static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1199,16 +1211,16 @@ private:
     return ptr->params.logits_all ? Qtrue : Qfalse;
   }
 
-  // embedding
-  static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
+  // embeddings
+  static VALUE _llama_context_params_set_embeddings(VALUE self, VALUE embeddings) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    ptr->params.embedding = RTEST(embedding) ? true : false;
-    return ptr->params.embedding ? Qtrue : Qfalse;
+    ptr->params.embeddings = RTEST(embeddings) ? true : false;
+    return ptr->params.embeddings ? Qtrue : Qfalse;
   }
 
-  static VALUE _llama_context_params_get_embedding(VALUE self) {
+  static VALUE _llama_context_params_get_embeddings(VALUE self) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    return ptr->params.embedding ? Qtrue : Qfalse;
+    return ptr->params.embeddings ? Qtrue : Qfalse;
   }
 
   // offload_kqv
@@ -1222,18 +1234,6 @@ private:
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
     return ptr->params.offload_kqv ? Qtrue : Qfalse;
   }
-
-  // do_pooling
-  static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
-    return ptr->params.do_pooling ? Qtrue : Qfalse;
-  }
-
-  static VALUE _llama_context_params_get_do_pooling(VALUE self) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    return ptr->params.do_pooling ? Qtrue : Qfalse;
-  }
 };
 
 const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
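Together with the `mul_mat_q` and `do_pooling` removals, this is the breaking surface of `ContextParams`; a migration sketch for code written against 0.12.x:

```ruby
params = LLaMACpp::ContextParams.new
# 0.12.x: params.embedding = true
params.embeddings = true      # accessor is now plural, matching llama.cpp's field rename
# params.mul_mat_q = true     # removed upstream; delete the call
# params.do_pooling = true    # removed; use params.pooling_type = ... instead
```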
@@ -1433,7 +1433,8 @@ public:
     rb_define_method(rb_cLLaMAModel, "empty?", RUBY_METHOD_FUNC(_llama_model_empty), 0);
     rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
     rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
-    rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
+    rb_define_method(rb_cLLaMAModel, "vocab_type", RUBY_METHOD_FUNC(_llama_model_get_model_vocab_type), 0);
+    rb_define_method(rb_cLLaMAModel, "rope_type", RUBY_METHOD_FUNC(_llama_model_get_model_rope_type), 0);
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
@@ -1559,41 +1560,14 @@ private:
     return Qnil;
   }
 
-  static VALUE _llama_model_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[4] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads"), rb_intern("scale") };
-    VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 3, kw_values);
-
-    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
-      rb_raise(rb_eArgError, "lora_path must be a string");
-      return Qnil;
-    }
-    if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
-      rb_raise(rb_eArgError, "base_model_path must be a string");
-      return Qnil;
-    }
-    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "n_threads must be an integer");
-      return Qnil;
-    }
-    if (kw_values[3] != Qundef && !RB_FLOAT_TYPE_P(kw_values[3])) {
-      rb_raise(rb_eArgError, "scale must be a float");
-      return Qnil;
-    }
-
-    const char* lora_path = StringValueCStr(kw_values[0]);
-    const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
-    const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
-    const float scale = kw_values[3] == Qundef ? 1.0 : NUM2DBL(kw_values[3]);
+  static VALUE _llama_model_get_model_vocab_type(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_vocab_type(ptr->model));
+  }
 
+  static VALUE _llama_model_get_model_rope_type(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
-    if (llama_model_apply_lora_from_file(ptr->model, lora_path, scale, base_model_path, n_threads) != 0) {
-      rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
-      return Qnil;
-    }
-    return Qnil;
+    return INT2NUM(llama_rope_type(ptr->model));
   }
 
   static VALUE _llama_model_get_model_n_vocab(VALUE self) {
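`Model` swaps `apply_lora_from_file` for two introspection getters that return raw llama.cpp enum values. A sketch, with a placeholder model path:

```ruby
require 'llama_cpp'

model_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: model_params)
puts model.vocab_type  # e.g. 1 for a SentencePiece (SPM) vocabulary
puts model.rope_type   # one of llama.cpp's llama_rope_type enum values
```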
@@ -2038,12 +2012,11 @@ public:
     rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
     rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
-    rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
-    rb_define_method(rb_cLLaMAContext, "eval_embd", RUBY_METHOD_FUNC(_llama_context_eval_embd), -1);
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
+    rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2054,14 +2027,16 @@ public:
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
-    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_shift", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_shift), 4);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_add", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_add), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_seq_pos_max", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_pos_max), 1);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_defrag", RUBY_METHOD_FUNC(_llama_context_kv_cache_defrag), 0);
+    rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_update", RUBY_METHOD_FUNC(_llama_context_kv_cache_update), 0);
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
     rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
     rb_define_method(rb_cLLaMAContext, "sample_apply_guidance", RUBY_METHOD_FUNC(_llama_context_sample_apply_guidance), -1);
-    rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
     rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
@@ -2070,7 +2045,6 @@ public:
     rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
     rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
     rb_define_method(rb_cLLaMAContext, "sample_entropy", RUBY_METHOD_FUNC(_llama_context_sample_entropy), -1);
-    rb_define_method(rb_cLLaMAContext, "sample_temperature", RUBY_METHOD_FUNC(_llama_context_sample_temperature), -1);
     rb_define_method(rb_cLLaMAContext, "sample_token_mirostat", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat), -1);
     rb_define_method(rb_cLLaMAContext, "sample_token_mirostat_v2", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat_v2), -1);
    rb_define_method(rb_cLLaMAContext, "sample_token_greedy", RUBY_METHOD_FUNC(_llama_context_sample_token_greedy), 1);
@@ -2122,110 +2096,6 @@ private:
     return Qnil;
   }
 
-  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[3] = { rb_intern("tokens"), rb_intern("n_past"), rb_intern("n_tokens") };
-    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
-    rb_warn("eval is deprecated. Use decode instead.");
-
-    if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
-      rb_raise(rb_eArgError, "tokens must be an Array");
-      return Qnil;
-    }
-    if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "n_past must be an integer");
-      return Qnil;
-    }
-    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "n_tokens must be an integer");
-      return Qnil;
-    }
-
-    const size_t tokens_len = RARRAY_LEN(kw_values[0]);
-    std::vector<llama_token> embd(tokens_len);
-    for (size_t i = 0; i < tokens_len; i++) {
-      VALUE token = rb_ary_entry(kw_values[0], i);
-      if (!RB_INTEGER_TYPE_P(token)) {
-        rb_raise(rb_eArgError, "tokens must be an array of integers");
-        return Qnil;
-      }
-      embd[i] = NUM2INT(token);
-    }
-
-    const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
-    const int n_past = NUM2INT(kw_values[1]);
-
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
-      rb_raise(rb_eRuntimeError, "Failed to evaluate");
-      return Qnil;
-    }
-
-    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
-    rb_iv_set(self, "@has_evaluated", Qtrue);
-
-    return Qnil;
-  }
-
-  static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[3] = { rb_intern("embd"), rb_intern("n_past"), rb_intern("n_tokens") };
-    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
-    rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
-    rb_warn("eval_embd is deprecated. Use decode instead.");
-
-    if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
-      rb_raise(rb_eArgError, "tokens must be an Array");
-      return Qnil;
-    }
-    if (!RB_INTEGER_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "n_past must be an integer");
-      return Qnil;
-    }
-    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
-      rb_raise(rb_eArgError, "n_tokens must be an integer");
-      return Qnil;
-    }
-
-    const size_t tokens_len = RARRAY_LEN(kw_values[0]);
-    std::vector<float> embd(tokens_len);
-    for (size_t i = 0; i < tokens_len; i++) {
-      VALUE el = rb_ary_entry(kw_values[0], i);
-      if (!RB_FLOAT_TYPE_P(el)) {
-        rb_raise(rb_eArgError, "embd must be an array of floats");
-        return Qnil;
-      }
-      embd[i] = NUM2DBL(el);
-    }
-
-    const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
-    const int n_past = NUM2INT(kw_values[1]);
-
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    if (llama_eval_embd(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
-      rb_raise(rb_eRuntimeError, "Failed to evaluate");
-      return Qnil;
-    }
-
-    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
-    rb_iv_set(self, "@has_evaluated", Qtrue);
-
-    return Qnil;
-  }
-
   static VALUE _llama_context_decode(VALUE self, VALUE batch) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
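With `eval`/`eval_embd` gone, token evaluation goes through `decode` plus a batch object. A rough migration sketch, assuming the gem's `Batch.get_one` wrapper around `llama_batch_get_one` (an assumption; check the shipped `lib/llama_cpp.rb` for the exact signature):

```ruby
# 0.12.x: context.eval(tokens: tokens, n_past: n_past)
# Batch.get_one and its keywords are assumed here, mirroring llama_batch_get_one
batch = LLaMACpp::Batch.get_one(tokens: tokens, n_tokens: tokens.size,
                                pos_zero: n_past, seq_id: 0)
context.decode(batch)
```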
@@ -2282,7 +2152,7 @@ private:
     LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
     VALUE params = rb_iv_get(self, "@params");
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
-    if (!prms_ptr->params.embedding) {
+    if (!prms_ptr->params.embeddings) {
       rb_raise(rb_eRuntimeError, "embedding parameter is false");
       return Qnil;
     }
@@ -2291,10 +2161,11 @@ private:
       return Qnil;
     }
 
+    const int n_tokens = NUM2INT(rb_iv_get(self, "@n_tokens"));
     const int n_embd = llama_n_embd(model_ptr->model);
     const float* embd = llama_get_embeddings(ptr->ctx);
     VALUE output = rb_ary_new();
-    for (int i = 0; i < n_embd; i++) {
+    for (int i = 0; i < n_tokens * n_embd; i++) {
       rb_ary_push(output, DBL2NUM((double)(embd[i])));
     }
 
@@ -2313,7 +2184,7 @@ private:
     }
     VALUE params = rb_iv_get(self, "@params");
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
-    if (!prms_ptr->params.embedding) {
+    if (!prms_ptr->params.embeddings) {
       rb_raise(rb_eRuntimeError, "embedding parameter is false");
       return Qnil;
     }
@@ -2331,6 +2202,36 @@ private:
     return output;
   }
 
+  static VALUE _llama_context_embeddings_seq(VALUE self, VALUE seq_id) {
+    if (!RB_INTEGER_TYPE_P(seq_id)) {
+      rb_raise(rb_eArgError, "seq_id must be an integer");
+      return Qnil;
+    }
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE params = rb_iv_get(self, "@params");
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+    if (!prms_ptr->params.embeddings) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+
+    VALUE model = rb_iv_get(self, "@model");
+    LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+    const int n_embd = llama_n_embd(model_ptr->model);
+
+    VALUE output = rb_ary_new();
+    const float* embd = llama_get_embeddings_seq(ptr->ctx, NUM2INT(seq_id));
+    for (int i = 0; i < n_embd; i++) {
+      rb_ary_push(output, DBL2NUM((double)(embd[i])));
+    }
+
+    return output;
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
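Two behavioral notes fall out of the embedding hunks above: `embeddings` now returns a flat array of `n_tokens * n_embd` floats (one row per decoded token), and the new `embeddings_seq` returns the pooled vector for a tagged sequence. A sketch, given an already-decoded `context` and its `model`:

```ruby
flat = context.embeddings                  # length == n_tokens * n_embd
rows = flat.each_slice(model.n_embd).to_a  # one embedding row per token

vector = context.embeddings_seq(0)         # n_embd floats, pooled over seq_id 0
                                           # (needs embeddings=true and a pooling_type)
```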
@@ -2430,13 +2331,13 @@ private:
     return Qnil;
   }
 
-  static VALUE _llama_context_kv_cache_seq_shift(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
+  static VALUE _llama_context_kv_cache_seq_add(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eArgError, "LLaMA context is not initialized");
       return Qnil;
     }
-    llama_kv_cache_seq_shift(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
+    llama_kv_cache_seq_add(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
     return Qnil;
   }
 
@@ -2450,6 +2351,35 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_kv_cache_seq_pos_max(VALUE self, VALUE seq_id) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eArgError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_kv_cache_seq_pos_max(ptr->ctx, NUM2INT(seq_id)));
+  }
+
+  static VALUE _llama_context_kv_cache_defrag(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_kv_cache_defrag(ptr->ctx);
+    return Qnil;
+  }
+
+  static VALUE _llama_context_kv_cache_update(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_kv_cache_update(ptr->ctx);
+    return Qnil;
+  }
+
   static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
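The new KV-cache surface in one place; note that the registration hunk earlier really does define the doubled `kv_cache_kv_cache_*` names, so that is what callers get in 0.14.0. A sketch, given a constructed context `ctx`:

```ruby
ctx.kv_cache_seq_add(0, 0, 32, 4)  # shift positions [0, 32) of sequence 0 by +4 (was kv_cache_seq_shift)
puts ctx.kv_cache_seq_pos_max(0)   # largest position currently stored for sequence 0
ctx.kv_cache_kv_cache_defrag       # schedule a defrag pass...
ctx.kv_cache_kv_cache_update       # ...and apply pending K/V updates
```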
@@ -2659,46 +2589,6 @@ private:
     return Qnil;
   }
 
-  static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
-    VALUE kw_values[2] = { Qundef, Qundef };
-    VALUE candidates = Qnil;
-    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
-
-    if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
-      rb_raise(rb_eArgError, "guidance must be a Context");
-      return Qnil;
-    }
-    if (!RB_FLOAT_TYPE_P(kw_values[1])) {
-      rb_raise(rb_eArgError, "scale must be a float");
-      return Qnil;
-    }
-
-    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    if (ctx_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
-    if (cnd_ptr->array.data == nullptr) {
-      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-      return Qnil;
-    }
-
-    LLaMAContextWrapper* guidance_ptr = get_llama_context(kw_values[0]);
-    if (guidance_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "guidance context is not initialized");
-      return Qnil;
-    }
-    const float scale = NUM2DBL(kw_values[1]);
-
-    llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);
-
-    return Qnil;
-  }
-
   static VALUE _llama_context_sample_softmax(VALUE self, VALUE candidates) {
     if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
       rb_raise(rb_eArgError, "argument must be a TokenDataArray");
@@ -2994,42 +2884,6 @@ private:
     return Qnil;
   }
 
-  static VALUE _llama_context_sample_temperature(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[1] = { rb_intern("temperature") };
-    VALUE kw_values[1] = { Qundef };
-    VALUE candidates = Qnil;
-    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
-    rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
-
-    if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
-      rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
-      return Qnil;
-    }
-    if (!RB_FLOAT_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "temperature must be a float");
-      return Qnil;
-    }
-
-    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    if (ctx_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
-    if (cnd_ptr->array.data == nullptr) {
-      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-      return Qnil;
-    }
-    const float temperature = NUM2DBL(kw_values[0]);
-
-    llama_sample_temperature(ctx_ptr->ctx, &(cnd_ptr->array), temperature);
-
-    return Qnil;
-  }
-
   static VALUE _llama_context_sample_token_mirostat(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[4] = { rb_intern("tau"), rb_intern("eta"), rb_intern("m"), rb_intern("mu") };
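The long-deprecated alias is now dropped rather than warned about; `sample_temp` (kept in the registration hunk above) is the replacement. A sketch, given a `TokenDataArray` `candidates`, and assuming its keyword is `temp:` to mirror llama_sample_temp:

```ruby
# 0.12.x: context.sample_temperature(candidates, temperature: 0.8)
context.sample_temp(candidates, temp: 0.8)  # keyword name assumed; verify in the gem
```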
@@ -3307,16 +3161,6 @@ static VALUE rb_llama_time_us(VALUE self) {
   return LONG2NUM(llama_time_us());
 }
 
-static VALUE rb_llama_mmap_supported(VALUE self) {
-  rb_warn("mmap_supported? is deprecated. Use supports_mmap? instead.");
-  return llama_mmap_supported() ? Qtrue : Qfalse;
-}
-
-static VALUE rb_llama_mlock_supported(VALUE self) {
-  rb_warn("mlock_supported? is deprecated. Use supports_mlock? instead.");
-  return llama_mlock_supported() ? Qtrue : Qfalse;
-}
-
 static VALUE rb_llama_max_devices(VALUE self) {
   return SIZET2NUM(llama_max_devices());
 }
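Likewise the deprecated module-level predicates are gone; the replacements registered in the next hunk remain:

```ruby
# 0.12.x also answered LLaMACpp.mmap_supported? / mlock_supported? (with warnings)
puts LLaMACpp.supports_mmap?
puts LLaMACpp.supports_mlock?
```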
@@ -3355,8 +3199,6 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
   rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
-  rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
-  rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
   rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
   rb_define_module_function(rb_mLLaMACpp, "supports_mmap?", rb_llama_supports_mmap, 0);
   rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
@@ -3394,16 +3236,16 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q3_K_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_XS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ1_S", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ4_NL", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_INT", INT2NUM(LLAMA_KV_OVERRIDE_INT));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_FLOAT));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_BOOL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_INT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL));
 
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
@@ -3413,19 +3255,20 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_YARN", INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_NONE", INT2NUM(LLAMA_POOLING_NONE));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_MEAN", INT2NUM(LLAMA_POOLING_MEAN));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_CLS", INT2NUM(LLAMA_POOLING_CLS));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_POOLING_TYPE_UNSPECIFIED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_NONE", INT2NUM(LLAMA_SPLIT_NONE));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_LAYER", INT2NUM(LLAMA_SPLIT_LAYER));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_ROW", INT2NUM(LLAMA_SPLIT_ROW));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
 
   std::stringstream ss_magic;
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
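The constant churn is a mechanical grep-and-rename; for example (old names as reconstructed above):

```ruby
params = LLaMACpp::ContextParams.new
# LLaMACpp::LLAMA_ROPE_SCALING_YARN -> LLaMACpp::LLAMA_ROPE_SCALING_TYPE_YARN
params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_TYPE_YARN
# LLaMACpp::LLAMA_SPLIT_LAYER       -> LLaMACpp::LLAMA_SPLIT_MODE_LAYER
# LLaMACpp::LLAMA_KV_OVERRIDE_INT   -> LLaMACpp::LLAMA_KV_OVERRIDE_TYPE_INT
```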
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.12.7'
+  VERSION = '0.14.0'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b2361'
 end
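Both constants are introspectable at runtime, which is handy for guarding version-specific call sites:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.14.0"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "b2361"
```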