llama_cpp 0.12.7 → 0.14.0

@@ -952,6 +952,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
+ rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
+ rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -966,20 +968,18 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
  rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
  rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
+ rb_define_method(rb_cLLaMAContextParams, "defrag_thold=", RUBY_METHOD_FUNC(_llama_context_params_set_defrag_thold), 1);
+ rb_define_method(rb_cLLaMAContextParams, "defrag_thold", RUBY_METHOD_FUNC(_llama_context_params_get_defrag_thold), 0);
  rb_define_method(rb_cLLaMAContextParams, "type_k=", RUBY_METHOD_FUNC(_llama_context_params_set_type_k), 1);
  rb_define_method(rb_cLLaMAContextParams, "type_k", RUBY_METHOD_FUNC(_llama_context_params_get_type_k), 0);
  rb_define_method(rb_cLLaMAContextParams, "type_v=", RUBY_METHOD_FUNC(_llama_context_params_set_type_v), 1);
  rb_define_method(rb_cLLaMAContextParams, "type_v", RUBY_METHOD_FUNC(_llama_context_params_get_type_v), 0);
- rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
- rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
  rb_define_method(rb_cLLaMAContextParams, "logits_all=", RUBY_METHOD_FUNC(_llama_context_params_set_logits_all), 1);
  rb_define_method(rb_cLLaMAContextParams, "logits_all", RUBY_METHOD_FUNC(_llama_context_params_get_logits_all), 0);
- rb_define_method(rb_cLLaMAContextParams, "embedding=", RUBY_METHOD_FUNC(_llama_context_params_set_embedding), 1);
- rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
+ rb_define_method(rb_cLLaMAContextParams, "embeddings=", RUBY_METHOD_FUNC(_llama_context_params_set_embeddings), 1);
+ rb_define_method(rb_cLLaMAContextParams, "embeddings", RUBY_METHOD_FUNC(_llama_context_params_get_embeddings), 0);
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv=", RUBY_METHOD_FUNC(_llama_context_params_set_offload_kqv), 1);
  rb_define_method(rb_cLLaMAContextParams, "offload_kqv", RUBY_METHOD_FUNC(_llama_context_params_get_offload_kqv), 0);
- rb_define_method(rb_cLLaMAContextParams, "do_pooling=", RUBY_METHOD_FUNC(_llama_context_params_set_do_pooling), 1);
- rb_define_method(rb_cLLaMAContextParams, "do_pooling", RUBY_METHOD_FUNC(_llama_context_params_get_do_pooling), 0);
  }

  private:
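Note: ContextParams gains pooling_type and defrag_thold accessors, drops mul_mat_q and do_pooling, and renames embedding to embeddings. A minimal Ruby sketch of the new attribute names, assuming the Ruby-side class is LLaMACpp::ContextParams as in the gem's README (the values are placeholders):

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    params.embeddings = true                                  # was params.embedding = true in 0.12.x
    params.pooling_type = LLaMACpp::LLAMA_POOLING_TYPE_MEAN   # replaces the old do_pooling flag
    params.defrag_thold = 0.1                                 # new KV-cache defragmentation threshold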
@@ -1058,7 +1058,7 @@ private:
  // rope_scaling_type
  static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.rope_scaling_type = NUM2INT(scaling_type);
+ ptr->params.rope_scaling_type = static_cast<enum llama_rope_scaling_type>(NUM2INT(scaling_type));
  return INT2NUM(ptr->params.rope_scaling_type);
  }

@@ -1067,6 +1067,18 @@ private:
  return INT2NUM(ptr->params.rope_scaling_type);
  }

+ // pooling_type
+ static VALUE _llama_context_params_set_pooling_type(VALUE self, VALUE scaling_type) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.pooling_type = static_cast<enum llama_pooling_type>(NUM2INT(scaling_type));
+ return INT2NUM(ptr->params.pooling_type);
+ }
+
+ static VALUE _llama_context_params_get_pooling_type(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return INT2NUM(ptr->params.pooling_type);
+ }
+
  // rope_freq_base
  static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1146,6 +1158,18 @@ private:
  return UINT2NUM(ptr->params.yarn_orig_ctx);
  }

+ // defrag_thold
+ static VALUE _llama_context_params_set_defrag_thold(VALUE self, VALUE defrag_thold) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.defrag_thold = NUM2DBL(defrag_thold);
+ return DBL2NUM(ptr->params.defrag_thold);
+ }
+
+ static VALUE _llama_context_params_get_defrag_thold(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.defrag_thold);
+ }
+
  static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
  return UINT2NUM(ptr->params.yarn_orig_ctx);
@@ -1175,18 +1199,6 @@ private:
  return INT2NUM(ptr->params.type_v);
  }

- // mul_mat_q
- static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
- LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
- return ptr->params.mul_mat_q ? Qtrue : Qfalse;
- }
-
- static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
- LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- return ptr->params.mul_mat_q ? Qtrue : Qfalse;
- }
-
  // logits_all
  static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1199,16 +1211,16 @@ private:
  return ptr->params.logits_all ? Qtrue : Qfalse;
  }

- // embedding
- static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
+ // embeddings
+ static VALUE _llama_context_params_set_embeddings(VALUE self, VALUE embeddings) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.embedding = RTEST(embedding) ? true : false;
- return ptr->params.embedding ? Qtrue : Qfalse;
+ ptr->params.embeddings = RTEST(embeddings) ? true : false;
+ return ptr->params.embeddings ? Qtrue : Qfalse;
  }

- static VALUE _llama_context_params_get_embedding(VALUE self) {
+ static VALUE _llama_context_params_get_embeddings(VALUE self) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- return ptr->params.embedding ? Qtrue : Qfalse;
+ return ptr->params.embeddings ? Qtrue : Qfalse;
  }

  // offload_kqv
@@ -1222,18 +1234,6 @@ private:
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
  return ptr->params.offload_kqv ? Qtrue : Qfalse;
  }
-
- // do_pooling
- static VALUE _llama_context_params_set_do_pooling(VALUE self, VALUE do_pooling) {
- LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.do_pooling = RTEST(do_pooling) ? true : false;
- return ptr->params.do_pooling ? Qtrue : Qfalse;
- }
-
- static VALUE _llama_context_params_get_do_pooling(VALUE self) {
- LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- return ptr->params.do_pooling ? Qtrue : Qfalse;
- }
  };

  const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
@@ -1433,7 +1433,8 @@ public:
  rb_define_method(rb_cLLaMAModel, "empty?", RUBY_METHOD_FUNC(_llama_model_empty), 0);
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
- rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
+ rb_define_method(rb_cLLaMAModel, "vocab_type", RUBY_METHOD_FUNC(_llama_model_get_model_vocab_type), 0);
+ rb_define_method(rb_cLLaMAModel, "rope_type", RUBY_METHOD_FUNC(_llama_model_get_model_rope_type), 0);
  rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
  rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
@@ -1559,41 +1560,14 @@ private:
  return Qnil;
  }

- static VALUE _llama_model_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[4] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads"), rb_intern("scale") };
- VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 3, kw_values);
-
- if (!RB_TYPE_P(kw_values[0], T_STRING)) {
- rb_raise(rb_eArgError, "lora_path must be a string");
- return Qnil;
- }
- if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
- rb_raise(rb_eArgError, "base_model_path must be a string");
- return Qnil;
- }
- if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "n_threads must be an integer");
- return Qnil;
- }
- if (kw_values[3] != Qundef && !RB_FLOAT_TYPE_P(kw_values[3])) {
- rb_raise(rb_eArgError, "scale must be a float");
- return Qnil;
- }
-
- const char* lora_path = StringValueCStr(kw_values[0]);
- const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
- const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
- const float scale = kw_values[3] == Qundef ? 1.0 : NUM2DBL(kw_values[3]);
+ static VALUE _llama_model_get_model_vocab_type(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_vocab_type(ptr->model));
+ }

+ static VALUE _llama_model_get_model_rope_type(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- if (llama_model_apply_lora_from_file(ptr->model, lora_path, scale, base_model_path, n_threads) != 0) {
- rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
- return Qnil;
- }
- return Qnil;
+ return INT2NUM(llama_rope_type(ptr->model));
  }

  static VALUE _llama_model_get_model_n_vocab(VALUE self) {
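Note: apply_lora_from_file is removed from Model, while vocab_type and rope_type readers are added. A sketch of the new readers, assuming the Ruby-side classes LLaMACpp::Model and LLaMACpp::ModelParams as in the gem's README and a placeholder model path:

    model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)
    model.vocab_type  # => Integer enum value from llama_vocab_type()
    model.rope_type   # => Integer enum value from llama_rope_type()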
@@ -2038,12 +2012,11 @@ public:
  rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
  rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
- rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
- rb_define_method(rb_cLLaMAContext, "eval_embd", RUBY_METHOD_FUNC(_llama_context_eval_embd), -1);
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings_ith", RUBY_METHOD_FUNC(_llama_context_embeddings_ith), 1);
+ rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
  rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
  rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
  rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
@@ -2054,14 +2027,16 @@ public:
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
- rb_define_method(rb_cLLaMAContext, "kv_cache_seq_shift", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_shift), 4);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_seq_add", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_add), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_seq_pos_max", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_pos_max), 1);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_defrag", RUBY_METHOD_FUNC(_llama_context_kv_cache_defrag), 0);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_update", RUBY_METHOD_FUNC(_llama_context_kv_cache_update), 0);
  rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
  rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
  rb_define_method(rb_cLLaMAContext, "sample_apply_guidance", RUBY_METHOD_FUNC(_llama_context_sample_apply_guidance), -1);
- rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
  rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
@@ -2070,7 +2045,6 @@ public:
  rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
  rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
  rb_define_method(rb_cLLaMAContext, "sample_entropy", RUBY_METHOD_FUNC(_llama_context_sample_entropy), -1);
- rb_define_method(rb_cLLaMAContext, "sample_temperature", RUBY_METHOD_FUNC(_llama_context_sample_temperature), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_mirostat", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_mirostat_v2", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat_v2), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_greedy", RUBY_METHOD_FUNC(_llama_context_sample_token_greedy), 1);
@@ -2122,110 +2096,6 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[3] = { rb_intern("tokens"), rb_intern("n_past"), rb_intern("n_tokens") };
- VALUE kw_values[3] = { Qundef, Qundef, Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
- rb_warn("eval is deprecated. Use decode instead.");
-
- if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
- rb_raise(rb_eArgError, "tokens must be an Array");
- return Qnil;
- }
- if (!RB_INTEGER_TYPE_P(kw_values[1])) {
- rb_raise(rb_eArgError, "n_past must be an integer");
- return Qnil;
- }
- if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "n_tokens must be an integer");
- return Qnil;
- }
-
- const size_t tokens_len = RARRAY_LEN(kw_values[0]);
- std::vector<llama_token> embd(tokens_len);
- for (size_t i = 0; i < tokens_len; i++) {
- VALUE token = rb_ary_entry(kw_values[0], i);
- if (!RB_INTEGER_TYPE_P(token)) {
- rb_raise(rb_eArgError, "tokens must be an array of integers");
- return Qnil;
- }
- embd[i] = NUM2INT(token);
- }
-
- const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
- const int n_past = NUM2INT(kw_values[1]);
-
- LLaMAContextWrapper* ptr = get_llama_context(self);
- if (ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
- rb_raise(rb_eRuntimeError, "Failed to evaluate");
- return Qnil;
- }
-
- rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
- rb_iv_set(self, "@has_evaluated", Qtrue);
-
- return Qnil;
- }
-
- static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[3] = { rb_intern("embd"), rb_intern("n_past"), rb_intern("n_tokens") };
- VALUE kw_values[3] = { Qundef, Qundef, Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
- rb_warn("eval_embd is deprecated. Use decode instead.");
-
- if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
- rb_raise(rb_eArgError, "tokens must be an Array");
- return Qnil;
- }
- if (!RB_INTEGER_TYPE_P(kw_values[1])) {
- rb_raise(rb_eArgError, "n_past must be an integer");
- return Qnil;
- }
- if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "n_tokens must be an integer");
- return Qnil;
- }
-
- const size_t tokens_len = RARRAY_LEN(kw_values[0]);
- std::vector<float> embd(tokens_len);
- for (size_t i = 0; i < tokens_len; i++) {
- VALUE el = rb_ary_entry(kw_values[0], i);
- if (!RB_FLOAT_TYPE_P(el)) {
- rb_raise(rb_eArgError, "embd must be an array of floats");
- return Qnil;
- }
- embd[i] = NUM2DBL(el);
- }
-
- const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
- const int n_past = NUM2INT(kw_values[1]);
-
- LLaMAContextWrapper* ptr = get_llama_context(self);
- if (ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- if (llama_eval_embd(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
- rb_raise(rb_eRuntimeError, "Failed to evaluate");
- return Qnil;
- }
-
- rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
- rb_iv_set(self, "@has_evaluated", Qtrue);
-
- return Qnil;
- }
-
  static VALUE _llama_context_decode(VALUE self, VALUE batch) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
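Note: the deprecated eval and eval_embd methods are removed outright, so decode with a batch is now the only evaluation path. A hypothetical sketch; the Batch.get_one helper and its keyword names are assumptions, not taken from this diff:

    # tokens is an Array of Integer token ids for the prompt
    batch = LLaMACpp::Batch.get_one(tokens: tokens, n_tokens: tokens.size, pos_zero: 0, seq_id: 0)
    context.decode(batch)   # replaces context.eval(tokens: tokens, n_past: 0)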
@@ -2282,7 +2152,7 @@ private:
  LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
  VALUE params = rb_iv_get(self, "@params");
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
- if (!prms_ptr->params.embedding) {
+ if (!prms_ptr->params.embeddings) {
  rb_raise(rb_eRuntimeError, "embedding parameter is false");
  return Qnil;
  }
@@ -2291,10 +2161,11 @@ private:
  return Qnil;
  }

+ const int n_tokens = NUM2INT(rb_iv_get(self, "@n_tokens"));
  const int n_embd = llama_n_embd(model_ptr->model);
  const float* embd = llama_get_embeddings(ptr->ctx);
  VALUE output = rb_ary_new();
- for (int i = 0; i < n_embd; i++) {
+ for (int i = 0; i < n_tokens * n_embd; i++) {
  rb_ary_push(output, DBL2NUM((double)(embd[i])));
  }

@@ -2313,7 +2184,7 @@ private:
  }
  VALUE params = rb_iv_get(self, "@params");
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
- if (!prms_ptr->params.embedding) {
+ if (!prms_ptr->params.embeddings) {
  rb_raise(rb_eRuntimeError, "embedding parameter is false");
  return Qnil;
  }
@@ -2331,6 +2202,36 @@ private:
  return output;
  }

+ static VALUE _llama_context_embeddings_seq(VALUE self, VALUE seq_id) {
+ if (!RB_INTEGER_TYPE_P(seq_id)) {
+ rb_raise(rb_eArgError, "seq_id must be an integer");
+ return Qnil;
+ }
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ VALUE params = rb_iv_get(self, "@params");
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(params);
+ if (!prms_ptr->params.embeddings) {
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
+ return Qnil;
+ }
+
+ VALUE model = rb_iv_get(self, "@model");
+ LLaMAModelWrapper* model_ptr = RbLLaMAModel::get_llama_model(model);
+ const int n_embd = llama_n_embd(model_ptr->model);
+
+ VALUE output = rb_ary_new();
+ const float* embd = llama_get_embeddings_seq(ptr->ctx, NUM2INT(seq_id));
+ for (int i = 0; i < n_embd; i++) {
+ rb_ary_push(output, DBL2NUM((double)(embd[i])));
+ }
+
+ return output;
+ }
+
  static VALUE _llama_context_n_ctx(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
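Note: embeddings_seq is new and returns the pooled embedding for a single sequence id, while embeddings now returns n_tokens * n_embd values. A sketch, assuming LLaMACpp::Context.new(model:, params:) as in the gem's README:

    params = LLaMACpp::ContextParams.new
    params.embeddings = true
    params.pooling_type = LLaMACpp::LLAMA_POOLING_TYPE_MEAN
    context = LLaMACpp::Context.new(model: model, params: params)
    # ... decode a batch containing sequence 0 ...
    vec = context.embeddings_seq(0)   # Array of n_embd Floats for sequence 0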
@@ -2430,13 +2331,13 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_kv_cache_seq_shift(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
+ static VALUE _llama_context_kv_cache_seq_add(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eArgError, "LLaMA context is not initialized");
  return Qnil;
  }
- llama_kv_cache_seq_shift(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
+ llama_kv_cache_seq_add(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
  return Qnil;
  }

@@ -2450,6 +2351,35 @@ private:
  return Qnil;
  }

+ static VALUE _llama_context_kv_cache_seq_pos_max(VALUE self, VALUE seq_id) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eArgError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_kv_cache_seq_pos_max(ptr->ctx, NUM2INT(seq_id)));
+ }
+
+ static VALUE _llama_context_kv_cache_defrag(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ llama_kv_cache_defrag(ptr->ctx);
+ return Qnil;
+ }
+
+ static VALUE _llama_context_kv_cache_update(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ llama_kv_cache_update(ptr->ctx);
+ return Qnil;
+ }
+
  static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
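Note: kv_cache_seq_shift is renamed to kv_cache_seq_add, and kv_cache_seq_pos_max plus the defrag/update bindings are new. A sketch of the renamed call; the positions and delta are placeholders:

    # shift positions [32, 64) of sequence 0 forward by 4, as kv_cache_seq_shift did before
    context.kv_cache_seq_add(0, 32, 64, 4)
    context.kv_cache_seq_pos_max(0)   # => largest position stored for sequence 0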
@@ -2659,46 +2589,6 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
- VALUE kw_values[2] = { Qundef, Qundef };
- VALUE candidates = Qnil;
- rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
-
- if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
- rb_raise(rb_eArgError, "guidance must be a Context");
- return Qnil;
- }
- if (!RB_FLOAT_TYPE_P(kw_values[1])) {
- rb_raise(rb_eArgError, "scale must be a float");
- return Qnil;
- }
-
- LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- if (ctx_ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
- if (cnd_ptr->array.data == nullptr) {
- rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
- return Qnil;
- }
-
- LLaMAContextWrapper* guidance_ptr = get_llama_context(kw_values[0]);
- if (guidance_ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "guidance context is not initialized");
- return Qnil;
- }
- const float scale = NUM2DBL(kw_values[1]);
-
- llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);
-
- return Qnil;
- }
-
  static VALUE _llama_context_sample_softmax(VALUE self, VALUE candidates) {
  if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
  rb_raise(rb_eArgError, "argument must be a TokenDataArray");
@@ -2994,42 +2884,6 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_sample_temperature(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("temperature") };
- VALUE kw_values[1] = { Qundef };
- VALUE candidates = Qnil;
- rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
-
- if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
- rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
- return Qnil;
- }
- if (!RB_FLOAT_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "temperature must be a float");
- return Qnil;
- }
-
- LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- if (ctx_ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
- if (cnd_ptr->array.data == nullptr) {
- rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
- return Qnil;
- }
- const float temperature = NUM2DBL(kw_values[0]);
-
- llama_sample_temperature(ctx_ptr->ctx, &(cnd_ptr->array), temperature);
-
- return Qnil;
- }
-
  static VALUE _llama_context_sample_token_mirostat(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[4] = { rb_intern("tau"), rb_intern("eta"), rb_intern("m"), rb_intern("mu") };
@@ -3307,16 +3161,6 @@ static VALUE rb_llama_time_us(VALUE self) {
  return LONG2NUM(llama_time_us());
  }

- static VALUE rb_llama_mmap_supported(VALUE self) {
- rb_warn("mmap_supported? is deprecated. Use supports_mmap? instead.");
- return llama_mmap_supported() ? Qtrue : Qfalse;
- }
-
- static VALUE rb_llama_mlock_supported(VALUE self) {
- rb_warn("mlock_supported? is deprecated. Use supports_mlock? instead.");
- return llama_mlock_supported() ? Qtrue : Qfalse;
- }
-
  static VALUE rb_llama_max_devices(VALUE self) {
  return SIZET2NUM(llama_max_devices());
  }
@@ -3355,8 +3199,6 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
  rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
- rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
- rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
  rb_define_module_function(rb_mLLaMACpp, "supports_mmap?", rb_llama_supports_mmap, 0);
  rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
@@ -3394,16 +3236,16 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
- rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q3_K_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_XS));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ1_S", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ4_NL", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL));

  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));

- rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_INT", INT2NUM(LLAMA_KV_OVERRIDE_INT));
- rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_FLOAT));
- rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_BOOL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_INT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL));

  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
@@ -3413,19 +3255,20 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_YARN", INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE));

- rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_NONE", INT2NUM(LLAMA_POOLING_NONE));
- rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_MEAN", INT2NUM(LLAMA_POOLING_MEAN));
- rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_CLS", INT2NUM(LLAMA_POOLING_CLS));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_POOLING_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));

- rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_NONE", INT2NUM(LLAMA_SPLIT_NONE));
- rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_LAYER", INT2NUM(LLAMA_SPLIT_LAYER));
- rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_ROW", INT2NUM(LLAMA_SPLIT_ROW));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));

  std::stringstream ss_magic;
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
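Note: the enum constants gain a _TYPE_ or _MODE_ segment to match the upstream llama.cpp renames. One renamed constant in use, assuming the LLaMACpp::ContextParams class as above:

    params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_TYPE_YARN   # was LLAMA_ROPE_SCALING_YARN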
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.12.7'
+ VERSION = '0.14.0'

  # The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_CPP_VERSION = 'b2249'
+ LLAMA_CPP_VERSION = 'b2361'
  end