llama_cpp 0.12.6 → 0.13.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 296b29b7d20c7bfd66f69749ccd41e63d6998589af0d3514db8f6c08011d545f
- data.tar.gz: 48f8787a63759a95049bbc515f4b35c74d07b356f1635d751d8d9d852e386c5a
+ metadata.gz: 8e8d23f3abceeea388895f198a3906b7a24d692cba97e46934a14567450fc3a2
+ data.tar.gz: 9d1385671b76ea826fbc000910e102fbbb951970f77b7511fdf2653adbc97334
  SHA512:
- metadata.gz: 5cd4c284a31fcdd36565b481c2456545eaf3fe19fda3778121f26f529ca01d18a894ba73739d966dc29f5aa239f8784ed56801bac5db3d21ae13e5b5aa2b4012
- data.tar.gz: 7d03f1d081d097913fe3489a0432a5869a13e0a0371458c6c4d6cdea7296422a5af51c13ae05ea0d752e068865cc99e52ee0c4f3d67de892003c76e9126d5940
+ metadata.gz: 24746b8aaaa749b4058ddb64f6b07952356a6947ef1f40bc8bf7010a37b8b476e71632452ce28b6e61b11c66249a9d4fb6573de31e66e750bdb4391ce8f3286c
+ data.tar.gz: 56f79812ecdeecfc2dce6f68a73fc72d4495c6a51cc1d2ea7ccfeeb3e1ac9b6e72e78cbed019108e05987e431c4634bbfa1029f380f813a7fb6e009b5f6ec4e3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
+ ## [[0.13.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.7...v0.13.0)] - 2024-03-02
+
+ - Bump bundled llama.cpp from b2143 to b2303.
+ - Remove deprecated methods:
+   - `mmap_supported?`, `mlock_supported?`, `apply_lora_from_file`, `eval`, `eval_embd`, `sample_classifier_free_guidance`, `sample_temperature`, and `mul_mat_q`.
+ - Rename some constants (e.g., `LLAMA_KV_OVERRIDE_INT` → `LLAMA_KV_OVERRIDE_TYPE_INT`, `LLAMA_ROPE_SCALING_YARN` → `LLAMA_ROPE_SCALING_TYPE_YARN`, `LLAMA_SPLIT_LAYER` → `LLAMA_SPLIT_MODE_LAYER`).
+ - Rename `kv_cache_seq_shift` method to `kv_cache_seq_add`.
+ - Add `defrag_thold` accessor to `ContextParams`.
+ - Add `vocab_type` and `rope_type` methods to `Model`.
+ - Add `kv_cache_seq_pos_max`, `kv_cache_defrag`, and `kv_cache_update` methods to `Context`.
+
+ ## [[0.12.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.6...v0.12.7)] - 2024-02-24
+
+ - Bump bundled llama.cpp from b2106 to b2143.
+ - Add constants for file type: `LLAMA_FTYPE_MOSTLY_IQ1_S` and `LLAMA_FTYPE_MOSTLY_IQ4_NL`.
+ - Add constants for pooling type: `LLAMA_POOLING_NONE`, `LLAMA_POOLING_MEAN`, and `LLAMA_POOLING_CLS`.
+ - Add `numa_init` module function to `LLaMACpp`.
+ - Remove unnecessary argument from `backend_init`.
+
+ Implementation of the llama_chat_apply_template binding has been postponed for the time being.
+
  ## [[0.12.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.12.5...v0.12.6)] - 2024-02-17

  - Bump bundled llama.cpp from b2106 to b2143.
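Taken together, the 0.13.0 and 0.12.7 entries above change the gem's public Ruby surface. The following is a minimal migration sketch, not taken from the gem's documentation: it assumes the README-style construction pattern (`ModelParams`, `Model.new(model_path:, params:)`, `Context.new(model:, params:)`) and a placeholder model path, and it only calls methods that appear in the extension diff below.

```ruby
require 'llama_cpp'

LLaMACpp.backend_init # since 0.12.7: takes no arguments

# mmap_supported? / mlock_supported? were removed; the supports_* queries remain.
puts LLaMACpp.supports_mmap?
puts LLaMACpp.supports_mlock?

context_params = LLaMACpp::ContextParams.new
context_params.defrag_thold = 0.1 # new in 0.13.0: KV-cache defragmentation threshold

model = LLaMACpp::Model.new(model_path: './model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: context_params)

# eval / eval_embd were removed: build a Batch and call context.decode(batch) instead.
# sample_temperature was removed: use sample_temp with the same candidates array.
# apply_lora_from_file and mul_mat_q / mul_mat_q= are gone without a direct replacement here.

LLaMACpp.backend_free
```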
@@ -966,12 +966,12 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
  rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
  rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
+ rb_define_method(rb_cLLaMAContextParams, "defrag_thold=", RUBY_METHOD_FUNC(_llama_context_params_set_defrag_thold), 1);
+ rb_define_method(rb_cLLaMAContextParams, "defrag_thold", RUBY_METHOD_FUNC(_llama_context_params_get_defrag_thold), 0);
  rb_define_method(rb_cLLaMAContextParams, "type_k=", RUBY_METHOD_FUNC(_llama_context_params_set_type_k), 1);
  rb_define_method(rb_cLLaMAContextParams, "type_k", RUBY_METHOD_FUNC(_llama_context_params_get_type_k), 0);
  rb_define_method(rb_cLLaMAContextParams, "type_v=", RUBY_METHOD_FUNC(_llama_context_params_set_type_v), 1);
  rb_define_method(rb_cLLaMAContextParams, "type_v", RUBY_METHOD_FUNC(_llama_context_params_get_type_v), 0);
- rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
- rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
  rb_define_method(rb_cLLaMAContextParams, "logits_all=", RUBY_METHOD_FUNC(_llama_context_params_set_logits_all), 1);
  rb_define_method(rb_cLLaMAContextParams, "logits_all", RUBY_METHOD_FUNC(_llama_context_params_get_logits_all), 0);
  rb_define_method(rb_cLLaMAContextParams, "embedding=", RUBY_METHOD_FUNC(_llama_context_params_set_embedding), 1);
@@ -1146,6 +1146,18 @@ private:
  return UINT2NUM(ptr->params.yarn_orig_ctx);
  }

+ // defrag_thold
+ static VALUE _llama_context_params_set_defrag_thold(VALUE self, VALUE defrag_thold) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.defrag_thold = NUM2DBL(defrag_thold);
+ return DBL2NUM(ptr->params.defrag_thold);
+ }
+
+ static VALUE _llama_context_params_get_defrag_thold(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return DBL2NUM(ptr->params.defrag_thold);
+ }
+
  static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
  return UINT2NUM(ptr->params.yarn_orig_ctx);
@@ -1175,18 +1187,6 @@ private:
  return INT2NUM(ptr->params.type_v);
  }

- // mul_mat_q
- static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
- LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
- return ptr->params.mul_mat_q ? Qtrue : Qfalse;
- }
-
- static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
- LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- return ptr->params.mul_mat_q ? Qtrue : Qfalse;
- }
-
  // logits_all
  static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1433,7 +1433,8 @@ public:
  rb_define_method(rb_cLLaMAModel, "empty?", RUBY_METHOD_FUNC(_llama_model_empty), 0);
  rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
  rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
- rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
+ rb_define_method(rb_cLLaMAModel, "vocab_type", RUBY_METHOD_FUNC(_llama_model_get_model_vocab_type), 0);
+ rb_define_method(rb_cLLaMAModel, "rope_type", RUBY_METHOD_FUNC(_llama_model_get_model_rope_type), 0);
  rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
  rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
@@ -1559,41 +1560,14 @@ private:
  return Qnil;
  }

- static VALUE _llama_model_apply_lora_from_file(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[4] = { rb_intern("lora_path"), rb_intern("base_model_path"), rb_intern("n_threads"), rb_intern("scale") };
- VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 3, kw_values);
-
- if (!RB_TYPE_P(kw_values[0], T_STRING)) {
- rb_raise(rb_eArgError, "lora_path must be a string");
- return Qnil;
- }
- if (kw_values[1] != Qundef && !RB_TYPE_P(kw_values[1], T_STRING)) {
- rb_raise(rb_eArgError, "base_model_path must be a string");
- return Qnil;
- }
- if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "n_threads must be an integer");
- return Qnil;
- }
- if (kw_values[3] != Qundef && !RB_FLOAT_TYPE_P(kw_values[3])) {
- rb_raise(rb_eArgError, "scale must be a float");
- return Qnil;
- }
-
- const char* lora_path = StringValueCStr(kw_values[0]);
- const char* base_model_path = kw_values[1] == Qundef ? NULL : StringValueCStr(kw_values[1]);
- const int n_threads = kw_values[2] == Qundef ? 1 : NUM2INT(kw_values[2]);
- const float scale = kw_values[3] == Qundef ? 1.0 : NUM2DBL(kw_values[3]);
+ static VALUE _llama_model_get_model_vocab_type(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_vocab_type(ptr->model));
+ }

+ static VALUE _llama_model_get_model_rope_type(VALUE self) {
  LLaMAModelWrapper* ptr = get_llama_model(self);
- if (llama_model_apply_lora_from_file(ptr->model, lora_path, scale, base_model_path, n_threads) != 0) {
- rb_raise(rb_eRuntimeError, "Failed to apply LoRA");
- return Qnil;
- }
- return Qnil;
+ return INT2NUM(llama_rope_type(ptr->model));
  }

  static VALUE _llama_model_get_model_n_vocab(VALUE self) {
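The two readers added to `Model` above are thin wrappers over `llama_vocab_type` and `llama_rope_type`; on the Ruby side they return plain integers that mirror the corresponding llama.cpp enums. A small hedged sketch, again assuming the README-style construction and a placeholder model path:

```ruby
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: './model.gguf', params: LLaMACpp::ModelParams.new)
puts model.vocab_type # integer matching llama.cpp's llama_vocab_type enum
puts model.rope_type  # integer matching llama.cpp's llama_rope_type enum
```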
@@ -2038,8 +2012,6 @@ public:
  rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
  rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
- rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
- rb_define_method(rb_cLLaMAContext, "eval_embd", RUBY_METHOD_FUNC(_llama_context_eval_embd), -1);
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2054,14 +2026,16 @@ public:
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
- rb_define_method(rb_cLLaMAContext, "kv_cache_seq_shift", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_shift), 4);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_seq_add", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_add), 4);
  rb_define_method(rb_cLLaMAContext, "kv_cache_seq_div", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_div), 4);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_seq_pos_max", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_pos_max), 1);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_defrag", RUBY_METHOD_FUNC(_llama_context_kv_cache_defrag), 0);
+ rb_define_method(rb_cLLaMAContext, "kv_cache_update", RUBY_METHOD_FUNC(_llama_context_kv_cache_update), 0);
  rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
  rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
  rb_define_method(rb_cLLaMAContext, "sample_apply_guidance", RUBY_METHOD_FUNC(_llama_context_sample_apply_guidance), -1);
- rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
  rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
@@ -2070,7 +2044,6 @@ public:
  rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
  rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
  rb_define_method(rb_cLLaMAContext, "sample_entropy", RUBY_METHOD_FUNC(_llama_context_sample_entropy), -1);
- rb_define_method(rb_cLLaMAContext, "sample_temperature", RUBY_METHOD_FUNC(_llama_context_sample_temperature), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_mirostat", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_mirostat_v2", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat_v2), -1);
  rb_define_method(rb_cLLaMAContext, "sample_token_greedy", RUBY_METHOD_FUNC(_llama_context_sample_token_greedy), 1);
@@ -2122,110 +2095,6 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[3] = { rb_intern("tokens"), rb_intern("n_past"), rb_intern("n_tokens") };
- VALUE kw_values[3] = { Qundef, Qundef, Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
- rb_warn("eval is deprecated. Use decode instead.");
-
- if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
- rb_raise(rb_eArgError, "tokens must be an Array");
- return Qnil;
- }
- if (!RB_INTEGER_TYPE_P(kw_values[1])) {
- rb_raise(rb_eArgError, "n_past must be an integer");
- return Qnil;
- }
- if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "n_tokens must be an integer");
- return Qnil;
- }
-
- const size_t tokens_len = RARRAY_LEN(kw_values[0]);
- std::vector<llama_token> embd(tokens_len);
- for (size_t i = 0; i < tokens_len; i++) {
- VALUE token = rb_ary_entry(kw_values[0], i);
- if (!RB_INTEGER_TYPE_P(token)) {
- rb_raise(rb_eArgError, "tokens must be an array of integers");
- return Qnil;
- }
- embd[i] = NUM2INT(token);
- }
-
- const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
- const int n_past = NUM2INT(kw_values[1]);
-
- LLaMAContextWrapper* ptr = get_llama_context(self);
- if (ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- if (llama_eval(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
- rb_raise(rb_eRuntimeError, "Failed to evaluate");
- return Qnil;
- }
-
- rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
- rb_iv_set(self, "@has_evaluated", Qtrue);
-
- return Qnil;
- }
-
- static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[3] = { rb_intern("embd"), rb_intern("n_past"), rb_intern("n_tokens") };
- VALUE kw_values[3] = { Qundef, Qundef, Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
-
- rb_warn("eval_embd is deprecated. Use decode instead.");
-
- if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
- rb_raise(rb_eArgError, "tokens must be an Array");
- return Qnil;
- }
- if (!RB_INTEGER_TYPE_P(kw_values[1])) {
- rb_raise(rb_eArgError, "n_past must be an integer");
- return Qnil;
- }
- if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
- rb_raise(rb_eArgError, "n_tokens must be an integer");
- return Qnil;
- }
-
- const size_t tokens_len = RARRAY_LEN(kw_values[0]);
- std::vector<float> embd(tokens_len);
- for (size_t i = 0; i < tokens_len; i++) {
- VALUE el = rb_ary_entry(kw_values[0], i);
- if (!RB_FLOAT_TYPE_P(el)) {
- rb_raise(rb_eArgError, "embd must be an array of floats");
- return Qnil;
- }
- embd[i] = NUM2DBL(el);
- }
-
- const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
- const int n_past = NUM2INT(kw_values[1]);
-
- LLaMAContextWrapper* ptr = get_llama_context(self);
- if (ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- if (llama_eval_embd(ptr->ctx, embd.data(), n_tokens, n_past) != 0) {
- rb_raise(rb_eRuntimeError, "Failed to evaluate");
- return Qnil;
- }
-
- rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
- rb_iv_set(self, "@has_evaluated", Qtrue);
-
- return Qnil;
- }
-
  static VALUE _llama_context_decode(VALUE self, VALUE batch) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
@@ -2430,13 +2299,13 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_kv_cache_seq_shift(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
+ static VALUE _llama_context_kv_cache_seq_add(VALUE self, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eArgError, "LLaMA context is not initialized");
  return Qnil;
  }
- llama_kv_cache_seq_shift(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
+ llama_kv_cache_seq_add(ptr->ctx, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
  return Qnil;
  }

@@ -2450,6 +2319,35 @@ private:
  return Qnil;
  }

+ static VALUE _llama_context_kv_cache_seq_pos_max(VALUE self, VALUE seq_id) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eArgError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ return INT2NUM(llama_kv_cache_seq_pos_max(ptr->ctx, NUM2INT(seq_id)));
+ }
+
+ static VALUE _llama_context_kv_cache_defrag(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ llama_kv_cache_defrag(ptr->ctx);
+ return Qnil;
+ }
+
+ static VALUE _llama_context_kv_cache_update(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ llama_kv_cache_update(ptr->ctx);
+ return Qnil;
+ }
+
  static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
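The rename of `kv_cache_seq_shift` to `kv_cache_seq_add` keeps the same four positional integers (`seq_id`, `p0`, `p1`, `delta`), and the three new bindings above expose llama.cpp's KV-cache maintenance hooks: `kv_cache_defrag` requests a defragmentation pass and `kv_cache_update` applies pending cache operations. A hedged sketch, assuming a `context` built as in the earlier example:

```ruby
# 0.12.x: context.kv_cache_seq_shift(0, 10, 20, 5)
context.kv_cache_seq_add(0, 10, 20, 5)    # shift cells in [10, 20) of sequence 0 by +5

max_pos = context.kv_cache_seq_pos_max(0) # highest populated position for sequence 0
context.kv_cache_defrag                   # request a defragmentation pass
context.kv_cache_update                   # apply pending shift/defrag operations
puts max_pos
```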
@@ -2659,46 +2557,6 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
- VALUE kw_values[2] = { Qundef, Qundef };
- VALUE candidates = Qnil;
- rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
- rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
-
- if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
- rb_raise(rb_eArgError, "guidance must be a Context");
- return Qnil;
- }
- if (!RB_FLOAT_TYPE_P(kw_values[1])) {
- rb_raise(rb_eArgError, "scale must be a float");
- return Qnil;
- }
-
- LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- if (ctx_ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
- if (cnd_ptr->array.data == nullptr) {
- rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
- return Qnil;
- }
-
- LLaMAContextWrapper* guidance_ptr = get_llama_context(kw_values[0]);
- if (guidance_ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "guidance context is not initialized");
- return Qnil;
- }
- const float scale = NUM2DBL(kw_values[1]);
-
- llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);
-
- return Qnil;
- }
-
  static VALUE _llama_context_sample_softmax(VALUE self, VALUE candidates) {
  if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
  rb_raise(rb_eArgError, "argument must be a TokenDataArray");
@@ -2994,42 +2852,6 @@ private:
  return Qnil;
  }

- static VALUE _llama_context_sample_temperature(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("temperature") };
- VALUE kw_values[1] = { Qundef };
- VALUE candidates = Qnil;
- rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
- rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
-
- rb_warn("sample_temperature is deprecated. Use sample_temp instead.");
-
- if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
- rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
- return Qnil;
- }
- if (!RB_FLOAT_TYPE_P(kw_values[0])) {
- rb_raise(rb_eArgError, "temperature must be a float");
- return Qnil;
- }
-
- LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- if (ctx_ptr->ctx == NULL) {
- rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
- return Qnil;
- }
- LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
- if (cnd_ptr->array.data == nullptr) {
- rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
- return Qnil;
- }
- const float temperature = NUM2DBL(kw_values[0]);
-
- llama_sample_temperature(ctx_ptr->ctx, &(cnd_ptr->array), temperature);
-
- return Qnil;
- }
-
  static VALUE _llama_context_sample_token_mirostat(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[4] = { rb_intern("tau"), rb_intern("eta"), rb_intern("m"), rb_intern("mu") };
@@ -3243,15 +3065,8 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {

  // module functions

- static VALUE rb_llama_llama_backend_init(int argc, VALUE* argv, VALUE self) {
- VALUE kw_args = Qnil;
- ID kw_table[1] = { rb_intern("numa") };
- VALUE kw_values[1] = { Qundef };
- rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
-
- const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
- llama_backend_init(numa);
+ static VALUE rb_llama_llama_backend_init(VALUE self) {
+ llama_backend_init();

  return Qnil;
  }
@@ -3262,6 +3077,17 @@ static VALUE rb_llama_llama_backend_free(VALUE self) {
  return Qnil;
  }

+ static VALUE rb_llama_llama_numa_init(VALUE self, VALUE strategy) {
+ if (!RB_INTEGER_TYPE_P(strategy)) {
+ rb_raise(rb_eArgError, "strategy must be an integer");
+ return Qnil;
+ }
+
+ llama_numa_init(static_cast<enum ggml_numa_strategy>(NUM2INT(strategy)));
+
+ return Qnil;
+ }
+
  static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[3] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("params") };
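At the Ruby level, the old `backend_init(numa: true)` call is now split in two: `backend_init` takes no arguments and NUMA setup moves to the new `numa_init`, which receives an integer that is cast to ggml's `ggml_numa_strategy`. A hedged sketch; the diff defines no Ruby constants for the strategy values, so a raw integer is passed:

```ruby
require 'llama_cpp'

LLaMACpp.backend_init
# Raw ggml_numa_strategy value; 1 is "distribute" in ggml at this revision (assumption,
# no named Ruby constants for the strategy are defined in this diff).
LLaMACpp.numa_init(1)
```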
@@ -3303,16 +3129,6 @@ static VALUE rb_llama_time_us(VALUE self) {
  return LONG2NUM(llama_time_us());
  }

- static VALUE rb_llama_mmap_supported(VALUE self) {
- rb_warn("mmap_supported? is deprecated. Use supports_mmap? instead.");
- return llama_mmap_supported() ? Qtrue : Qfalse;
- }
-
- static VALUE rb_llama_mlock_supported(VALUE self) {
- rb_warn("mlock_supported? is deprecated. Use supports_mlock? instead.");
- return llama_mlock_supported() ? Qtrue : Qfalse;
- }
-
  static VALUE rb_llama_max_devices(VALUE self) {
  return SIZET2NUM(llama_max_devices());
  }
@@ -3345,13 +3161,12 @@ extern "C" void Init_llama_cpp(void) {
  RbLLaMAGrammarElement::define_class(rb_mLLaMACpp);
  RbLLaMAGrammar::define_class(rb_mLLaMACpp);

- rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
+ rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, 0);
  rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
+ rb_define_module_function(rb_mLLaMACpp, "numa_init", rb_llama_llama_numa_init, 1);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
  rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
  rb_define_module_function(rb_mLLaMACpp, "time_us", rb_llama_time_us, 0);
- rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
- rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
  rb_define_module_function(rb_mLLaMACpp, "supports_mmap?", rb_llama_supports_mmap, 0);
  rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
@@ -3389,14 +3204,16 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XXS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ2_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ2_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q2_K_S", INT2NUM(LLAMA_FTYPE_MOSTLY_Q2_K_S));
- rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q3_K_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_Q3_K_XS));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ3_XXS", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ3_XXS));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ1_S", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ1_S));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_IQ4_NL", INT2NUM(LLAMA_FTYPE_MOSTLY_IQ4_NL));

  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));

- rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_INT", INT2NUM(LLAMA_KV_OVERRIDE_INT));
- rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_FLOAT));
- rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_BOOL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_INT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_INT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_FLOAT", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_FLOAT));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_KV_OVERRIDE_TYPE_BOOL", INT2NUM(LLAMA_KV_OVERRIDE_TYPE_BOOL));

  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_END", INT2NUM(LLAMA_GRETYPE_END));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_ALT", INT2NUM(LLAMA_GRETYPE_ALT));
@@ -3406,15 +3223,19 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));

- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
- rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_NONE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_TYPE_LINEAR));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_YARN", INT2NUM(LLAMA_ROPE_SCALING_TYPE_YARN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_TYPE_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_TYPE_MAX_VALUE));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_NONE", INT2NUM(LLAMA_POOLING_TYPE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_MEAN", INT2NUM(LLAMA_POOLING_TYPE_MEAN));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));

- rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_NONE", INT2NUM(LLAMA_SPLIT_NONE));
- rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_LAYER", INT2NUM(LLAMA_SPLIT_LAYER));
- rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_ROW", INT2NUM(LLAMA_SPLIT_ROW));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));

  std::stringstream ss_magic;
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
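These constant renames are the concrete form of the "Rename some constants" changelog entry and follow upstream llama.cpp's enum renames: `LLAMA_KV_OVERRIDE_*` becomes `LLAMA_KV_OVERRIDE_TYPE_*`, `LLAMA_ROPE_SCALING_*` becomes `LLAMA_ROPE_SCALING_TYPE_*`, `LLAMA_SPLIT_*` becomes `LLAMA_SPLIT_MODE_*`, and the pooling constants added in 0.12.7 now carry the `LLAMA_POOLING_TYPE_*` names. A quick sketch of code touched by the rename:

```ruby
require 'llama_cpp'

# 0.12.x names: LLaMACpp::LLAMA_SPLIT_LAYER, LLaMACpp::LLAMA_ROPE_SCALING_YARN, ...
puts LLaMACpp::LLAMA_SPLIT_MODE_LAYER
puts LLaMACpp::LLAMA_ROPE_SCALING_TYPE_YARN
puts LLaMACpp::LLAMA_POOLING_TYPE_MEAN
puts LLaMACpp::LLAMA_KV_OVERRIDE_TYPE_INT
```

The version constants diff below closes out the release: the gem version moves to 0.13.0 and the bundled llama.cpp revision to b2303.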
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.12.6'
+ VERSION = '0.13.0'

  # The version of llama.cpp bundled with llama_cpp.rb.
- LLAMA_CPP_VERSION = 'b2143'
+ LLAMA_CPP_VERSION = 'b2303'
  end
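After upgrading, the bumped constants can be confirmed at runtime:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION           # => 0.13.0
puts LLaMACpp::LLAMA_CPP_VERSION # => b2303
```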