llama_cpp 0.20.4 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6acca29477a43c9703c7035e53acd69450de7103b2d7f242506c7e2016f1a261
4
- data.tar.gz: 77c108b2f1ea33588a9dbe6c0538e289f90ea5be2090adbf8f663fff8b8b0221
3
+ metadata.gz: 64aab047f44a9002c5c388f6a774448671d7be9618170310d2b7dd1091a99670
4
+ data.tar.gz: 63f0d908b99a45865a9b9d81ae595adb0d9a1f258976267da3ce5b5df747ae0d
5
5
  SHA512:
6
- metadata.gz: 69d453a3cf9c23ab3aaa60c6a20d80a7fc75424cb762c631a06712a2134fc7cf6830168241a3d689fd0b7b621804e27b6461415977fb51c096c10fbb2aa0e922
7
- data.tar.gz: bc8de61663616ffd40c90e34df71095a7c85e3aa3373fc0c395fe101ded4f38e1670af42aeb11c6380c6932d2939d137d299d377db3ce9dc0510fd56e9d8b7a5
6
+ metadata.gz: 4f5344b36ff0c0bbd812f25f2b7a897893cb8ffb0700e3f8c23dfbd81cc8b1c4ee1893d5198c74df625fbc3c4c925f32e4c868c60b6ed13e7a5412eef3d9b2b6
7
+ data.tar.gz: dbc8d279a9284ba62786d3c5d267352445814289cb1f8a2268d2bb2ff72a4bc7a2534cb2c4582f945e13a68a645cec51101085203ff6dac1a2b7df6885412398
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## [[0.21.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.0...v0.21.1)] - 2025-07-19
2
+
3
+ - Change supported llama.cpp version to b5930.
4
+ - Add `n_reused` reader to `LlamaPerfContextData`.
5
+ - Add `llama_vocab_mask` module function.
6
+ - Add `kv_unified` accessor to `LlamaContextParams`.
7
+ - Add `LLAMA_VOCAB_TYPE_PLAMO2` constant.
8
+
9
+ ## [[0.21.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.4...v0.21.0)] - 2025-07-12
10
+
11
+ - Change supported llama.cpp version to b5870.
12
+ - Remove constants for `llama_vocab_pre_type` such as `LLAMA_VOCAB_PRE_TYPE_DEFAULT` and `LLAMA_VOCAB_PRE_TYPE_LLAMA3`.
13
+
1
14
  ## [[0.20.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.3...v0.20.4)] - 2025-06-21
2
15
 
3
16
  - Change supported llama.cpp version to b5720.
@@ -5,12 +18,10 @@
5
18
 
6
19
  ## [[0.20.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.2...v0.20.3)] - 2025-06-14
7
20
 
8
-
9
21
  - Change supported llama.cpp version to b5650
10
22
  - Add `data` argument to `llama_memory_clear` module function.
11
23
  - Fix llama_memory_t wrapper by removing unnecessary struct keyword and pointer symbol.
12
24
 
13
-
14
25
  ## [[0.20.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.1...v0.20.2)] - 2025-06-07
15
26
 
16
27
  - Change supported llama.cpp version to b5600
@@ -838,6 +838,17 @@ static VALUE llama_context_params_set_swa_full(VALUE self, VALUE swa_full) {
838
838
  return swa_full;
839
839
  }
840
840
 
841
+ static VALUE llama_context_params_get_kv_unified(VALUE self) {
842
+ struct llama_context_params* data = get_llama_context_params(self);
843
+ return data->kv_unified ? Qtrue : Qfalse;
844
+ }
845
+
846
+ static VALUE llama_context_params_set_kv_unified(VALUE self, VALUE kv_unified) {
847
+ struct llama_context_params* data = get_llama_context_params(self);
848
+ data->kv_unified = RTEST(kv_unified) ? true : false;
849
+ return kv_unified;
850
+ }
851
+
841
852
  /* llama_model_quantize_params */
842
853
  static void llama_model_quantize_params_free(void *ptr) {
843
854
  if (ptr) {
@@ -2940,6 +2951,22 @@ static VALUE rb_llama_vocab_pad(VALUE self, VALUE vocab) {
2940
2951
  return INT2NUM(token);
2941
2952
  }
2942
2953
 
2954
+ /**
2955
+ * @overload llama_vocab_mask(vocab)
2956
+ * @param [LlamaVocab] vocab
2957
+ * @return [Integer]
2958
+ */
2959
+ static VALUE rb_llama_vocab_mask(VALUE self, VALUE vocab) {
2960
+ if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
2961
+ rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
2962
+ return Qnil;
2963
+ }
2964
+ llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
2965
+ const int32_t token = llama_vocab_mask(vocab_wrapper->vocab);
2966
+ RB_GC_GUARD(vocab);
2967
+ return INT2NUM(token);
2968
+ }
2969
+
2943
2970
  /**
2944
2971
  * @overload llama_vocab_get_add_bos
2945
2972
  * @param [LlamaVocab] vocab
@@ -3914,6 +3941,7 @@ static VALUE llama_perf_context_data_alloc(VALUE self) {
3914
3941
  data->t_eval_ms = 0.0;
3915
3942
  data->n_p_eval = 0;
3916
3943
  data->n_eval = 0;
3944
+ data->n_reused = 0;
3917
3945
  return TypedData_Wrap_Struct(self, &llama_perf_context_data_type, data);
3918
3946
  }
3919
3947
 
@@ -3953,6 +3981,11 @@ static VALUE llama_perf_context_data_get_n_eval(VALUE self) {
3953
3981
  return INT2NUM(data->n_eval);
3954
3982
  }
3955
3983
 
3984
+ static VALUE llama_perf_context_data_get_n_reused(VALUE self) {
3985
+ struct llama_perf_context_data* data = get_llama_perf_context_data(self);
3986
+ return INT2NUM(data->n_reused);
3987
+ }
3988
+
3956
3989
  /* struct llama_perf_sampler_data */
3957
3990
  static void llama_perf_sampler_data_free(void* ptr) {
3958
3991
  if (ptr) {
@@ -4154,44 +4187,7 @@ void Init_llama_cpp(void) {
4154
4187
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
4155
4188
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_UGM", INT2NUM(LLAMA_VOCAB_TYPE_UGM));
4156
4189
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_RWKV", INT2NUM(LLAMA_VOCAB_TYPE_RWKV));
4157
- /* llama_vocab_pre_type */
4158
- /* Document-const: LlamaCpp::LLAMA_VOCAB_PRE_TYPE_DEFAULT */
4159
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEFAULT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEFAULT));
4160
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_LLAMA3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_LLAMA3));
4161
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM));
4162
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER));
4163
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_FALCON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_FALCON));
4164
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_MPT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MPT));
4165
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_STARCODER", INT2NUM(LLAMA_VOCAB_PRE_TYPE_STARCODER));
4166
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT2));
4167
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_REFACT", INT2NUM(LLAMA_VOCAB_PRE_TYPE_REFACT));
4168
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_COMMAND_R", INT2NUM(LLAMA_VOCAB_PRE_TYPE_COMMAND_R));
4169
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_STABLELM2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_STABLELM2));
4170
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_QWEN2", INT2NUM(LLAMA_VOCAB_PRE_TYPE_QWEN2));
4171
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_OLMO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_OLMO));
4172
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
4173
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
4174
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
4175
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3));
4176
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4));
4177
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
4178
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_JAIS", INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS));
4179
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_TEKKEN", INT2NUM(LLAMA_VOCAB_PRE_TYPE_TEKKEN));
4180
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_SMOLLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMOLLM));
4181
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CODESHELL", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CODESHELL));
4182
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_BLOOM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_BLOOM));
4183
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH));
4184
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_EXAONE", INT2NUM(LLAMA_VOCAB_PRE_TYPE_EXAONE));
4185
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHAMELEON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHAMELEON));
4186
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_MINERVA", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MINERVA));
4187
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM));
4188
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT4O", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT4O));
4189
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_SUPERBPE", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SUPERBPE));
4190
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_TRILLION", INT2NUM(LLAMA_VOCAB_PRE_TYPE_TRILLION));
4191
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_BAILINGMOE", INT2NUM(LLAMA_VOCAB_PRE_TYPE_BAILINGMOE));
4192
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_LLAMA4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_LLAMA4));
4193
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_PIXTRAL", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PIXTRAL));
4194
- rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_SEED_CODER", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SEED_CODER));
4190
+ rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_PLAMO2", INT2NUM(LLAMA_VOCAB_TYPE_PLAMO2));
4195
4191
  /* llama_rope_type */
4196
4192
  /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
4197
4193
  rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));
@@ -4784,6 +4780,17 @@ void Init_llama_cpp(void) {
4784
4780
  * @return [Boolean]
4785
4781
  */
4786
4782
  rb_define_method(rb_cLlamaContextParams, "swa_full=", RUBY_METHOD_FUNC(llama_context_params_set_swa_full), 1);
4783
+ /**
4784
+ * Document-method: kv_unified
4785
+ * @return [Boolean]
4786
+ */
4787
+ rb_define_method(rb_cLlamaContextParams, "kv_unified", RUBY_METHOD_FUNC(llama_context_params_get_kv_unified), 0);
4788
+ /**
4789
+ * Document-method: kv_unified=
4790
+ * @param [Boolean] kv_unified
4791
+ * @return [Boolean]
4792
+ */
4793
+ rb_define_method(rb_cLlamaContextParams, "kv_unified=", RUBY_METHOD_FUNC(llama_context_params_set_kv_unified), 1);
4787
4794
  /* TODO: ggml_abort_callback abort_callback */
4788
4795
  /* TODO: void* abort_callback_data */
4789
4796
 
@@ -4895,6 +4902,7 @@ void Init_llama_cpp(void) {
4895
4902
  /* TODO: void* imatrix */
4896
4903
  /* TODO: void* kv_overrides */
4897
4904
  /* TODO: void* tensor_types */
4905
+ /* TODO: void* prune_layers */
4898
4906
 
4899
4907
  /**
4900
4908
  * Document-class: LlamaCpp::LlamaLogitBias
@@ -5285,6 +5293,9 @@ void Init_llama_cpp(void) {
5285
5293
  /* llama_vocab_pad */
5286
5294
  rb_define_module_function(rb_mLlamaCpp, "llama_vocab_pad", rb_llama_vocab_pad, 1);
5287
5295
 
5296
+ /* llama_vocab_mask */
5297
+ rb_define_module_function(rb_mLlamaCpp, "llama_vocab_mask", rb_llama_vocab_mask, 1);
5298
+
5288
5299
  /* llama_vocab_get_add_bos */
5289
5300
  rb_define_module_function(rb_mLlamaCpp, "llama_vocab_get_add_bos", rb_llama_vocab_get_add_bos, 1);
5290
5301
 
@@ -5468,6 +5479,11 @@ void Init_llama_cpp(void) {
5468
5479
  * @return [Integer]
5469
5480
  */
5470
5481
  rb_define_method(rb_cLlamaPerfContextData, "n_eval", RUBY_METHOD_FUNC(llama_perf_context_data_get_n_eval), 0);
5482
+ /**
5483
+ * Document-method: n_reused
5484
+ * @return [Integer]
5485
+ */
5486
+ rb_define_method(rb_cLlamaPerfContextData, "n_reused", RUBY_METHOD_FUNC(llama_perf_context_data_get_n_reused), 0);
5471
5487
 
5472
5488
  /**
5473
5489
  * Document-class: LlamaCpp::LlamaPerfSamplerData
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LlamaCpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.20.4'
6
+ VERSION = '0.21.1'
7
7
 
8
8
  # The supported version of llama.cpp.
9
- LLAMA_CPP_VERSION = 'b5720'
9
+ LLAMA_CPP_VERSION = 'b5930'
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.4
4
+ version: 0.21.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku