llama_cpp 0.21.0 → 0.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 691ff52591a63387090485f1d322726b8dc5dc89630cdac4fe2bfcb3372da50d
- data.tar.gz: c27f3a43878e787f32eaeaa2538d03dca4e2e3de0484ad2e920ec14826cf6ba5
+ metadata.gz: 5f3d80e415240f21df0ba7e314118712f6f2d2d8cb052eef41127e7de7ae7e51
+ data.tar.gz: 67fff55bf83ef4d97f24b0c3c450eb493464763c23595fcb5b60c248fd1f551e
  SHA512:
- metadata.gz: df049a84a78bb2d95cd4fe1f63f05c1bb9f965c0e5c3bfaca9668e98fca0db2eb8d80560ffa819540fa25b0a099a0b2a3feafc85a38d73632f3db79445d19a07
- data.tar.gz: d9138045ba1d37dbaab919ea973ef81306dde30dd6d67048660108dd9f90fe1ffe671becd7f295afa2b01150d73269ac045474373c98ff5805ce35c582dd28d4
+ metadata.gz: f6a129952bc812c130a235743beeb0a5ebcafcbe627d7d13d625e8cf5b143e2cb41cd4d78cfacc98c00a1d0857dc68bb8a6b30d486da5f22e8bcbc6a63ac5489
+ data.tar.gz: c944f2a37727bd3e295dc83d9d9c5c5748a04d7f867aad86895c312a100fa65b7c3fa9f3973bf08adc47c26c2a0e73586f7aa92b92230bee72c86b10dc4693fc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,18 @@
+ ## [[0.21.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.1...v0.21.2)] - 2025-08-09
+
+ - Change supported llama.cpp version to b6100.
+ - Add `LLAMA_FTYPE_MOSTLY_MXFP4_MOE` constant.
+ - Add `use_extra_bufts` accessor to `LlamaModelParams`.
+ - Add `llama_model_is_diffusion?` module function.
+
+ ## [[0.21.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.0...v0.21.1)] - 2025-07-19
+
+ - Change supported llama.cpp version to b5930.
+ - Add `n_reused` reader to `LlamaPerfContextData`.
+ - Add `llama_vocab_mask` module function.
+ - Add `kv_unified` accessor to `LlamaContextParams`.
+ - Add `LLAMA_VOCAB_TYPE_PLAMO2` constant.
+
  ## [[0.21.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.4...v0.21.0)] - 2025-07-12

  - Change supported llama.cpp version to b5870.
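Read together, the 0.21.1 and 0.21.2 entries add two boolean accessors (`use_extra_bufts`, `kv_unified`), two module functions (`llama_model_is_diffusion?`, `llama_vocab_mask`), a perf counter (`n_reused`), and two constants. A minimal Ruby sketch of the new surface, assuming the gem's usual one-to-one bindings such as `llama_model_default_params`, `llama_context_default_params`, and `llama_model_load_from_file`; the GGUF path is a placeholder, not part of this gem:

```ruby
require 'llama_cpp'

# New boolean accessors on the two params structs.
model_params = LlamaCpp.llama_model_default_params
model_params.use_extra_bufts = true   # added in 0.21.2

context_params = LlamaCpp.llama_context_default_params
context_params.kv_unified = true      # added in 0.21.1

# 'model.gguf' is a placeholder; a real model file is required.
model = LlamaCpp.llama_model_load_from_file('model.gguf', model_params)

# New predicate mirroring upstream llama_model_is_diffusion.
puts LlamaCpp.llama_model_is_diffusion?(model)
```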
data/ext/llama_cpp/llama_cpp.c CHANGED
@@ -530,6 +530,17 @@ static VALUE llama_model_params_set_check_tensors(VALUE self, VALUE check_tensor
    return check_tensors;
  }

+ static VALUE llama_model_params_get_use_extra_bufts(VALUE self) {
+   struct llama_model_params* data = get_llama_model_params(self);
+   return data->use_extra_bufts ? Qtrue : Qfalse;
+ }
+
+ static VALUE llama_model_params_set_use_extra_bufts(VALUE self, VALUE use_extra_bufts) {
+   struct llama_model_params* data = get_llama_model_params(self);
+   data->use_extra_bufts = RTEST(use_extra_bufts) ? true : false;
+   return use_extra_bufts;
+ }
+
  /* struct llama_context_params */
  static void llama_context_params_free(void *ptr) {
    if (ptr) {
@@ -838,6 +849,17 @@ static VALUE llama_context_params_set_swa_full(VALUE self, VALUE swa_full) {
    return swa_full;
  }

+ static VALUE llama_context_params_get_kv_unified(VALUE self) {
+   struct llama_context_params* data = get_llama_context_params(self);
+   return data->kv_unified ? Qtrue : Qfalse;
+ }
+
+ static VALUE llama_context_params_set_kv_unified(VALUE self, VALUE kv_unified) {
+   struct llama_context_params* data = get_llama_context_params(self);
+   data->kv_unified = RTEST(kv_unified) ? true : false;
+   return kv_unified;
+ }
+
  /* llama_model_quantize_params */
  static void llama_model_quantize_params_free(void *ptr) {
    if (ptr) {
@@ -1763,6 +1785,20 @@ static VALUE rb_llama_model_is_recurrent(VALUE self, VALUE model) {
    return llama_model_is_recurrent(model_wrapper->model) ? Qtrue : Qfalse;
  }

+ /**
+  * @overload llama_model_is_diffusion?(model)
+  * @param [LlamaModel] model
+  * @return [Boolean]
+  */
+ static VALUE rb_llama_model_is_diffusion(VALUE self, VALUE model) {
+   if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
+     rb_raise(rb_eArgError, "model must be a LlamaModel");
+     return Qnil;
+   }
+   llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
+   return llama_model_is_diffusion(model_wrapper->model) ? Qtrue : Qfalse;
+ }
+
  /**
   * @overload llama_model_quantize(fname_inp, fname_out, params)
   * @param [String] fname_inp
@@ -2940,6 +2976,22 @@ static VALUE rb_llama_vocab_pad(VALUE self, VALUE vocab) {
    return INT2NUM(token);
  }

+ /**
+  * @overload llama_vocab_mask(vocab)
+  * @param [LlamaVocab] vocab
+  * @return [Integer]
+  */
+ static VALUE rb_llama_vocab_mask(VALUE self, VALUE vocab) {
+   if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
+     rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
+     return Qnil;
+   }
+   llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
+   const int32_t token = llama_vocab_mask(vocab_wrapper->vocab);
+   RB_GC_GUARD(vocab);
+   return INT2NUM(token);
+ }
+
  /**
   * @overload llama_vocab_get_add_bos
   * @param [LlamaVocab] vocab
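`llama_vocab_mask` follows the same pattern as the neighboring `llama_vocab_pad` getter above. A sketch, assuming `model` from the earlier example and the gem's `llama_model_get_vocab` binding; upstream llama.cpp returns -1 (`LLAMA_TOKEN_NULL`) when a vocabulary defines no mask token:

```ruby
vocab = LlamaCpp.llama_model_get_vocab(model)
mask_id = LlamaCpp.llama_vocab_mask(vocab)
# A negative id signals that this vocabulary has no mask token.
puts mask_id >= 0 ? "mask token id: #{mask_id}" : 'no mask token'
```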
@@ -3914,6 +3966,7 @@ static VALUE llama_perf_context_data_alloc(VALUE self) {
    data->t_eval_ms = 0.0;
    data->n_p_eval = 0;
    data->n_eval = 0;
+   data->n_reused = 0;
    return TypedData_Wrap_Struct(self, &llama_perf_context_data_type, data);
  }

@@ -3953,6 +4006,11 @@ static VALUE llama_perf_context_data_get_n_eval(VALUE self) {
    return INT2NUM(data->n_eval);
  }

+ static VALUE llama_perf_context_data_get_n_reused(VALUE self) {
+   struct llama_perf_context_data* data = get_llama_perf_context_data(self);
+   return INT2NUM(data->n_reused);
+ }
+
  /* struct llama_perf_sampler_data */
  static void llama_perf_sampler_data_free(void* ptr) {
    if (ptr) {
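`n_reused` joins the existing counters on `LlamaPerfContextData`. A sketch, assuming the gem also binds `llama_perf_context` to populate this struct from a live context (`context` here is a hypothetical, already-created LlamaContext):

```ruby
# Read the per-context performance counters after some decode calls.
perf = LlamaCpp.llama_perf_context(context)
puts "prompt: #{perf.n_p_eval}, generated: #{perf.n_eval}, reused: #{perf.n_reused}"
```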
@@ -4154,6 +4212,7 @@ void Init_llama_cpp(void) {
    rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
    rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_UGM", INT2NUM(LLAMA_VOCAB_TYPE_UGM));
    rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_RWKV", INT2NUM(LLAMA_VOCAB_TYPE_RWKV));
+   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_PLAMO2", INT2NUM(LLAMA_VOCAB_TYPE_PLAMO2));
    /* llama_rope_type */
    /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
    rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));
@@ -4217,6 +4276,7 @@ void Init_llama_cpp(void) {
    rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_BF16", INT2NUM(LLAMA_FTYPE_MOSTLY_BF16));
    rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0));
    rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
+   rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
    rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
    /* llama_rope_scaling_type */
    /* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
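The new file type plugs into the quantize entry point visible earlier in this diff (`llama_model_quantize(fname_inp, fname_out, params)`). A sketch, assuming the gem binds `llama_model_quantize_default_params` and that the params object exposes an `ftype` accessor, with placeholder file names:

```ruby
params = LlamaCpp.llama_model_quantize_default_params
params.ftype = LlamaCpp::LLAMA_FTYPE_MOSTLY_MXFP4_MOE
# 'input.gguf' and 'output-mxfp4.gguf' are placeholders.
LlamaCpp.llama_model_quantize('input.gguf', 'output-mxfp4.gguf', params)
```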
@@ -4462,6 +4522,17 @@ void Init_llama_cpp(void) {
   * @return [Boolean]
   */
  rb_define_method(rb_cLlamaModelParams, "check_tensors=", RUBY_METHOD_FUNC(llama_model_params_set_check_tensors), 1);
+ /**
+  * Document-method: use_extra_bufts
+  * @return [Boolean]
+  */
+ rb_define_method(rb_cLlamaModelParams, "use_extra_bufts", RUBY_METHOD_FUNC(llama_model_params_get_use_extra_bufts), 0);
+ /**
+  * Document-method: use_extra_bufts=
+  * @param [Boolean] use_extra_bufts
+  * @return [Boolean]
+  */
+ rb_define_method(rb_cLlamaModelParams, "use_extra_bufts=", RUBY_METHOD_FUNC(llama_model_params_set_use_extra_bufts), 1);

  /**
   * Document-class: LlamaCpp::LlamaContextParams
@@ -4746,6 +4817,17 @@ void Init_llama_cpp(void) {
   * @return [Boolean]
   */
  rb_define_method(rb_cLlamaContextParams, "swa_full=", RUBY_METHOD_FUNC(llama_context_params_set_swa_full), 1);
+ /**
+  * Document-method: kv_unified
+  * @return [Boolean]
+  */
+ rb_define_method(rb_cLlamaContextParams, "kv_unified", RUBY_METHOD_FUNC(llama_context_params_get_kv_unified), 0);
+ /**
+  * Document-method: kv_unified=
+  * @param [Boolean] kv_unified
+  * @return [Boolean]
+  */
+ rb_define_method(rb_cLlamaContextParams, "kv_unified=", RUBY_METHOD_FUNC(llama_context_params_set_kv_unified), 1);
  /* TODO: ggml_abort_callback abort_callback */
  /* TODO: void* abort_callback_data */

@@ -5061,6 +5143,9 @@ void Init_llama_cpp(void) {
    /* llama_model_is_recurrent */
    rb_define_module_function(rb_mLlamaCpp, "llama_model_is_recurrent?", rb_llama_model_is_recurrent, 1);

+   /* llama_model_is_diffusion */
+   rb_define_module_function(rb_mLlamaCpp, "llama_model_is_diffusion?", rb_llama_model_is_diffusion, 1);
+
    /* llama_model_quantize */
    rb_define_module_function(rb_mLlamaCpp, "llama_model_quantize", rb_llama_model_quantize, 3);

@@ -5248,6 +5333,9 @@ void Init_llama_cpp(void) {
    /* llama_vocab_pad */
    rb_define_module_function(rb_mLlamaCpp, "llama_vocab_pad", rb_llama_vocab_pad, 1);

+   /* llama_vocab_mask */
+   rb_define_module_function(rb_mLlamaCpp, "llama_vocab_mask", rb_llama_vocab_mask, 1);
+
    /* llama_vocab_get_add_bos */
    rb_define_module_function(rb_mLlamaCpp, "llama_vocab_get_add_bos", rb_llama_vocab_get_add_bos, 1);

@@ -5431,6 +5519,11 @@ void Init_llama_cpp(void) {
   * @return [Integer]
   */
  rb_define_method(rb_cLlamaPerfContextData, "n_eval", RUBY_METHOD_FUNC(llama_perf_context_data_get_n_eval), 0);
+ /**
+  * Document-method: n_reused
+  * @return [Integer]
+  */
+ rb_define_method(rb_cLlamaPerfContextData, "n_reused", RUBY_METHOD_FUNC(llama_perf_context_data_get_n_reused), 0);

  /**
   * Document-class: LlamaCpp::LlamaPerfSamplerData
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LlamaCpp
    # The version of llama_cpp.rb you install.
-   VERSION = '0.21.0'
+   VERSION = '0.21.2'

    # The supported version of llama.cpp.
-   LLAMA_CPP_VERSION = 'b5870'
+   LLAMA_CPP_VERSION = 'b6100'
  end
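These two constants are the ones to check at runtime when pinning behavior to a given upstream build; both values below come straight from this diff:

```ruby
require 'llama_cpp'

puts LlamaCpp::VERSION            # => "0.21.2"
puts LlamaCpp::LLAMA_CPP_VERSION  # => "b6100"
```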
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
-   version: 0.21.0
+   version: 0.21.2
  platform: ruby
  authors:
  - yoshoku
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubygems_version: 3.6.9
+ rubygems_version: 3.7.0
  specification_version: 4
  summary: Ruby bindings for the llama.cpp.
  test_files: []