llama_cpp 0.21.0 → 0.21.2
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/ext/llama_cpp/llama_cpp.c +93 -0
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5f3d80e415240f21df0ba7e314118712f6f2d2d8cb052eef41127e7de7ae7e51
+  data.tar.gz: 67fff55bf83ef4d97f24b0c3c450eb493464763c23595fcb5b60c248fd1f551e
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f6a129952bc812c130a235743beeb0a5ebcafcbe627d7d13d625e8cf5b143e2cb41cd4d78cfacc98c00a1d0857dc68bb8a6b30d486da5f22e8bcbc6a63ac5489
+  data.tar.gz: c944f2a37727bd3e295dc83d9d9c5c5748a04d7f867aad86895c312a100fa65b7c3fa9f3973bf08adc47c26c2a0e73586f7aa92b92230bee72c86b10dc4693fc
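The new SHA256 values above can be checked locally: a .gem file is a tar archive whose metadata.gz and data.tar.gz entries are exactly what checksums.yaml describes. A minimal Ruby sketch, assuming only that the gem has been downloaded to the working directory under the usual file name:

require 'digest'
require 'rubygems/package'

gem_path = 'llama_cpp-0.21.2.gem' # assumed local download path

File.open(gem_path, 'rb') do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      # These digests should match the SHA256 entries in checksums.yaml above.
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end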
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,18 @@
+## [[0.21.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.1...v0.21.2)] - 2025-08-09
+
+- Change supported llama.cpp version to b6100.
+- Add `LLAMA_FTYPE_MOSTLY_MXFP4_MOE` constant.
+- Add `use_extra_bufts` accessor to `LlamaModelParams`.
+- Add `llama_model_is_diffusion?` module function.
+
+## [[0.21.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.0...v0.21.1)] - 2025-07-19
+
+- Change supported llama.cpp version to b5930.
+- Add `n_reused` reader to `LlamaPerfContextData`.
+- Add `llama_vocab_mask` module function.
+- Add `kv_unified` accessor to `LlamaContextParams`.
+- Add `LLAMA_VOCAB_TYPE_PLAMO2` constant.
+
 ## [[0.21.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.4...v0.21.0)] - 2025-07-12
 
 - Change supported llama.cpp version to b5870.
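A minimal sketch of how the 0.21.2 additions listed above might be exercised from Ruby. Only `use_extra_bufts=`, `llama_model_is_diffusion?`, and `LLAMA_FTYPE_MOSTLY_MXFP4_MOE` are confirmed by this diff; the GGUF path and the `llama_model_default_params` / `llama_model_load_from_file` helpers are assumptions based on the gem's thin mirroring of the llama.cpp C API.

require 'llama_cpp'

# Assumed loader helpers mirroring llama.h; the accessor, predicate, and
# constant below are the parts added in 0.21.2.
model_params = LlamaCpp.llama_model_default_params
model_params.use_extra_bufts = true # new accessor on LlamaModelParams

model = LlamaCpp.llama_model_load_from_file('/path/to/model.gguf', model_params)

# New module function: returns true for diffusion-based models.
puts LlamaCpp.llama_model_is_diffusion?(model)

# New file-type constant, e.g. for quantization parameters.
puts LlamaCpp::LLAMA_FTYPE_MOSTLY_MXFP4_MOE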
data/ext/llama_cpp/llama_cpp.c
CHANGED
@@ -530,6 +530,17 @@ static VALUE llama_model_params_set_check_tensors(VALUE self, VALUE check_tensor
   return check_tensors;
 }
 
+static VALUE llama_model_params_get_use_extra_bufts(VALUE self) {
+  struct llama_model_params* data = get_llama_model_params(self);
+  return data->use_extra_bufts ? Qtrue : Qfalse;
+}
+
+static VALUE llama_model_params_set_use_extra_bufts(VALUE self, VALUE use_extra_bufts) {
+  struct llama_model_params* data = get_llama_model_params(self);
+  data->use_extra_bufts = RTEST(use_extra_bufts) ? true : false;
+  return use_extra_bufts;
+}
+
 /* struct llama_context_params */
 static void llama_context_params_free(void *ptr) {
   if (ptr) {
@@ -838,6 +849,17 @@ static VALUE llama_context_params_set_swa_full(VALUE self, VALUE swa_full) {
   return swa_full;
 }
 
+static VALUE llama_context_params_get_kv_unified(VALUE self) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  return data->kv_unified ? Qtrue : Qfalse;
+}
+
+static VALUE llama_context_params_set_kv_unified(VALUE self, VALUE kv_unified) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  data->kv_unified = RTEST(kv_unified) ? true : false;
+  return kv_unified;
+}
+
 /* llama_model_quantize_params */
 static void llama_model_quantize_params_free(void *ptr) {
   if (ptr) {
@@ -1763,6 +1785,20 @@ static VALUE rb_llama_model_is_recurrent(VALUE self, VALUE model) {
   return llama_model_is_recurrent(model_wrapper->model) ? Qtrue : Qfalse;
 }
 
+/**
+ * @overload llama_model_is_diffusion?(model)
+ * @param [LlamaModel] model
+ * @return [Boolean]
+ */
+static VALUE rb_llama_model_is_diffusion(VALUE self, VALUE model) {
+  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
+    rb_raise(rb_eArgError, "model must be a LlamaModel");
+    return Qnil;
+  }
+  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
+  return llama_model_is_diffusion(model_wrapper->model) ? Qtrue : Qfalse;
+}
+
 /**
  * @overload llama_model_quantize(fname_inp, fname_out, params)
  * @param [String] fname_inp
@@ -2940,6 +2976,22 @@ static VALUE rb_llama_vocab_pad(VALUE self, VALUE vocab) {
   return INT2NUM(token);
 }
 
+/**
+ * @overload llama_vocab_mask(vocab)
+ * @param [LlamaVocab] vocab
+ * @return [Integer]
+ */
+static VALUE rb_llama_vocab_mask(VALUE self, VALUE vocab) {
+  if (!rb_obj_is_kind_of(vocab, rb_cLlamaVocab)) {
+    rb_raise(rb_eArgError, "vocab must be a LlamaVocab");
+    return Qnil;
+  }
+  llama_vocab_wrapper* vocab_wrapper = get_llama_vocab_wrapper(vocab);
+  const int32_t token = llama_vocab_mask(vocab_wrapper->vocab);
+  RB_GC_GUARD(vocab);
+  return INT2NUM(token);
+}
+
 /**
  * @overload llama_vocab_get_add_bos
  * @param [LlamaVocab] vocab
@@ -3914,6 +3966,7 @@ static VALUE llama_perf_context_data_alloc(VALUE self) {
   data->t_eval_ms = 0.0;
   data->n_p_eval = 0;
   data->n_eval = 0;
+  data->n_reused = 0;
   return TypedData_Wrap_Struct(self, &llama_perf_context_data_type, data);
 }
 
@@ -3953,6 +4006,11 @@ static VALUE llama_perf_context_data_get_n_eval(VALUE self) {
   return INT2NUM(data->n_eval);
 }
 
+static VALUE llama_perf_context_data_get_n_reused(VALUE self) {
+  struct llama_perf_context_data* data = get_llama_perf_context_data(self);
+  return INT2NUM(data->n_reused);
+}
+
 /* struct llama_perf_sampler_data */
 static void llama_perf_sampler_data_free(void* ptr) {
   if (ptr) {
@@ -4154,6 +4212,7 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_UGM", INT2NUM(LLAMA_VOCAB_TYPE_UGM));
   rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_RWKV", INT2NUM(LLAMA_VOCAB_TYPE_RWKV));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_TYPE_PLAMO2", INT2NUM(LLAMA_VOCAB_TYPE_PLAMO2));
   /* llama_rope_type */
   /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
   rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));
@@ -4217,6 +4276,7 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_BF16", INT2NUM(LLAMA_FTYPE_MOSTLY_BF16));
   rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0));
   rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
   rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
   /* llama_rope_scaling_type */
   /* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
@@ -4462,6 +4522,17 @@ void Init_llama_cpp(void) {
   * @return [Boolean]
   */
  rb_define_method(rb_cLlamaModelParams, "check_tensors=", RUBY_METHOD_FUNC(llama_model_params_set_check_tensors), 1);
+  /**
+   * Document-method: use_extra_bufts
+   * @return [Boolean]
+   */
+  rb_define_method(rb_cLlamaModelParams, "use_extra_bufts", RUBY_METHOD_FUNC(llama_model_params_get_use_extra_bufts), 0);
+  /**
+   * Document-method: use_extra_bufts=
+   * @param [Boolean] use_extra_bufts
+   * @return [Boolean]
+   */
+  rb_define_method(rb_cLlamaModelParams, "use_extra_bufts=", RUBY_METHOD_FUNC(llama_model_params_set_use_extra_bufts), 1);
 
  /**
   * Document-class: LlamaCpp::LlamaContextParams
@@ -4746,6 +4817,17 @@ void Init_llama_cpp(void) {
   * @return [Boolean]
   */
  rb_define_method(rb_cLlamaContextParams, "swa_full=", RUBY_METHOD_FUNC(llama_context_params_set_swa_full), 1);
+  /**
+   * Document-method: kv_unified
+   * @return [Boolean]
+   */
+  rb_define_method(rb_cLlamaContextParams, "kv_unified", RUBY_METHOD_FUNC(llama_context_params_get_kv_unified), 0);
+  /**
+   * Document-method: kv_unified=
+   * @param [Boolean] kv_unified
+   * @return [Boolean]
+   */
+  rb_define_method(rb_cLlamaContextParams, "kv_unified=", RUBY_METHOD_FUNC(llama_context_params_set_kv_unified), 1);
  /* TODO: ggml_abort_callback abort_callback */
  /* TODO: void* abort_callback_data */
 
@@ -5061,6 +5143,9 @@ void Init_llama_cpp(void) {
  /* llama_model_is_recurrent */
  rb_define_module_function(rb_mLlamaCpp, "llama_model_is_recurrent?", rb_llama_model_is_recurrent, 1);
 
+  /* llama_model_is_diffusion */
+  rb_define_module_function(rb_mLlamaCpp, "llama_model_is_diffusion?", rb_llama_model_is_diffusion, 1);
+
  /* llama_model_quantize */
  rb_define_module_function(rb_mLlamaCpp, "llama_model_quantize", rb_llama_model_quantize, 3);
 
@@ -5248,6 +5333,9 @@ void Init_llama_cpp(void) {
  /* llama_vocab_pad */
  rb_define_module_function(rb_mLlamaCpp, "llama_vocab_pad", rb_llama_vocab_pad, 1);
 
+  /* llama_vocab_mask */
+  rb_define_module_function(rb_mLlamaCpp, "llama_vocab_mask", rb_llama_vocab_mask, 1);
+
  /* llama_vocab_get_add_bos */
  rb_define_module_function(rb_mLlamaCpp, "llama_vocab_get_add_bos", rb_llama_vocab_get_add_bos, 1);
 
@@ -5431,6 +5519,11 @@ void Init_llama_cpp(void) {
   * @return [Integer]
   */
  rb_define_method(rb_cLlamaPerfContextData, "n_eval", RUBY_METHOD_FUNC(llama_perf_context_data_get_n_eval), 0);
+  /**
+   * Document-method: n_reused
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaPerfContextData, "n_reused", RUBY_METHOD_FUNC(llama_perf_context_data_get_n_reused), 0);
 
  /**
  * Document-class: LlamaCpp::LlamaPerfSamplerData
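The 0.21.1 registrations visible above (the `kv_unified` accessor, the `llama_vocab_mask` module function, and the `n_reused` reader) could be driven from Ruby roughly as follows. The context, vocab, and perf helpers (`llama_context_default_params`, `llama_init_from_model`, `llama_model_get_vocab`, `llama_perf_context`) are assumptions based on the gem's mirroring of the llama.cpp C API; only the three new bindings are confirmed by this diff.

require 'llama_cpp'

ctx_params = LlamaCpp.llama_context_default_params
ctx_params.kv_unified = true # new accessor: request a unified KV cache

# Assumed loader/context helpers, not part of this diff.
model   = LlamaCpp.llama_model_load_from_file('/path/to/model.gguf',
                                               LlamaCpp.llama_model_default_params)
context = LlamaCpp.llama_init_from_model(model, ctx_params)

# New module function: mask token id of the vocabulary.
vocab = LlamaCpp.llama_model_get_vocab(model)
puts LlamaCpp.llama_vocab_mask(vocab)

# New reader on LlamaPerfContextData: the reuse counter initialized to 0 above.
puts LlamaCpp.llama_perf_context(context).n_reused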
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LlamaCpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.21.0'
+  VERSION = '0.21.2'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = 'b5870'
+  LLAMA_CPP_VERSION = 'b6100'
 end
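Both constants changed here are plain Ruby constants, so a consumer can check at runtime which gem release and supported llama.cpp build it is running against:

require 'llama_cpp'

puts LlamaCpp::VERSION           # => "0.21.2"
puts LlamaCpp::LLAMA_CPP_VERSION # => "b6100"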
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.21.0
+  version: 0.21.2
 platform: ruby
 authors:
 - yoshoku
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.7.0
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.
 test_files: []