llama_cpp 0.22.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d9d289500d478dbaea942656eeaf4c076dd81fdcbf5fe670c323c5ce431945da
-  data.tar.gz: 5dff9d66db034b7f275add566e760adf84b10b578ff9a2fb32a684fd17735f8a
+  metadata.gz: bdc82f63eb7cca5133f24159dd648fa2722896a3b9cee9cafc11022e28646b5d
+  data.tar.gz: 0adad024f89582c57f1e541cb85b4220b171b64c4b12bb2f38d635efed4b6458
 SHA512:
-  metadata.gz: 6b3109cee58c80d79ba90aa3ec33e0517c8bb54a1592ff4020ca18e16bf78c0bed5389ac1c5dd1221e757c87c8e3fb1af226c45d5b3d2a5b1ee6bc7afd13e242
-  data.tar.gz: 139cb66ec6cd2adbd2b178ad7f84581698905870bb549105f9683e14a80d04d8210405762e98937fed685352a4434d89f8fe43d8cbf500b1396d091c7d6366ba
+  metadata.gz: b2e9ce298ed0f5d2cb684c4362ed4edadd14af7b3c99ccb989308483b58b81eb659c64b8c91216eb27684f17f36170833a9bb3e3dd4c05ceacf9bc3f0603c159
+  data.tar.gz: aee0bcd5b2cecd91baf0473705ba4e43b74325a0a490f3d7a872a47e629248734cc76076dd18f584855db83671ae00896bf8185a046086cfbc9bcc741fc67064
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
+## [[0.23.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.22.1...v0.23.0)] - 2025-09-05
+
+- Change supported llama.cpp version to b6380.
+- Add `llama_flash_attn_type_name` module function.
+- Add `flash_attn_type` accessor to `LlamaContextParams`.
+- Add `LLAMA_FLASH_ATTN_TYPE_AUTO`, `LLAMA_FLASH_ATTN_TYPE_DISABLED`, and `LLAMA_FLASH_ATTN_TYPE_ENABLED` constants.
+- Remove `flash_attn` accessor from `LlamaContextParams`.
+
 ## [[0.22.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.22.0...v0.22.1)] - 2025-08-30
 
 - Change supported llama.cpp version to b6310.
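In short, 0.23.0 replaces the boolean `flash_attn` switch with the enum-valued `flash_attn_type` accessor. A minimal migration sketch (assumes `LlamaCpp.llama_context_default_params`, which the gem exposes alongside the accessors changed below; the ENABLED choice is illustrative):

```ruby
require 'llama_cpp'

params = LlamaCpp.llama_context_default_params

# 0.22.x (removed in 0.23.0):
#   params.flash_attn = true

# 0.23.0: pick one of AUTO / DISABLED / ENABLED.
params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED
```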
data/ext/llama_cpp/llama_cpp.c CHANGED
@@ -672,6 +672,17 @@ static VALUE llama_context_params_set_attention_type(VALUE self, VALUE attention
   return attention_type;
 }
 
+static VALUE llama_context_params_get_flash_attn_type(VALUE self) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  return INT2NUM(data->flash_attn_type);
+}
+
+static VALUE llama_context_params_set_flash_attn_type(VALUE self, VALUE flash_attn_type) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  data->flash_attn_type = (enum llama_flash_attn_type)NUM2INT(flash_attn_type);
+  return flash_attn_type;
+}
+
 static VALUE llama_context_params_get_rope_freq_base(VALUE self) {
   struct llama_context_params* data = get_llama_context_params(self);
   return DBL2NUM(data->rope_freq_base);
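The new getter/setter pair converts between Ruby Integers and the C enum via `INT2NUM`/`NUM2INT` and performs no range validation, so callers should stick to the three constants. A round-trip sketch, under the same `llama_context_default_params` assumption as above:

```ruby
require 'llama_cpp'

params = LlamaCpp.llama_context_default_params
params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_DISABLED
# The getter returns the stored value as a plain Integer.
params.flash_attn_type == LlamaCpp::LLAMA_FLASH_ATTN_TYPE_DISABLED # => true
```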
@@ -804,17 +815,6 @@ static VALUE llama_context_params_set_offload_kqv(VALUE self, VALUE offload_kqv)
   return offload_kqv;
 }
 
-static VALUE llama_context_params_get_flash_attn(VALUE self) {
-  struct llama_context_params* data = get_llama_context_params(self);
-  return data->flash_attn ? Qtrue : Qfalse;
-}
-
-static VALUE llama_context_params_set_flash_attn(VALUE self, VALUE flash_attn) {
-  struct llama_context_params* data = get_llama_context_params(self);
-  data->flash_attn = RTEST(flash_attn) ? true : false;
-  return flash_attn;
-}
-
 static VALUE llama_context_params_get_no_perf(VALUE self) {
   struct llama_context_params* data = get_llama_context_params(self);
   return data->no_perf ? Qtrue : Qfalse;
@@ -3872,6 +3872,20 @@ static VALUE rb_llama_perf_sampler_reset(VALUE self, VALUE chain) {
   return Qnil;
 }
 
+/**
+ * @overload llama_flash_attn_type_name(flash_attn_type)
+ * @param [Integer] flash_attn_type
+ * @return [String]
+ */
+static VALUE rb_llama_flash_attn_type_name(VALUE self, VALUE flash_attn_type) {
+  if (!RB_INTEGER_TYPE_P(flash_attn_type)) {
+    rb_raise(rb_eArgError, "flash_attn_type must be an Integer");
+    return Qnil;
+  }
+  const char* name = llama_flash_attn_type_name((enum llama_flash_attn_type)NUM2INT(flash_attn_type));
+  return rb_utf8_str_new_cstr(name);
+}
+
 /* MAIN */
 void Init_llama_cpp(void) {
   char tmp[12];
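On the Ruby side, the new module function takes one of the Integer constants and returns a String; anything that is not an Integer hits the `rb_raise` guard above. A sketch (the exact name strings come from upstream llama.cpp, so they are not asserted here):

```ruby
require 'llama_cpp'

[LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO,
 LlamaCpp::LLAMA_FLASH_ATTN_TYPE_DISABLED,
 LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED].each do |type|
  # Prints the human-readable name for each enum value.
  puts LlamaCpp.llama_flash_attn_type_name(type)
end

# LlamaCpp.llama_flash_attn_type_name('auto') # non-Integer => ArgumentError
```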
@@ -4019,12 +4033,19 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+  /* llama_flash_attn_type */
+  /* Document-const: LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO */
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_AUTO", INT2NUM(LLAMA_FLASH_ATTN_TYPE_AUTO));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_DISABLED", INT2NUM(LLAMA_FLASH_ATTN_TYPE_DISABLED));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_ENABLED", INT2NUM(LLAMA_FLASH_ATTN_TYPE_ENABLED));
   /* llama_split_mode */
   /* Document-const: LlamaCpp::LLAMA_SPLIT_MODE_NONE */
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
 
+  rb_define_module_function(rb_mLlamaCpp, "llama_flash_attn_type_name", rb_llama_flash_attn_type_name, 1);
+
   /**
    * Document-class: LlamaCpp::LlamaTokenData
    * "struct llama_token_data" wrapper class
@@ -4359,6 +4380,17 @@ void Init_llama_cpp(void) {
    * @return [Integer]
    */
   rb_define_method(rb_cLlamaContextParams, "attention_type=", RUBY_METHOD_FUNC(llama_context_params_set_attention_type), 1);
+  /**
+   * Document-method: flash_attn_type
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaContextParams, "flash_attn_type", RUBY_METHOD_FUNC(llama_context_params_get_flash_attn_type), 0);
+  /**
+   * Document-method: flash_attn_type=
+   * @param [Integer] flash_attn_type
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaContextParams, "flash_attn_type=", RUBY_METHOD_FUNC(llama_context_params_set_flash_attn_type), 1);
   /**
    * Document-method: rope_freq_base
    * @return [Float]
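With both the constants and the accessor registered, a context can be configured end to end. A sketch assuming the gem also mirrors the upstream `llama_model_default_params`, `llama_model_load_from_file`, and `llama_init_from_model` functions (none of which appear in this diff), with a hypothetical model path:

```ruby
require 'llama_cpp'

model_params = LlamaCpp.llama_model_default_params
model = LlamaCpp.llama_model_load_from_file('model.gguf', model_params) # hypothetical path

ctx_params = LlamaCpp.llama_context_default_params
ctx_params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO # let llama.cpp decide

context = LlamaCpp.llama_init_from_model(model, ctx_params)
```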
@@ -4493,17 +4525,6 @@ void Init_llama_cpp(void) {
    * @return [Boolean]
    */
   rb_define_method(rb_cLlamaContextParams, "offload_kqv=", RUBY_METHOD_FUNC(llama_context_params_set_offload_kqv), 1);
-  /**
-   * Document-method: flash_attn
-   * @return [Boolean]
-   */
-  rb_define_method(rb_cLlamaContextParams, "flash_attn", RUBY_METHOD_FUNC(llama_context_params_get_flash_attn), 0);
-  /**
-   * Document-method: flash_attn=
-   * @param [Boolean] flash_attn
-   * @return [Boolean]
-   */
-  rb_define_method(rb_cLlamaContextParams, "flash_attn=", RUBY_METHOD_FUNC(llama_context_params_set_flash_attn), 1);
   /**
    * Document-method: no_perf
    * @return [Boolean]
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LlamaCpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.22.1'
+  VERSION = '0.23.0'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = 'b6310'
+  LLAMA_CPP_VERSION = 'b6380'
 end
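Code that must work on either side of this release can feature-detect the new accessor instead of pinning versions. A defensive sketch, not part of the diff:

```ruby
require 'llama_cpp'

params = LlamaCpp.llama_context_default_params
if params.respond_to?(:flash_attn_type)
  params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED # 0.23.0+
else
  params.flash_attn = true # pre-0.23.0 boolean accessor
end
```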
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.22.1
+  version: 0.23.0
 platform: ruby
 authors:
 - yoshoku