llama_cpp 0.22.0 → 0.23.0
This diff compares the content of publicly available package versions as they were released to their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/llama_cpp/llama_cpp.c +65 -22
- data/lib/llama_cpp/version.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bdc82f63eb7cca5133f24159dd648fa2722896a3b9cee9cafc11022e28646b5d
+  data.tar.gz: 0adad024f89582c57f1e541cb85b4220b171b64c4b12bb2f38d635efed4b6458
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b2e9ce298ed0f5d2cb684c4362ed4edadd14af7b3c99ccb989308483b58b81eb659c64b8c91216eb27684f17f36170833a9bb3e3dd4c05ceacf9bc3f0603c159
+  data.tar.gz: aee0bcd5b2cecd91baf0473705ba4e43b74325a0a490f3d7a872a47e629248734cc76076dd18f584855db83671ae00896bf8185a046086cfbc9bcc741fc67064
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
+## [[0.23.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.22.1...v0.23.0)] - 2025-09-05
+
+- Change supported llama.cpp version to b6380.
+- Add `llama_flash_attn_type_name` module function.
+- Add `flash_attn_type` accessor to `LlamaContextParams`.
+- Add `LLAMA_FLASH_ATTN_TYPE_AUTO`, `LLAMA_FLASH_ATTN_TYPE_DISABLED`, and `LLAMA_FLASH_ATTN_TYPE_ENABLED` constants.
+- Remove `flash_attn` accessor from `LlamaContextParams`.
+
+## [[0.22.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.22.0...v0.22.1)] - 2025-08-30
+
+- Change supported llama.cpp version to b6310.
+- Add `llama_adapter_meta_count` module function.
+
 ## [[0.22.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.2...v0.22.0)] - 2025-08-23
 
 - Change supported llama.cpp version to b6240.
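For reference, a minimal sketch of the new flash-attention API from the Ruby side. The `llama_context_default_params` call is an assumption based on the gem's existing C-API-mirroring bindings; the constants and accessors are the ones added in this release.

    require 'llama_cpp'

    # Human-readable name of a flash-attention mode (new module function).
    LlamaCpp.llama_flash_attn_type_name(LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO)

    # The boolean flash_attn accessor is removed; set the enum-valued
    # flash_attn_type on the context params instead.
    params = LlamaCpp.llama_context_default_params # assumed existing binding
    params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED
    params.flash_attn_type == LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED # => true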
data/ext/llama_cpp/llama_cpp.c
CHANGED
@@ -672,6 +672,17 @@ static VALUE llama_context_params_set_attention_type(VALUE self, VALUE attention
   return attention_type;
 }
 
+static VALUE llama_context_params_get_flash_attn_type(VALUE self) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  return INT2NUM(data->flash_attn_type);
+}
+
+static VALUE llama_context_params_set_flash_attn_type(VALUE self, VALUE flash_attn_type) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  data->flash_attn_type = (enum llama_flash_attn_type)NUM2INT(flash_attn_type);
+  return flash_attn_type;
+}
+
 static VALUE llama_context_params_get_rope_freq_base(VALUE self) {
   struct llama_context_params* data = get_llama_context_params(self);
   return DBL2NUM(data->rope_freq_base);
@@ -804,17 +815,6 @@ static VALUE llama_context_params_set_offload_kqv(VALUE self, VALUE offload_kqv)
   return offload_kqv;
 }
 
-static VALUE llama_context_params_get_flash_attn(VALUE self) {
-  struct llama_context_params* data = get_llama_context_params(self);
-  return data->flash_attn ? Qtrue : Qfalse;
-}
-
-static VALUE llama_context_params_set_flash_attn(VALUE self, VALUE flash_attn) {
-  struct llama_context_params* data = get_llama_context_params(self);
-  data->flash_attn = RTEST(flash_attn) ? true : false;
-  return flash_attn;
-}
-
 static VALUE llama_context_params_get_no_perf(VALUE self) {
   struct llama_context_params* data = get_llama_context_params(self);
   return data->no_perf ? Qtrue : Qfalse;
@@ -1852,6 +1852,20 @@ static VALUE rb_llama_adapter_lora_init(VALUE self, VALUE model, VALUE path_lora
   return TypedData_Wrap_Struct(rb_cLlamaAdapterLora, &llama_adapter_lora_wrapper_data_type, adapter_wrapper);
 }
 
+/**
+ * @overload llama_adapter_meta_count(adapter)
+ *   @param [LlamaAdapterLora] adapter
+ *   @return [Integer]
+ */
+static VALUE rb_llama_adapter_meta_count(VALUE self, VALUE adapter) {
+  if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
+    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
+    return Qnil;
+  }
+  llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
+  return INT2NUM(llama_adapter_meta_count(adapter_wrapper->adapter));
+}
+
 /**
  * @overload llama_set_adapter_lora(context, adapter, scale)
  *   @param [LlamaContext] context
@@ -3858,6 +3872,20 @@ static VALUE rb_llama_perf_sampler_reset(VALUE self, VALUE chain) {
   return Qnil;
 }
 
+/**
+ * @overload llama_flash_attn_type_name(flash_attn_type)
+ *   @param [Integer] flash_attn_type
+ *   @return [String]
+ */
+static VALUE rb_llama_flash_attn_type_name(VALUE self, VALUE flash_attn_type) {
+  if (!RB_INTEGER_TYPE_P(flash_attn_type)) {
+    rb_raise(rb_eArgError, "flash_attn_type must be an Integer");
+    return Qnil;
+  }
+  const char* name = llama_flash_attn_type_name((enum llama_flash_attn_type)NUM2INT(flash_attn_type));
+  return rb_utf8_str_new_cstr(name);
+}
+
 /* MAIN */
 void Init_llama_cpp(void) {
   char tmp[12];
@@ -4005,12 +4033,19 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+  /* llama_flash_attn_type */
+  /* Document-const: LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO */
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_AUTO", INT2NUM(LLAMA_FLASH_ATTN_TYPE_AUTO));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_DISABLED", INT2NUM(LLAMA_FLASH_ATTN_TYPE_DISABLED));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_ENABLED", INT2NUM(LLAMA_FLASH_ATTN_TYPE_ENABLED));
   /* llama_split_mode */
   /* Document-const: LlamaCpp::LLAMA_SPLIT_MODE_NONE */
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
 
+  rb_define_module_function(rb_mLlamaCpp, "llama_flash_attn_type_name", rb_llama_flash_attn_type_name, 1);
+
   /**
    * Document-class: LlamaCpp::LlamaTokenData
    * "struct llama_token_data" wrapper class
@@ -4345,6 +4380,17 @@ void Init_llama_cpp(void) {
    * @return [Integer]
    */
   rb_define_method(rb_cLlamaContextParams, "attention_type=", RUBY_METHOD_FUNC(llama_context_params_set_attention_type), 1);
+  /**
+   * Document-method: flash_attn_type
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaContextParams, "flash_attn_type", RUBY_METHOD_FUNC(llama_context_params_get_flash_attn_type), 0);
+  /**
+   * Document-method: flash_attn_type=
+   * @param [Integer] flash_attn_type
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaContextParams, "flash_attn_type=", RUBY_METHOD_FUNC(llama_context_params_set_flash_attn_type), 1);
   /**
    * Document-method: rope_freq_base
    * @return [Float]
@@ -4479,17 +4525,6 @@ void Init_llama_cpp(void) {
    * @return [Boolean]
    */
   rb_define_method(rb_cLlamaContextParams, "offload_kqv=", RUBY_METHOD_FUNC(llama_context_params_set_offload_kqv), 1);
-  /**
-   * Document-method: flash_attn
-   * @return [Boolean]
-   */
-  rb_define_method(rb_cLlamaContextParams, "flash_attn", RUBY_METHOD_FUNC(llama_context_params_get_flash_attn), 0);
-  /**
-   * Document-method: flash_attn=
-   * @param [Boolean] flash_attn
-   * @return [Boolean]
-   */
-  rb_define_method(rb_cLlamaContextParams, "flash_attn=", RUBY_METHOD_FUNC(llama_context_params_set_flash_attn), 1);
   /**
    * Document-method: no_perf
    * @return [Boolean]
@@ -4855,6 +4890,14 @@ void Init_llama_cpp(void) {
   /* llama_adapter_lora_init */
   rb_define_module_function(rb_mLlamaCpp, "llama_adapter_lora_init", rb_llama_adapter_lora_init, 2);
 
+  /* TODO: llama_adapter_meta_val_str */
+
+  /* llama_adapter_meta_count */
+  rb_define_module_function(rb_mLlamaCpp, "llama_adapter_meta_count", rb_llama_adapter_meta_count, 1);
+
+  /* TODO: llama_adapter_meta_key_by_index */
+  /* TODO: llama_adapter_meta_val_str_by_index */
+
   /* llama_set_adapter_lora */
   rb_define_module_function(rb_mLlamaCpp, "llama_set_adapter_lora", rb_llama_set_adapter_lora, 3);
 
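Both new entry points check their arguments before touching the wrapped structs, so misuse from Ruby raises an ArgumentError instead of crashing the process. A sketch of the observable behavior; the adapter path is a hypothetical placeholder and `model` is assumed to have been loaded elsewhere:

    # Non-Integer input to the name lookup:
    LlamaCpp.llama_flash_attn_type_name('auto')
    # => raises ArgumentError, "flash_attn_type must be an Integer"

    # Non-adapter input to the metadata count:
    LlamaCpp.llama_adapter_meta_count(nil)
    # => raises ArgumentError, "adapter must be a LlamaAdapterLora"

    # With a real adapter, the number of metadata entries comes back as an Integer.
    adapter = LlamaCpp.llama_adapter_lora_init(model, 'adapter.gguf')
    LlamaCpp.llama_adapter_meta_count(adapter)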
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LlamaCpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.22.0'
+  VERSION = '0.23.0'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = 'b6240'
+  LLAMA_CPP_VERSION = 'b6380'
 end
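After upgrading, both bumped constants are readable directly, which is a quick way to confirm the installed release:

    require 'llama_cpp'

    LlamaCpp::VERSION           # => '0.23.0'
    LlamaCpp::LLAMA_CPP_VERSION # => 'b6380'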