llama_cpp 0.22.1 → 0.23.1
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/llama_cpp/llama_cpp.c +62 -22
- data/lib/llama_cpp/version.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 839ef41e6f1588768629f776034abce6ea4c668b5de753ea8d9ac5c4c0d93ddd
+  data.tar.gz: c18f5ac34f673247eea8eb63e3e10aed14cd95abe9f8fcb90d7931a32b94482d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1045e721b28f804e6536461f15c0de640fc1201d02c97c8ca807c383e9730d779795fbc745d0404dab51dffa59f90e28a90335dff7513bd1909294e4b3382cd9
+  data.tar.gz: 9c8db351db13d57153c1b939eccf73cd355dae7b7281faaa55e91a41e5762db854e685cd69c25c4c1d9e06a9379dd41d07cb18d1fe82154e9bbf7070617a779e
```
data/CHANGELOG.md
CHANGED
```diff
@@ -1,3 +1,16 @@
+## [[0.23.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.23.0...v0.23.1)] - 2025-09-13
+
+- Change supported llama.cpp version to b6440.
+- Add `llama_adapter_get_alora_n_invocation_tokens` module function.
+
+## [[0.23.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.22.1...v0.23.0)] - 2025-09-05
+
+- Change supported llama.cpp version to b6380.
+- Add `llama_flash_attn_type_name` module function.
+- Add `flash_attn_type` accessor to `LlamaContextParams`.
+- Add `LLAMA_FLASH_ATTN_TYPE_AUTO`, `LLAMA_FLASH_ATTN_TYPE_DISABLED`, and `LLAMA_FLASH_ATTN_TYPE_ENABLED` constants.
+- Remove `flash_attn` accessor from `LlamaContextParams`.
+
 ## [[0.22.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.22.0...v0.22.1)] - 2025-08-30
 
 - Change supported llama.cpp version to b6310.
```
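Taken together, the 0.23.0 entries replace the boolean `flash_attn` accessor with a three-state `flash_attn_type`. A minimal migration sketch — hypothetical usage that assumes the gem's C-API-style `llama_context_default_params` module function, which this diff does not itself show:

```ruby
require 'llama_cpp'

params = LlamaCpp.llama_context_default_params

# Before 0.23.0 (accessor now removed):
#   params.flash_attn = true
# From 0.23.0 on, flash attention is a three-state setting:
params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED
# ...or leave the choice to llama.cpp:
params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO
```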
data/ext/llama_cpp/llama_cpp.c
CHANGED
```diff
@@ -672,6 +672,17 @@ static VALUE llama_context_params_set_attention_type(VALUE self, VALUE attention
   return attention_type;
 }
 
+static VALUE llama_context_params_get_flash_attn_type(VALUE self) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  return INT2NUM(data->flash_attn_type);
+}
+
+static VALUE llama_context_params_set_flash_attn_type(VALUE self, VALUE flash_attn_type) {
+  struct llama_context_params* data = get_llama_context_params(self);
+  data->flash_attn_type = (enum llama_flash_attn_type)NUM2INT(flash_attn_type);
+  return flash_attn_type;
+}
+
 static VALUE llama_context_params_get_rope_freq_base(VALUE self) {
   struct llama_context_params* data = get_llama_context_params(self);
   return DBL2NUM(data->rope_freq_base);
```
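On the Ruby side the new accessor reads and writes a plain Integer (`INT2NUM`/`NUM2IM` conversions above are `INT2NUM`/`NUM2INT`), so values round-trip against the `LLAMA_FLASH_ATTN_TYPE_*` constants registered later in this diff. A quick sketch, again assuming `llama_context_default_params`:

```ruby
require 'llama_cpp'

params = LlamaCpp.llama_context_default_params
params.flash_attn_type = LlamaCpp::LLAMA_FLASH_ATTN_TYPE_DISABLED
params.flash_attn_type == LlamaCpp::LLAMA_FLASH_ATTN_TYPE_DISABLED # => true
```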
```diff
@@ -804,17 +815,6 @@ static VALUE llama_context_params_set_offload_kqv(VALUE self, VALUE offload_kqv)
   return offload_kqv;
 }
 
-static VALUE llama_context_params_get_flash_attn(VALUE self) {
-  struct llama_context_params* data = get_llama_context_params(self);
-  return data->flash_attn ? Qtrue : Qfalse;
-}
-
-static VALUE llama_context_params_set_flash_attn(VALUE self, VALUE flash_attn) {
-  struct llama_context_params* data = get_llama_context_params(self);
-  data->flash_attn = RTEST(flash_attn) ? true : false;
-  return flash_attn;
-}
-
 static VALUE llama_context_params_get_no_perf(VALUE self) {
   struct llama_context_params* data = get_llama_context_params(self);
   return data->no_perf ? Qtrue : Qfalse;
```
```diff
@@ -1952,6 +1952,20 @@ static VALUE rb_llama_adapter_lora_free(VALUE self, VALUE adapter) {
   return Qnil;
 }
 
+/**
+ * @overload llama_adapter_get_alora_n_invocation_tokens(adapter)
+ * @param [LlamaAdapterLora] adapter
+ * @return [Integer]
+ */
+static VALUE rb_llama_adapter_get_alora_n_invocation_tokens(VALUE self, VALUE adapter) {
+  if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
+    rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
+    return Qnil;
+  }
+  llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
+  return UINT2NUM(llama_adapter_get_alora_n_invocation_tokens(adapter_wrapper->adapter));
+}
+
 /* llama_memory_t wrapper */
 typedef struct {
   llama_memory_t memory;
```
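From Ruby this surfaces as a one-argument module function that takes a loaded LoRA adapter and returns an Integer. A hedged sketch — the `.gguf` paths are placeholders, and the loading calls (`llama_model_load_from_file`, `llama_model_default_params`, `llama_adapter_lora_init`) follow the gem's usual C-API-style naming rather than anything shown in this diff:

```ruby
require 'llama_cpp'

# Hypothetical model and adapter files.
model   = LlamaCpp.llama_model_load_from_file('model.gguf', LlamaCpp.llama_model_default_params)
adapter = LlamaCpp.llama_adapter_lora_init(model, 'adapter.gguf')

# Number of invocation tokens for an activated-LoRA (aLoRA) adapter,
# converted to a Ruby Integer via UINT2NUM in the C function above.
puts LlamaCpp.llama_adapter_get_alora_n_invocation_tokens(adapter)

LlamaCpp.llama_adapter_lora_free(adapter)
```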
```diff
@@ -3872,6 +3886,20 @@ static VALUE rb_llama_perf_sampler_reset(VALUE self, VALUE chain) {
   return Qnil;
 }
 
+/**
+ * @overload llama_flash_attn_type_name(flash_attn_type)
+ * @param [Integer] flash_attn_type
+ * @return [String]
+ */
+static VALUE rb_llama_flash_attn_type_name(VALUE self, VALUE flash_attn_type) {
+  if (!RB_INTEGER_TYPE_P(flash_attn_type)) {
+    rb_raise(rb_eArgError, "flash_attn_type must be an Integer");
+    return Qnil;
+  }
+  const char* name = llama_flash_attn_type_name((enum llama_flash_attn_type)NUM2INT(flash_attn_type));
+  return rb_utf8_str_new_cstr(name);
+}
+
 /* MAIN */
 void Init_llama_cpp(void) {
   char tmp[12];
```
```diff
@@ -4019,12 +4047,19 @@ void Init_llama_cpp(void) {
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
   rb_define_const(rb_mLlamaCpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+  /* llama_flash_attn_type */
+  /* Document-const: LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO */
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_AUTO", INT2NUM(LLAMA_FLASH_ATTN_TYPE_AUTO));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_DISABLED", INT2NUM(LLAMA_FLASH_ATTN_TYPE_DISABLED));
+  rb_define_const(rb_mLlamaCpp, "LLAMA_FLASH_ATTN_TYPE_ENABLED", INT2NUM(LLAMA_FLASH_ATTN_TYPE_ENABLED));
   /* llama_split_mode */
   /* Document-const: LlamaCpp::LLAMA_SPLIT_MODE_NONE */
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
   rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
 
+  rb_define_module_function(rb_mLlamaCpp, "llama_flash_attn_type_name", rb_llama_flash_attn_type_name, 1);
+
   /**
    * Document-class: LlamaCpp::LlamaTokenData
    * "struct llama_token_data" wrapper class
```
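With the three constants and the `llama_flash_attn_type_name` module function registered side by side, mapping each enum value to its human-readable label is straightforward; the exact strings returned depend on the bundled llama.cpp build:

```ruby
require 'llama_cpp'

[LlamaCpp::LLAMA_FLASH_ATTN_TYPE_AUTO,
 LlamaCpp::LLAMA_FLASH_ATTN_TYPE_DISABLED,
 LlamaCpp::LLAMA_FLASH_ATTN_TYPE_ENABLED].each do |type|
  puts "#{type}: #{LlamaCpp.llama_flash_attn_type_name(type)}"
end
```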
```diff
@@ -4359,6 +4394,17 @@ void Init_llama_cpp(void) {
    * @return [Integer]
    */
   rb_define_method(rb_cLlamaContextParams, "attention_type=", RUBY_METHOD_FUNC(llama_context_params_set_attention_type), 1);
+  /**
+   * Document-method: flash_attn_type
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaContextParams, "flash_attn_type", RUBY_METHOD_FUNC(llama_context_params_get_flash_attn_type), 0);
+  /**
+   * Document-method: flash_attn_type=
+   * @param [Integer] flash_attn_type
+   * @return [Integer]
+   */
+  rb_define_method(rb_cLlamaContextParams, "flash_attn_type=", RUBY_METHOD_FUNC(llama_context_params_set_flash_attn_type), 1);
   /**
    * Document-method: rope_freq_base
    * @return [Float]
```
```diff
@@ -4493,17 +4539,6 @@ void Init_llama_cpp(void) {
    * @return [Boolean]
    */
   rb_define_method(rb_cLlamaContextParams, "offload_kqv=", RUBY_METHOD_FUNC(llama_context_params_set_offload_kqv), 1);
-  /**
-   * Document-method: flash_attn
-   * @return [Boolean]
-   */
-  rb_define_method(rb_cLlamaContextParams, "flash_attn", RUBY_METHOD_FUNC(llama_context_params_get_flash_attn), 0);
-  /**
-   * Document-method: flash_attn=
-   * @param [Boolean] flash_attn
-   * @return [Boolean]
-   */
-  rb_define_method(rb_cLlamaContextParams, "flash_attn=", RUBY_METHOD_FUNC(llama_context_params_set_flash_attn), 1);
   /**
    * Document-method: no_perf
    * @return [Boolean]
```
```diff
@@ -4889,6 +4924,11 @@ void Init_llama_cpp(void) {
   /* llama_adapter_lora_free */
   rb_define_module_function(rb_mLlamaCpp, "llama_adapter_lora_free", rb_llama_adapter_lora_free, 1);
 
+  /* llama_adapter_get_alora_n_invocation_tokens */
+  rb_define_module_function(rb_mLlamaCpp, "llama_adapter_get_alora_n_invocation_tokens", rb_llama_adapter_get_alora_n_invocation_tokens, 1);
+
+  /* TODO: llama_adapter_get_alora_invocation_tokens */
+
   /* TODO: llama_apply_adapter_cvec */
 
   /**
```
data/lib/llama_cpp/version.rb
CHANGED
```diff
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LlamaCpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.22.1'
+  VERSION = '0.23.1'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = 'b6310'
+  LLAMA_CPP_VERSION = 'b6440'
 end
```
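Both constants are plain strings, so downstream code can gate on them at runtime; for example:

```ruby
require 'llama_cpp'

puts LlamaCpp::VERSION           # => "0.23.1"
puts LlamaCpp::LLAMA_CPP_VERSION # => "b6440"

# Guard use of the new aLoRA helper behind a version check:
if Gem::Version.new(LlamaCpp::VERSION) >= Gem::Version.new('0.23.1')
  # LlamaCpp.llama_adapter_get_alora_n_invocation_tokens is available here.
end
```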