llama_cpp 0.23.11 → 0.24.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/ext/llama_cpp/llama_cpp.c +65 -55
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1130bd5d4bd478e4aed2e67d836fe66aa0bf166dac85e28557e05814b75d48b2
|
|
4
|
+
data.tar.gz: fb276cff62ba89f3726b526c7efea7d6b76ff4164b3885cd70c07c36ae2a4ec7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f45b85cc4dfebd8a0afb0592a1ddee159656a749033cc5abf395f88c19742a705263018b72acb234142d2821b3b5ba2e1a09ff1884347b0a73bea5b1a6b0c3bc
|
|
7
|
+
data.tar.gz: 9b000d84f97eaa7e4f6b775ce03899a60aae832440fb53b28115d576441df330d42bd3761984fe2765c3b9fed60121caeaed7c697288140a58fb193467e9d082
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,21 @@
|
|
|
1
|
+
## [[0.24.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.1...v0.24.2)] - 2026-03-15
|
|
2
|
+
|
|
3
|
+
- Change supported llama.cpp version to b8340.
|
|
4
|
+
- Add `LLAMA_FTYPE_MOSTLY_NVFP4` constant value.
|
|
5
|
+
|
|
6
|
+
## [[0.24.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.0...v0.24.1)] - 2026-03-01
|
|
7
|
+
|
|
8
|
+
- Change supported llama.cpp version to b8170.
|
|
9
|
+
- Add `dry_run` accessor to `LlamaModelQuantizeParams`.
|
|
10
|
+
|
|
11
|
+
## [[0.24.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.23.11...v0.24.0)] - 2026-02-22
|
|
12
|
+
|
|
13
|
+
- Change supported llama.cpp version to b8110.
|
|
14
|
+
- Add `llama_set_adapters_lora` module function to `LlamaCpp`.
|
|
15
|
+
- Remove `llama_set_adapter_lora` module function.
|
|
16
|
+
- Remove `llama_clear_adapter_lora` module function.
|
|
17
|
+
- Remove `llama_rm_adapter_lora` module function.
|
|
18
|
+
|
|
1
19
|
## [[0.23.11](https://github.com/yoshoku/llama_cpp.rb/compare/v0.23.10...v0.23.11)] - 2026-01-24
|
|
2
20
|
|
|
3
21
|
- Change supported llama.cpp version to b7790.
|
data/ext/llama_cpp/llama_cpp.c
CHANGED
|
@@ -1024,6 +1024,17 @@ static VALUE llama_model_quantize_params_set_keep_split(VALUE self, VALUE keep_s
|
|
|
1024
1024
|
return keep_split;
|
|
1025
1025
|
}
|
|
1026
1026
|
|
|
1027
|
+
static VALUE llama_model_quantize_params_get_dry_run(VALUE self) {
|
|
1028
|
+
llama_model_quantize_params* data = get_llama_model_quantize_params(self);
|
|
1029
|
+
return data->dry_run ? Qtrue : Qfalse;
|
|
1030
|
+
}
|
|
1031
|
+
|
|
1032
|
+
static VALUE llama_model_quantize_params_set_dry_run(VALUE self, VALUE dry_run) {
|
|
1033
|
+
llama_model_quantize_params* data = get_llama_model_quantize_params(self);
|
|
1034
|
+
data->dry_run = RTEST(dry_run) ? true : false;
|
|
1035
|
+
return dry_run;
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1027
1038
|
/* llama_logit_bias */
|
|
1028
1039
|
static void llama_logit_bias_free(void *ptr) {
|
|
1029
1040
|
if (ptr) {
|
|
@@ -1992,70 +2003,62 @@ static VALUE rb_llama_adapter_meta_count(VALUE self, VALUE adapter) {
|
|
|
1992
2003
|
}
|
|
1993
2004
|
|
|
1994
2005
|
/**
|
|
1995
|
-
* @overload llama_set_adapter_lora(context, adapter, scale)
|
|
2006
|
+
* @overload llama_set_adapters_lora(context, adapters, scales)
|
|
1996
2007
|
* @param [LlamaContext] context
|
|
1997
|
-
* @param [LlamaAdapterLora] adapter
|
|
1998
|
-
* @param [Float] scale
|
|
2008
|
+
* @param [Array<LlamaAdapterLora>] adapters
|
|
2009
|
+
* @param [Array<Float>] scales
|
|
1999
2010
|
* @return [Integer]
|
|
2000
2011
|
*/
|
|
2001
|
-
static VALUE
|
|
2012
|
+
static VALUE rb_llama_set_adapters_lora(VALUE self, VALUE ctx, VALUE adapters, VALUE scales) {
|
|
2002
2013
|
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
|
2003
2014
|
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
|
2004
2015
|
return Qnil;
|
|
2005
2016
|
}
|
|
2006
|
-
if (!
|
|
2007
|
-
rb_raise(rb_eArgError, "
|
|
2017
|
+
if (!RB_TYPE_P(adapters, T_ARRAY)) {
|
|
2018
|
+
rb_raise(rb_eArgError, "adapters must be an Array");
|
|
2008
2019
|
return Qnil;
|
|
2009
2020
|
}
|
|
2010
|
-
if (!
|
|
2011
|
-
rb_raise(rb_eArgError, "
|
|
2021
|
+
if (!RB_TYPE_P(scales, T_ARRAY)) {
|
|
2022
|
+
rb_raise(rb_eArgError, "scales must be an Array");
|
|
2012
2023
|
return Qnil;
|
|
2013
2024
|
}
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
RB_GC_GUARD(adapter);
|
|
2019
|
-
return NUM2INT(res);
|
|
2020
|
-
}
|
|
2021
|
-
|
|
2022
|
-
/**
|
|
2023
|
-
* @overload llama_rm_adapter_lora(context, adapter)
|
|
2024
|
-
* @param [LlamaContext] context
|
|
2025
|
-
* @param [LlamaAdapterLora] adapter
|
|
2026
|
-
* @return [Integer]
|
|
2027
|
-
*/
|
|
2028
|
-
static VALUE rb_llama_rm_adapter_lora(VALUE self, VALUE ctx, VALUE adapter) {
|
|
2029
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
|
2030
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
|
2025
|
+
long n_adapters = RARRAY_LEN(adapters);
|
|
2026
|
+
long n_scales = RARRAY_LEN(scales);
|
|
2027
|
+
if (n_adapters != n_scales) {
|
|
2028
|
+
rb_raise(rb_eArgError, "adapters and scales must have the same length");
|
|
2031
2029
|
return Qnil;
|
|
2032
2030
|
}
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2031
|
+
for (long i = 0; i < n_adapters; i++) {
|
|
2032
|
+
VALUE adapter = rb_ary_entry(adapters, i);
|
|
2033
|
+
if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
|
|
2034
|
+
rb_raise(rb_eArgError, "adapters must be an Array of LlamaAdapterLora");
|
|
2035
|
+
return Qnil;
|
|
2036
|
+
}
|
|
2036
2037
|
}
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
}
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2038
|
+
for (long i = 0; i < n_scales; i++) {
|
|
2039
|
+
VALUE scale = rb_ary_entry(scales, i);
|
|
2040
|
+
if (!RB_FLOAT_TYPE_P(scale)) {
|
|
2041
|
+
rb_raise(rb_eArgError, "scales must be an Array of Float");
|
|
2042
|
+
return Qnil;
|
|
2043
|
+
}
|
|
2044
|
+
}
|
|
2045
|
+
struct llama_adapter_lora** adapters_ = ALLOCA_N(struct llama_adapter_lora*, n_adapters);
|
|
2046
|
+
for (long i = 0; i < n_adapters; i++) {
|
|
2047
|
+
VALUE adapter = rb_ary_entry(adapters, i);
|
|
2048
|
+
llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
|
|
2049
|
+
adapters_[i] = adapter_wrapper->adapter;
|
|
2050
|
+
}
|
|
2051
|
+
float* scales_ = ALLOCA_N(float, n_scales);
|
|
2052
|
+
for (long i = 0; i < n_scales; i++) {
|
|
2053
|
+
VALUE scale = rb_ary_entry(scales, i);
|
|
2054
|
+
scales_[i] = (float)NUM2DBL(scale);
|
|
2054
2055
|
}
|
|
2055
2056
|
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
|
2056
|
-
|
|
2057
|
+
const int32_t res = llama_set_adapters_lora(context_wrapper->context, adapters_, n_adapters, scales_);
|
|
2057
2058
|
RB_GC_GUARD(ctx);
|
|
2058
|
-
|
|
2059
|
+
RB_GC_GUARD(adapters);
|
|
2060
|
+
RB_GC_GUARD(scales);
|
|
2061
|
+
return NUM2INT(res);
|
|
2059
2062
|
}
|
|
2060
2063
|
|
|
2061
2064
|
/**
|
|
@@ -4192,6 +4195,7 @@ void Init_llama_cpp(void) {
|
|
|
4192
4195
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0));
|
|
4193
4196
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
|
|
4194
4197
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
|
|
4198
|
+
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_NVFP4", INT2NUM(LLAMA_FTYPE_MOSTLY_NVFP4));
|
|
4195
4199
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
|
|
4196
4200
|
/* llama_rope_scaling_type */
|
|
4197
4201
|
/* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
|
|
@@ -4912,6 +4916,17 @@ void Init_llama_cpp(void) {
|
|
|
4912
4916
|
* @return [Boolean]
|
|
4913
4917
|
*/
|
|
4914
4918
|
rb_define_method(rb_cLlamaModelQuantizeParams, "keep_split=", RUBY_METHOD_FUNC(llama_model_quantize_params_set_keep_split), 1);
|
|
4919
|
+
/**
|
|
4920
|
+
* Document-method: dry_run
|
|
4921
|
+
* @return [Boolean]
|
|
4922
|
+
*/
|
|
4923
|
+
rb_define_method(rb_cLlamaModelQuantizeParams, "dry_run", RUBY_METHOD_FUNC(llama_model_quantize_params_get_dry_run), 0);
|
|
4924
|
+
/**
|
|
4925
|
+
* Document-method: dry_run=
|
|
4926
|
+
* @param [Boolean] dry_run
|
|
4927
|
+
* @return [Boolean]
|
|
4928
|
+
*/
|
|
4929
|
+
rb_define_method(rb_cLlamaModelQuantizeParams, "dry_run=", RUBY_METHOD_FUNC(llama_model_quantize_params_set_dry_run), 1);
|
|
4915
4930
|
/* TODO: void* imatrix */
|
|
4916
4931
|
/* TODO: void* kv_overrides */
|
|
4917
4932
|
/* TODO: void* tensor_types */
|
|
@@ -4987,6 +5002,7 @@ void Init_llama_cpp(void) {
|
|
|
4987
5002
|
|
|
4988
5003
|
/* TODO: llama_attach_threadpool */
|
|
4989
5004
|
/* TODO: llama_detach_threadpool */
|
|
5005
|
+
/* TODO: llama_model_init_from_user */
|
|
4990
5006
|
|
|
4991
5007
|
/* llama_model_load_from_file */
|
|
4992
5008
|
rb_define_module_function(rb_mLlamaCpp, "llama_model_load_from_file", rb_llama_model_load_from_file, 2);
|
|
@@ -5159,14 +5175,8 @@ void Init_llama_cpp(void) {
|
|
|
5159
5175
|
/* TODO: llama_adapter_meta_key_by_index */
|
|
5160
5176
|
/* TODO: llama_adapter_meta_val_str_by_index */
|
|
5161
5177
|
|
|
5162
|
-
/* llama_set_adapter_lora */
|
|
5163
|
-
rb_define_module_function(rb_mLlamaCpp, "
|
|
5164
|
-
|
|
5165
|
-
/* llama_rm_adapter_lora */
|
|
5166
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_rm_adapter_lora", rb_llama_rm_adapter_lora, 2);
|
|
5167
|
-
|
|
5168
|
-
/* llama_clear_adapter_lora */
|
|
5169
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_clear_adapter_lora", rb_llama_clear_adapter_lora, 1);
|
|
5178
|
+
/* llama_set_adapters_lora */
|
|
5179
|
+
rb_define_module_function(rb_mLlamaCpp, "llama_set_adapters_lora", rb_llama_set_adapters_lora, 3);
|
|
5170
5180
|
|
|
5171
5181
|
/* llama_adapter_lora_free */
|
|
5172
5182
|
rb_define_module_function(rb_mLlamaCpp, "llama_adapter_lora_free", rb_llama_adapter_lora_free, 1);
|
data/lib/llama_cpp/version.rb
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
|
4
4
|
module LlamaCpp
|
|
5
5
|
# The version of llama_cpp.rb you install.
|
|
6
|
-
VERSION = '0.23.11'
|
|
6
|
+
VERSION = '0.24.2'
|
|
7
7
|
|
|
8
8
|
# The supported version of llama.cpp.
|
|
9
|
-
LLAMA_CPP_VERSION = 'b7790'
|
|
9
|
+
LLAMA_CPP_VERSION = 'b8340'
|
|
10
10
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llama_cpp
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.23.11
|
|
4
|
+
version: 0.24.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yoshoku
|
|
@@ -33,7 +33,7 @@ metadata:
|
|
|
33
33
|
homepage_uri: https://github.com/yoshoku/llama_cpp.rb
|
|
34
34
|
source_code_uri: https://github.com/yoshoku/llama_cpp.rb
|
|
35
35
|
changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
|
|
36
|
-
documentation_uri: https://gemdocs.org/gems/llama_cpp/0.23.11/
|
|
36
|
+
documentation_uri: https://gemdocs.org/gems/llama_cpp/0.24.2/
|
|
37
37
|
rubygems_mfa_required: 'true'
|
|
38
38
|
rdoc_options: []
|
|
39
39
|
require_paths:
|