llama_cpp 0.23.11 → 0.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fc81dd7e098dace7f301394170fa517ec4d214bc41d76dbbf4b79c162ebff85
4
- data.tar.gz: 8641cedea81065a2d7ced2e8db028bf8209de42312e5b3431749c19f605d5134
3
+ metadata.gz: 1130bd5d4bd478e4aed2e67d836fe66aa0bf166dac85e28557e05814b75d48b2
4
+ data.tar.gz: fb276cff62ba89f3726b526c7efea7d6b76ff4164b3885cd70c07c36ae2a4ec7
5
5
  SHA512:
6
- metadata.gz: 3cb0176fc18bb430ee7d00177a911ebe204f975ef7d2db88a79c696a1e4b3fde2ae74cc9fa34648294a552b4f026238b00572e35c9bb20b892c14e8108286557
7
- data.tar.gz: f1431d0adb6348e78b62e96c96fa017dda979c282a5d5c61a164f287a6a85b3650bb83cf1206c40d0eb3387ddd9c6cab2325d6d2c12824e0ca5a7df4d9576602
6
+ metadata.gz: f45b85cc4dfebd8a0afb0592a1ddee159656a749033cc5abf395f88c19742a705263018b72acb234142d2821b3b5ba2e1a09ff1884347b0a73bea5b1a6b0c3bc
7
+ data.tar.gz: 9b000d84f97eaa7e4f6b775ce03899a60aae832440fb53b28115d576441df330d42bd3761984fe2765c3b9fed60121caeaed7c697288140a58fb193467e9d082
data/CHANGELOG.md CHANGED
@@ -1,3 +1,21 @@
1
+ ## [[0.24.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.1...v0.24.2)] - 2026-03-15
2
+
3
+ - Change supported llama.cpp version to b8340.
4
+ - Add `LLAMA_FTYPE_MOSTLY_NVFP4` constant value.
5
+
6
+ ## [[0.24.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.0...v0.24.1)] - 2026-03-01
7
+
8
+ - Change supported llama.cpp version to b8170.
9
+ - Add `dry_run` accessor to `LlamaModelQuantizeParams`.
10
+
11
+ ## [[0.24.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.23.11...v0.24.0)] - 2026-02-22
12
+
13
+ - Change supported llama.cpp version to b8110.
14
+ - Add `llama_set_adapters_lora` module function to `LlamaCpp`.
15
+ - Remove `llama_set_adapter_lora` module function.
16
+ - Remove `llama_clear_adapter_lora` module function.
17
+ - Remove `llama_rm_adapter_lora` module function.
18
+
1
19
  ## [[0.23.11](https://github.com/yoshoku/llama_cpp.rb/compare/v0.23.10...v0.23.11)] - 2026-01-24
2
20
 
3
21
  - Change supported llama.cpp version to b7790.
@@ -1024,6 +1024,17 @@ static VALUE llama_model_quantize_params_set_keep_split(VALUE self, VALUE keep_s
1024
1024
  return keep_split;
1025
1025
  }
1026
1026
 
1027
+ static VALUE llama_model_quantize_params_get_dry_run(VALUE self) {
1028
+ llama_model_quantize_params* data = get_llama_model_quantize_params(self);
1029
+ return data->dry_run ? Qtrue : Qfalse;
1030
+ }
1031
+
1032
+ static VALUE llama_model_quantize_params_set_dry_run(VALUE self, VALUE dry_run) {
1033
+ llama_model_quantize_params* data = get_llama_model_quantize_params(self);
1034
+ data->dry_run = RTEST(dry_run) ? true : false;
1035
+ return dry_run;
1036
+ }
1037
+
1027
1038
  /* llama_logit_bias */
1028
1039
  static void llama_logit_bias_free(void *ptr) {
1029
1040
  if (ptr) {
@@ -1992,70 +2003,62 @@ static VALUE rb_llama_adapter_meta_count(VALUE self, VALUE adapter) {
1992
2003
  }
1993
2004
 
1994
2005
  /**
1995
- * @overload llama_set_adapter_lora(context, adapter, scale)
2006
+ * @overload llama_set_adapters_lora(context, adapters, scales)
1996
2007
  * @param [LlamaContext] context
1997
- * @param [LlamaAdapterLora] adapter
1998
- * @param [Float] scale
2008
+ * @param [Array<LlamaAdapterLora>] adapters
2009
+ * @param [Array<Float>] scales
1999
2010
  * @return [Integer]
2000
2011
  */
2001
- static VALUE rb_llama_set_adapter_lora(VALUE self, VALUE ctx, VALUE adapter, VALUE scale) {
2012
+ static VALUE rb_llama_set_adapters_lora(VALUE self, VALUE ctx, VALUE adapters, VALUE scales) {
2002
2013
  if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
2003
2014
  rb_raise(rb_eArgError, "ctx must be a LlamaContext");
2004
2015
  return Qnil;
2005
2016
  }
2006
- if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
2007
- rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
2017
+ if (!RB_TYPE_P(adapters, T_ARRAY)) {
2018
+ rb_raise(rb_eArgError, "adapters must be an Array");
2008
2019
  return Qnil;
2009
2020
  }
2010
- if (!RB_FLOAT_TYPE_P(scale)) {
2011
- rb_raise(rb_eArgError, "scale must be a Float");
2021
+ if (!RB_TYPE_P(scales, T_ARRAY)) {
2022
+ rb_raise(rb_eArgError, "scales must be an Array");
2012
2023
  return Qnil;
2013
2024
  }
2014
- llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
2015
- llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
2016
- const int32_t res = llama_set_adapter_lora(context_wrapper->context, adapter_wrapper->adapter, (float)NUM2DBL(scale));
2017
- RB_GC_GUARD(ctx);
2018
- RB_GC_GUARD(adapter);
2019
- return NUM2INT(res);
2020
- }
2021
-
2022
- /**
2023
- * @overload llama_rm_adapter_lora(context, adapter)
2024
- * @param [LlamaContext] context
2025
- * @param [LlamaAdapterLora] adapter
2026
- * @return [Integer]
2027
- */
2028
- static VALUE rb_llama_rm_adapter_lora(VALUE self, VALUE ctx, VALUE adapter) {
2029
- if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
2030
- rb_raise(rb_eArgError, "ctx must be a LlamaContext");
2025
+ long n_adapters = RARRAY_LEN(adapters);
2026
+ long n_scales = RARRAY_LEN(scales);
2027
+ if (n_adapters != n_scales) {
2028
+ rb_raise(rb_eArgError, "adapters and scales must have the same length");
2031
2029
  return Qnil;
2032
2030
  }
2033
- if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
2034
- rb_raise(rb_eArgError, "adapter must be a LlamaAdapterLora");
2035
- return Qnil;
2031
+ for (long i = 0; i < n_adapters; i++) {
2032
+ VALUE adapter = rb_ary_entry(adapters, i);
2033
+ if (!rb_obj_is_kind_of(adapter, rb_cLlamaAdapterLora)) {
2034
+ rb_raise(rb_eArgError, "adapters must be an Array of LlamaAdapterLora");
2035
+ return Qnil;
2036
+ }
2036
2037
  }
2037
- llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
2038
- llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
2039
- const int32_t res = llama_rm_adapter_lora(context_wrapper->context, adapter_wrapper->adapter);
2040
- RB_GC_GUARD(ctx);
2041
- RB_GC_GUARD(adapter);
2042
- return NUM2INT(res);
2043
- }
2044
-
2045
- /**
2046
- * @overload llama_clear_adapter_lora(context)
2047
- * @param [LlamaContext] context
2048
- * @return [NilClass]
2049
- */
2050
- static VALUE rb_llama_clear_adapter_lora(VALUE self, VALUE ctx) {
2051
- if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
2052
- rb_raise(rb_eArgError, "ctx must be a LlamaContext");
2053
- return Qnil;
2038
+ for (long i = 0; i < n_scales; i++) {
2039
+ VALUE scale = rb_ary_entry(scales, i);
2040
+ if (!RB_FLOAT_TYPE_P(scale)) {
2041
+ rb_raise(rb_eArgError, "scales must be an Array of Float");
2042
+ return Qnil;
2043
+ }
2044
+ }
2045
+ struct llama_adapter_lora** adapters_ = ALLOCA_N(struct llama_adapter_lora*, n_adapters);
2046
+ for (long i = 0; i < n_adapters; i++) {
2047
+ VALUE adapter = rb_ary_entry(adapters, i);
2048
+ llama_adapter_lora_wrapper* adapter_wrapper = get_llama_adapter_lora_wrapper(adapter);
2049
+ adapters_[i] = adapter_wrapper->adapter;
2050
+ }
2051
+ float* scales_ = ALLOCA_N(float, n_scales);
2052
+ for (long i = 0; i < n_scales; i++) {
2053
+ VALUE scale = rb_ary_entry(scales, i);
2054
+ scales_[i] = (float)NUM2DBL(scale);
2054
2055
  }
2055
2056
  llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
2056
- llama_clear_adapter_lora(context_wrapper->context);
2057
+ const int32_t res = llama_set_adapters_lora(context_wrapper->context, adapters_, n_adapters, scales_);
2057
2058
  RB_GC_GUARD(ctx);
2058
- return Qnil;
2059
+ RB_GC_GUARD(adapters);
2060
+ RB_GC_GUARD(scales);
2061
+ return NUM2INT(res);
2059
2062
  }
2060
2063
 
2061
2064
  /**
@@ -4192,6 +4195,7 @@ void Init_llama_cpp(void) {
4192
4195
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0));
4193
4196
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
4194
4197
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
4198
+ rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_NVFP4", INT2NUM(LLAMA_FTYPE_MOSTLY_NVFP4));
4195
4199
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
4196
4200
  /* llama_rope_scaling_type */
4197
4201
  /* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
@@ -4912,6 +4916,17 @@ void Init_llama_cpp(void) {
4912
4916
  * @return [Boolean]
4913
4917
  */
4914
4918
  rb_define_method(rb_cLlamaModelQuantizeParams, "keep_split=", RUBY_METHOD_FUNC(llama_model_quantize_params_set_keep_split), 1);
4919
+ /**
4920
+ * Document-method: dry_run
4921
+ * @return [Boolean]
4922
+ */
4923
+ rb_define_method(rb_cLlamaModelQuantizeParams, "dry_run", RUBY_METHOD_FUNC(llama_model_quantize_params_get_dry_run), 0);
4924
+ /**
4925
+ * Document-method: dry_run=
4926
+ * @param [Boolean] dry_run
4927
+ * @return [Boolean]
4928
+ */
4929
+ rb_define_method(rb_cLlamaModelQuantizeParams, "dry_run=", RUBY_METHOD_FUNC(llama_model_quantize_params_set_dry_run), 1);
4915
4930
  /* TODO: void* imatrix */
4916
4931
  /* TODO: void* kv_overrides */
4917
4932
  /* TODO: void* tensor_types */
@@ -4987,6 +5002,7 @@ void Init_llama_cpp(void) {
4987
5002
 
4988
5003
  /* TODO: llama_attach_threadpool */
4989
5004
  /* TODO: llama_detach_threadpool */
5005
+ /* TODO: llama_model_init_from_user */
4990
5006
 
4991
5007
  /* llama_model_load_from_file */
4992
5008
  rb_define_module_function(rb_mLlamaCpp, "llama_model_load_from_file", rb_llama_model_load_from_file, 2);
@@ -5159,14 +5175,8 @@ void Init_llama_cpp(void) {
5159
5175
  /* TODO: llama_adapter_meta_key_by_index */
5160
5176
  /* TODO: llama_adapter_meta_val_str_by_index */
5161
5177
 
5162
- /* llama_set_adapter_lora */
5163
- rb_define_module_function(rb_mLlamaCpp, "llama_set_adapter_lora", rb_llama_set_adapter_lora, 3);
5164
-
5165
- /* llama_rm_adapter_lora */
5166
- rb_define_module_function(rb_mLlamaCpp, "llama_rm_adapter_lora", rb_llama_rm_adapter_lora, 2);
5167
-
5168
- /* llama_clear_adapter_lora */
5169
- rb_define_module_function(rb_mLlamaCpp, "llama_clear_adapter_lora", rb_llama_clear_adapter_lora, 1);
5178
+ /* llama_set_adapters_lora */
5179
+ rb_define_module_function(rb_mLlamaCpp, "llama_set_adapters_lora", rb_llama_set_adapters_lora, 3);
5170
5180
 
5171
5181
  /* llama_adapter_lora_free */
5172
5182
  rb_define_module_function(rb_mLlamaCpp, "llama_adapter_lora_free", rb_llama_adapter_lora_free, 1);
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LlamaCpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.23.11'
6
+ VERSION = '0.24.2'
7
7
 
8
8
  # The supported version of llama.cpp.
9
- LLAMA_CPP_VERSION = 'b7790'
9
+ LLAMA_CPP_VERSION = 'b8340'
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.23.11
4
+ version: 0.24.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
@@ -33,7 +33,7 @@ metadata:
33
33
  homepage_uri: https://github.com/yoshoku/llama_cpp.rb
34
34
  source_code_uri: https://github.com/yoshoku/llama_cpp.rb
35
35
  changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
36
- documentation_uri: https://gemdocs.org/gems/llama_cpp/0.23.11/
36
+ documentation_uri: https://gemdocs.org/gems/llama_cpp/0.24.2/
37
37
  rubygems_mfa_required: 'true'
38
38
  rdoc_options: []
39
39
  require_paths: