llama_cpp 0.24.2 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1130bd5d4bd478e4aed2e67d836fe66aa0bf166dac85e28557e05814b75d48b2
4
- data.tar.gz: fb276cff62ba89f3726b526c7efea7d6b76ff4164b3885cd70c07c36ae2a4ec7
3
+ metadata.gz: 953b205d7cedadd2f1db35fc301c6b94b1db87e0121317f6c154c204e09e9d56
4
+ data.tar.gz: bbbc1eef7f7312e667fd238b1e5ef353861beaa445c348595470879eccd12280
5
5
  SHA512:
6
- metadata.gz: f45b85cc4dfebd8a0afb0592a1ddee159656a749033cc5abf395f88c19742a705263018b72acb234142d2821b3b5ba2e1a09ff1884347b0a73bea5b1a6b0c3bc
7
- data.tar.gz: 9b000d84f97eaa7e4f6b775ce03899a60aae832440fb53b28115d576441df330d42bd3761984fe2765c3b9fed60121caeaed7c697288140a58fb193467e9d082
6
+ metadata.gz: a38097de3f8e5a8acf862bf28db94140efc8f5e31d2be8e9fcbbf461ad79c77614a65acb873d25a706380056295b09cc14f94d90661e4c05e81a7c84a7cc461d
7
+ data.tar.gz: 2225bfdc2526274a6c3b60ec7c6ad0d88c62a291c847d511dd43772b7cc4c978cecbf69dd0fa50d10fafc3a915b95c44d8eebfdb14815777ace34318f1b28066
data/CHANGELOG.md CHANGED
@@ -1,3 +1,19 @@
1
+ ## [[0.25.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.3...v0.25.0)] - 2026-04-25
2
+
3
+ - Change supported llama.cpp version to b8920.
4
+ - Remove `LLAMA_PARAMS_FIT_STATUS_SUCCESS` constant value.
5
+ - Remove `LLAMA_PARAMS_FIT_STATUS_FAILURE` constant value.
6
+ - Remove `LLAMA_PARAMS_FIT_STATUS_ERROR` constant value.
7
+ - Remove `llama_memory_breakdown_print` module function.
8
+ - Add `LLAMA_FTYPE_MOSTLY_Q1_0` constant value.
9
+ - Add `LLAMA_SPLIT_MODE_TENSOR` constant value.
10
+
11
+ ## [[0.24.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.2...v0.24.3)] - 2026-04-06
12
+
13
+ - Change supported llama.cpp version to b8640.
14
+ - Add `LlamaModelImatrixData` class to `LlamaCpp`.
15
+ - Add `LlamaModelTensorOverride` class to `LlamaCpp`.
16
+
1
17
  ## [[0.24.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.1...v0.24.2)] - 2026-03-15
2
18
 
3
19
  - Change supported llama.cpp version to b8340.
@@ -7,6 +7,8 @@ VALUE rb_cLlamaContext;
7
7
  VALUE rb_cLlamaModelTensorBuftOverride;
8
8
  VALUE rb_cLlamaModelParams;
9
9
  VALUE rb_cLlamaContextParams;
10
+ VALUE rb_cLlamaModelTensorOverride;
11
+ VALUE rb_cLlamaModelImatrixData;
10
12
  VALUE rb_cLlamaModelQuantizeParams;
11
13
  VALUE rb_cLlamaLogitBias;
12
14
  VALUE rb_cLlamaAdapterLora;
@@ -892,6 +894,100 @@ static VALUE llama_context_params_set_kv_unified(VALUE self, VALUE kv_unified) {
892
894
  return kv_unified;
893
895
  }
894
896
 
897
+ /* struct llama_model_tensor_override */
898
+ static void llama_model_tensor_override_free(void *ptr) {
899
+ if (ptr) {
900
+ ruby_xfree(ptr);
901
+ }
902
+ }
903
+
904
+ static size_t llama_model_tensor_override_size(const void *ptr) {
905
+ return sizeof(*((struct llama_model_tensor_override*)ptr));
906
+ }
907
+
908
+ static rb_data_type_t llama_model_tensor_override_type = {
909
+ "LlamaModelTensorOverride",
910
+ { NULL,
911
+ llama_model_tensor_override_free,
912
+ llama_model_tensor_override_size },
913
+ NULL,
914
+ NULL,
915
+ RUBY_TYPED_FREE_IMMEDIATELY
916
+ };
917
+
918
+ static VALUE llama_model_tensor_override_alloc(VALUE self) {
919
+ struct llama_model_tensor_override* data = (struct llama_model_tensor_override*)ruby_xmalloc(sizeof(struct llama_model_tensor_override));
920
+ return TypedData_Wrap_Struct(self, &llama_model_tensor_override_type, data);
921
+ }
922
+
923
+ static struct llama_model_tensor_override* get_llama_model_tensor_override(VALUE self) {
924
+ struct llama_model_tensor_override* data = NULL;
925
+ TypedData_Get_Struct(self, struct llama_model_tensor_override, &llama_model_tensor_override_type, data);
926
+ return data;
927
+ }
928
+
929
+ static VALUE llama_model_tensor_override_get_pattern(VALUE self) {
930
+ struct llama_model_tensor_override* data = get_llama_model_tensor_override(self);
931
+ const char* pattern = data->pattern;
932
+ return rb_utf8_str_new_cstr(pattern);
933
+ }
934
+
935
+ static VALUE llama_model_tensor_override_get_type(VALUE self) {
936
+ struct llama_model_tensor_override* data = get_llama_model_tensor_override(self);
937
+ return INT2NUM(data->type);
938
+ }
939
+
940
+ /* struct llama_model_imatrix_data */
941
+ static void llama_model_imatrix_data_free(void *ptr) {
942
+ if (ptr) {
943
+ ruby_xfree(ptr);
944
+ }
945
+ }
946
+
947
+ static size_t llama_model_imatrix_data_size(const void *ptr) {
948
+ return sizeof(*((struct llama_model_imatrix_data*)ptr));
949
+ }
950
+
951
+ static rb_data_type_t llama_model_imatrix_data_type = {
952
+ "LlamaModelImatrixData",
953
+ { NULL,
954
+ llama_model_imatrix_data_free,
955
+ llama_model_imatrix_data_size },
956
+ NULL,
957
+ NULL,
958
+ RUBY_TYPED_FREE_IMMEDIATELY
959
+ };
960
+
961
+ static VALUE llama_model_imatrix_data_alloc(VALUE self) {
962
+ struct llama_model_imatrix_data* data = (struct llama_model_imatrix_data*)ruby_xmalloc(sizeof(struct llama_model_imatrix_data));
963
+ return TypedData_Wrap_Struct(self, &llama_model_imatrix_data_type, data);
964
+ }
965
+
966
+ static struct llama_model_imatrix_data* get_llama_model_imatrix_data(VALUE self) {
967
+ struct llama_model_imatrix_data* data = NULL;
968
+ TypedData_Get_Struct(self, struct llama_model_imatrix_data, &llama_model_imatrix_data_type, data);
969
+ return data;
970
+ }
971
+
972
+ static VALUE llama_model_imatrix_data_get_name(VALUE self) {
973
+ struct llama_model_imatrix_data* data = get_llama_model_imatrix_data(self);
974
+ return rb_utf8_str_new_cstr(data->name);
975
+ }
976
+
977
+ static VALUE llama_model_imatrix_data_get_size(VALUE self) {
978
+ struct llama_model_imatrix_data* data = get_llama_model_imatrix_data(self);
979
+ return SIZET2NUM(data->size);
980
+ }
981
+
982
+ static VALUE llama_model_imatrix_data_get_data(VALUE self) {
983
+ struct llama_model_imatrix_data* data = get_llama_model_imatrix_data(self);
984
+ VALUE ary = rb_ary_new2(data->size);
985
+ for (size_t i = 0; i < data->size; i++) {
986
+ rb_ary_store(ary, i, DBL2NUM(data->data[i]));
987
+ }
988
+ return ary;
989
+ }
990
+
895
991
  /* llama_model_quantize_params */
896
992
  static void llama_model_quantize_params_free(void *ptr) {
897
993
  if (ptr) {
@@ -3279,22 +3375,6 @@ static VALUE rb_llama_sampler_reset(VALUE self, VALUE sampler) {
3279
3375
  return Qnil;
3280
3376
  }
3281
3377
 
3282
- /**
3283
- * @overload llama_memory_breakdown_print(context)
3284
- * @param [LlamaContext] context
3285
- * @return [NilClass]
3286
- */
3287
- static VALUE rb_llama_memory_breakdown_print(VALUE self, VALUE ctx) {
3288
- if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
3289
- rb_raise(rb_eArgError, "ctx must be a LlamaContext");
3290
- return Qnil;
3291
- }
3292
- llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
3293
- llama_memory_breakdown_print(context_wrapper->context);
3294
- RB_GC_GUARD(ctx);
3295
- return Qnil;
3296
- }
3297
-
3298
3378
  /**
3299
3379
  * @overload llama_sampler_clone(sampler)
3300
3380
  * @param [LlamaSampler] sampler
@@ -4196,6 +4276,7 @@ void Init_llama_cpp(void) {
4196
4276
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
4197
4277
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
4198
4278
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_NVFP4", INT2NUM(LLAMA_FTYPE_MOSTLY_NVFP4));
4279
+ rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_Q1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q1_0));
4199
4280
  rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
4200
4281
  /* llama_rope_scaling_type */
4201
4282
  /* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
@@ -4228,6 +4309,7 @@ void Init_llama_cpp(void) {
4228
4309
  rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
4229
4310
  rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
4230
4311
  rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
4312
+ rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_TENSOR", INT2NUM(LLAMA_SPLIT_MODE_TENSOR));
4231
4313
 
4232
4314
  rb_define_module_function(rb_mLlamaCpp, "llama_flash_attn_type_name", rb_llama_flash_attn_type_name, 1);
4233
4315
 
@@ -4811,6 +4893,45 @@ void Init_llama_cpp(void) {
4811
4893
  /* TODO: ggml_abort_callback abort_callback */
4812
4894
  /* TODO: void* abort_callback_data */
4813
4895
 
4896
+ /**
4897
+ * Document-class: LlamaCpp::LlamaModelTensorOverride
4898
+ * "struct llama_model_tensor_override" wrapper class
4899
+ */
4900
+ rb_cLlamaModelTensorOverride = rb_define_class_under(rb_mLlamaCpp, "LlamaModelTensorOverride", rb_cObject);
4901
+ rb_define_alloc_func(rb_cLlamaModelTensorOverride, llama_model_tensor_override_alloc);
4902
+ /**
4903
+ * Document-method: pattern
4904
+ * @return [String]
4905
+ */
4906
+ rb_define_method(rb_cLlamaModelTensorOverride, "pattern", RUBY_METHOD_FUNC(llama_model_tensor_override_get_pattern), 0);
4907
+ /**
4908
+ * Document-method: type
4909
+ * @return [Integer]
4910
+ */
4911
+ rb_define_method(rb_cLlamaModelTensorOverride, "type", RUBY_METHOD_FUNC(llama_model_tensor_override_get_type), 0);
4912
+
4913
+ /**
4914
+ * Document-class: LlamaCpp::LlamaModelImatrixData
4915
+ * "struct llama_model_imatrix_data" wrapper class
4916
+ */
4917
+ rb_cLlamaModelImatrixData = rb_define_class_under(rb_mLlamaCpp, "LlamaModelImatrixData", rb_cObject);
4918
+ rb_define_alloc_func(rb_cLlamaModelImatrixData, llama_model_imatrix_data_alloc);
4919
+ /**
4920
+ * Document-method: name
4921
+ * @return [String]
4922
+ */
4923
+ rb_define_method(rb_cLlamaModelImatrixData, "name", RUBY_METHOD_FUNC(llama_model_imatrix_data_get_name), 0);
4924
+ /**
4925
+ * Document-method: size
4926
+ * @return [Integer]
4927
+ */
4928
+ rb_define_method(rb_cLlamaModelImatrixData, "size", RUBY_METHOD_FUNC(llama_model_imatrix_data_get_size), 0);
4929
+ /**
4930
+ * Document-method: data
4931
+ * @return [Array<Float>]
4932
+ */
4933
+ rb_define_method(rb_cLlamaModelImatrixData, "data", RUBY_METHOD_FUNC(llama_model_imatrix_data_get_data), 0);
4934
+
4814
4935
  /**
4815
4936
  * Document-class: LlamaCpp::LlamaModelQuantizeParams
4816
4937
  * "struct llama_model_quantize_params" wrapper class
@@ -4927,10 +5048,10 @@ void Init_llama_cpp(void) {
4927
5048
  * @return [Boolean]
4928
5049
  */
4929
5050
  rb_define_method(rb_cLlamaModelQuantizeParams, "dry_run=", RUBY_METHOD_FUNC(llama_model_quantize_params_set_dry_run), 1);
4930
- /* TODO: void* imatrix */
4931
- /* TODO: void* kv_overrides */
4932
- /* TODO: void* tensor_types */
4933
- /* TODO: void* prune_layers */
5051
+ /* TODO: const struct llama_model_imatrix_data* imatrix */
5052
+ /* TODO: const struct llama_model_kv_override* kv_overrides */
5053
+ /* TODO: const struct llama_model_tensor_override* tt_overrides */
5054
+ /* TODO: const int32_t* prune_layers */
4934
5055
 
4935
5056
  /**
4936
5057
  * Document-class: LlamaCpp::LlamaLogitBias
@@ -5022,12 +5143,6 @@ void Init_llama_cpp(void) {
5022
5143
  /* llama_free */
5023
5144
  rb_define_module_function(rb_mLlamaCpp, "llama_free", rb_llama_free, 1);
5024
5145
 
5025
- /* llama_params_fit_status */
5026
- /* Document-const: LlamaCpp::LLAMA_PARAMS_FIT_STATUS_SUCCESS */
5027
- rb_define_const(rb_mLlamaCpp, "LLAMA_PARAMS_FIT_STATUS_SUCCESS", INT2NUM(LLAMA_PARAMS_FIT_STATUS_SUCCESS));
5028
- rb_define_const(rb_mLlamaCpp, "LLAMA_PARAMS_FIT_STATUS_FAILURE", INT2NUM(LLAMA_PARAMS_FIT_STATUS_FAILURE));
5029
- rb_define_const(rb_mLlamaCpp, "LLAMA_PARAMS_FIT_STATUS_ERROR", INT2NUM(LLAMA_PARAMS_FIT_STATUS_ERROR));
5030
-
5031
5146
  /* TODO: llama_params_fit */
5032
5147
 
5033
5148
  /* llama_time_us */
@@ -5564,9 +5679,6 @@ void Init_llama_cpp(void) {
5564
5679
  /* llama_perf_sampler_reset */
5565
5680
  rb_define_module_function(rb_mLlamaCpp, "llama_perf_sampler_reset", rb_llama_perf_sampler_reset, 1);
5566
5681
 
5567
- /* llama_memory_breakdown_print */
5568
- rb_define_module_function(rb_mLlamaCpp, "llama_memory_breakdown_print", rb_llama_memory_breakdown_print, 1);
5569
-
5570
5682
  /* TODO: typedef bool (*llama_opt_param_filter) */
5571
5683
  /* TODO: bool llama_opt_param_filter_all */
5572
5684
  /* TODO: struct llama_opt_params */
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LlamaCpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.24.2'
6
+ VERSION = '0.25.0'
7
7
 
8
8
  # The supported version of llama.cpp.
9
- LLAMA_CPP_VERSION = 'b8340'
9
+ LLAMA_CPP_VERSION = 'b8920'
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.24.2
4
+ version: 0.25.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
@@ -33,7 +33,7 @@ metadata:
33
33
  homepage_uri: https://github.com/yoshoku/llama_cpp.rb
34
34
  source_code_uri: https://github.com/yoshoku/llama_cpp.rb
35
35
  changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
36
- documentation_uri: https://gemdocs.org/gems/llama_cpp/0.24.2/
36
+ documentation_uri: https://gemdocs.org/gems/llama_cpp/0.25.0/
37
37
  rubygems_mfa_required: 'true'
38
38
  rdoc_options: []
39
39
  require_paths:
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
51
  requirements: []
52
- rubygems_version: 4.0.3
52
+ rubygems_version: 4.0.6
53
53
  specification_version: 4
54
54
  summary: Ruby bindings for the llama.cpp.
55
55
  test_files: []