llama_cpp 0.24.2 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/ext/llama_cpp/llama_cpp.c +141 -29
- data/lib/llama_cpp/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 953b205d7cedadd2f1db35fc301c6b94b1db87e0121317f6c154c204e09e9d56
|
|
4
|
+
data.tar.gz: bbbc1eef7f7312e667fd238b1e5ef353861beaa445c348595470879eccd12280
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a38097de3f8e5a8acf862bf28db94140efc8f5e31d2be8e9fcbbf461ad79c77614a65acb873d25a706380056295b09cc14f94d90661e4c05e81a7c84a7cc461d
|
|
7
|
+
data.tar.gz: 2225bfdc2526274a6c3b60ec7c6ad0d88c62a291c847d511dd43772b7cc4c978cecbf69dd0fa50d10fafc3a915b95c44d8eebfdb14815777ace34318f1b28066
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
## [[0.25.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.3...v0.25.0)] - 2026-04-25
|
|
2
|
+
|
|
3
|
+
- Change supported llama.cpp version to b8920.
|
|
4
|
+
- Remove `LLAMA_PARAMS_FIT_STATUS_SUCCESS` constant value.
|
|
5
|
+
- Remove `LLAMA_PARAMS_FIT_STATUS_FAILURE` constant value.
|
|
6
|
+
- Remove `LLAMA_PARAMS_FIT_STATUS_ERROR` constant value.
|
|
7
|
+
- Remove `llama_memory_breakdown_print` module function.
|
|
8
|
+
- Add `LLAMA_FTYPE_MOSTLY_Q1_0` constant value.
|
|
9
|
+
- Add `LLAMA_SPLIT_MODE_TENSOR` constant value.
|
|
10
|
+
|
|
11
|
+
## [[0.24.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.2...v0.24.3)] - 2026-04-06
|
|
12
|
+
|
|
13
|
+
- Change supported llama.cpp version to b8640.
|
|
14
|
+
- Add `LlamaModelImatrixData` class to `LlamaCpp`.
|
|
15
|
+
- Add `LlamaModelTensorOverride` class to `LlamaCpp`.
|
|
16
|
+
|
|
1
17
|
## [[0.24.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.1...v0.24.2)] - 2026-03-15
|
|
2
18
|
|
|
3
19
|
- Change supported llama.cpp version to b8340.
|
data/ext/llama_cpp/llama_cpp.c
CHANGED
|
@@ -7,6 +7,8 @@ VALUE rb_cLlamaContext;
|
|
|
7
7
|
VALUE rb_cLlamaModelTensorBuftOverride;
|
|
8
8
|
VALUE rb_cLlamaModelParams;
|
|
9
9
|
VALUE rb_cLlamaContextParams;
|
|
10
|
+
VALUE rb_cLlamaModelTensorOverride;
|
|
11
|
+
VALUE rb_cLlamaModelImatrixData;
|
|
10
12
|
VALUE rb_cLlamaModelQuantizeParams;
|
|
11
13
|
VALUE rb_cLlamaLogitBias;
|
|
12
14
|
VALUE rb_cLlamaAdapterLora;
|
|
@@ -892,6 +894,100 @@ static VALUE llama_context_params_set_kv_unified(VALUE self, VALUE kv_unified) {
|
|
|
892
894
|
return kv_unified;
|
|
893
895
|
}
|
|
894
896
|
|
|
897
|
+
/* struct llama_model_tensor_override */
|
|
898
|
+
static void llama_model_tensor_override_free(void *ptr) {
|
|
899
|
+
if (ptr) {
|
|
900
|
+
ruby_xfree(ptr);
|
|
901
|
+
}
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
static size_t llama_model_tensor_override_size(const void *ptr) {
|
|
905
|
+
return sizeof(*((struct llama_model_tensor_override*)ptr));
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
static rb_data_type_t llama_model_tensor_override_type = {
|
|
909
|
+
"LlamaModelTensorOverride",
|
|
910
|
+
{ NULL,
|
|
911
|
+
llama_model_tensor_override_free,
|
|
912
|
+
llama_model_tensor_override_size },
|
|
913
|
+
NULL,
|
|
914
|
+
NULL,
|
|
915
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
|
916
|
+
};
|
|
917
|
+
|
|
918
|
+
static VALUE llama_model_tensor_override_alloc(VALUE self) {
|
|
919
|
+
struct llama_model_tensor_override* data = (struct llama_model_tensor_override*)ruby_xmalloc(sizeof(struct llama_model_tensor_override));
|
|
920
|
+
return TypedData_Wrap_Struct(self, &llama_model_tensor_override_type, data);
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
static struct llama_model_tensor_override* get_llama_model_tensor_override(VALUE self) {
|
|
924
|
+
struct llama_model_tensor_override* data = NULL;
|
|
925
|
+
TypedData_Get_Struct(self, struct llama_model_tensor_override, &llama_model_tensor_override_type, data);
|
|
926
|
+
return data;
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
static VALUE llama_model_tensor_override_get_pattern(VALUE self) {
|
|
930
|
+
struct llama_model_tensor_override* data = get_llama_model_tensor_override(self);
|
|
931
|
+
const char* pattern = data->pattern;
|
|
932
|
+
return rb_utf8_str_new_cstr(pattern);
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
static VALUE llama_model_tensor_override_get_type(VALUE self) {
|
|
936
|
+
struct llama_model_tensor_override* data = get_llama_model_tensor_override(self);
|
|
937
|
+
return INT2NUM(data->type);
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
/* struct llama_model_imatrix_data */
|
|
941
|
+
static void llama_model_imatrix_data_free(void *ptr) {
|
|
942
|
+
if (ptr) {
|
|
943
|
+
ruby_xfree(ptr);
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
|
|
947
|
+
static size_t llama_model_imatrix_data_size(const void *ptr) {
|
|
948
|
+
return sizeof(*((struct llama_model_imatrix_data*)ptr));
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
static rb_data_type_t llama_model_imatrix_data_type = {
|
|
952
|
+
"LlamaModelImatrixData",
|
|
953
|
+
{ NULL,
|
|
954
|
+
llama_model_imatrix_data_free,
|
|
955
|
+
llama_model_imatrix_data_size },
|
|
956
|
+
NULL,
|
|
957
|
+
NULL,
|
|
958
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
|
959
|
+
};
|
|
960
|
+
|
|
961
|
+
static VALUE llama_model_imatrix_data_alloc(VALUE self) {
|
|
962
|
+
struct llama_model_imatrix_data* data = (struct llama_model_imatrix_data*)ruby_xmalloc(sizeof(struct llama_model_imatrix_data));
|
|
963
|
+
return TypedData_Wrap_Struct(self, &llama_model_imatrix_data_type, data);
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
static struct llama_model_imatrix_data* get_llama_model_imatrix_data(VALUE self) {
|
|
967
|
+
struct llama_model_imatrix_data* data = NULL;
|
|
968
|
+
TypedData_Get_Struct(self, struct llama_model_imatrix_data, &llama_model_imatrix_data_type, data);
|
|
969
|
+
return data;
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
static VALUE llama_model_imatrix_data_get_name(VALUE self) {
|
|
973
|
+
struct llama_model_imatrix_data* data = get_llama_model_imatrix_data(self);
|
|
974
|
+
return rb_utf8_str_new_cstr(data->name);
|
|
975
|
+
}
|
|
976
|
+
|
|
977
|
+
static VALUE llama_model_imatrix_data_get_size(VALUE self) {
|
|
978
|
+
struct llama_model_imatrix_data* data = get_llama_model_imatrix_data(self);
|
|
979
|
+
return SIZET2NUM(data->size);
|
|
980
|
+
}
|
|
981
|
+
|
|
982
|
+
static VALUE llama_model_imatrix_data_get_data(VALUE self) {
|
|
983
|
+
struct llama_model_imatrix_data* data = get_llama_model_imatrix_data(self);
|
|
984
|
+
VALUE ary = rb_ary_new2(data->size);
|
|
985
|
+
for (size_t i = 0; i < data->size; i++) {
|
|
986
|
+
rb_ary_store(ary, i, DBL2NUM(data->data[i]));
|
|
987
|
+
}
|
|
988
|
+
return ary;
|
|
989
|
+
}
|
|
990
|
+
|
|
895
991
|
/* llama_model_quantize_params */
|
|
896
992
|
static void llama_model_quantize_params_free(void *ptr) {
|
|
897
993
|
if (ptr) {
|
|
@@ -3279,22 +3375,6 @@ static VALUE rb_llama_sampler_reset(VALUE self, VALUE sampler) {
|
|
|
3279
3375
|
return Qnil;
|
|
3280
3376
|
}
|
|
3281
3377
|
|
|
3282
|
-
/**
|
|
3283
|
-
* @overload llama_memory_breakdown_print(context)
|
|
3284
|
-
* @param [LlamaContext] context
|
|
3285
|
-
* @return [NilClass]
|
|
3286
|
-
*/
|
|
3287
|
-
static VALUE rb_llama_memory_breakdown_print(VALUE self, VALUE ctx) {
|
|
3288
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
|
3289
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
|
3290
|
-
return Qnil;
|
|
3291
|
-
}
|
|
3292
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
|
3293
|
-
llama_memory_breakdown_print(context_wrapper->context);
|
|
3294
|
-
RB_GC_GUARD(ctx);
|
|
3295
|
-
return Qnil;
|
|
3296
|
-
}
|
|
3297
|
-
|
|
3298
3378
|
/**
|
|
3299
3379
|
* @overload llama_sampler_clone(sampler)
|
|
3300
3380
|
* @param [LlamaSampler] sampler
|
|
@@ -4196,6 +4276,7 @@ void Init_llama_cpp(void) {
|
|
|
4196
4276
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
|
|
4197
4277
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
|
|
4198
4278
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_NVFP4", INT2NUM(LLAMA_FTYPE_MOSTLY_NVFP4));
|
|
4279
|
+
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_Q1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q1_0));
|
|
4199
4280
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
|
|
4200
4281
|
/* llama_rope_scaling_type */
|
|
4201
4282
|
/* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
|
|
@@ -4228,6 +4309,7 @@ void Init_llama_cpp(void) {
|
|
|
4228
4309
|
rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
|
|
4229
4310
|
rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
|
|
4230
4311
|
rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
|
|
4312
|
+
rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_TENSOR", INT2NUM(LLAMA_SPLIT_MODE_TENSOR));
|
|
4231
4313
|
|
|
4232
4314
|
rb_define_module_function(rb_mLlamaCpp, "llama_flash_attn_type_name", rb_llama_flash_attn_type_name, 1);
|
|
4233
4315
|
|
|
@@ -4811,6 +4893,45 @@ void Init_llama_cpp(void) {
|
|
|
4811
4893
|
/* TODO: ggml_abort_callback abort_callback */
|
|
4812
4894
|
/* TODO: void* abort_callback_data */
|
|
4813
4895
|
|
|
4896
|
+
/**
|
|
4897
|
+
* Document-class: LlamaCpp::LlamaModelTensorOverride
|
|
4898
|
+
* "struct llama_model_tensor_override" wrapper class
|
|
4899
|
+
*/
|
|
4900
|
+
rb_cLlamaModelTensorOverride = rb_define_class_under(rb_mLlamaCpp, "LlamaModelTensorOverride", rb_cObject);
|
|
4901
|
+
rb_define_alloc_func(rb_cLlamaModelTensorOverride, llama_model_tensor_override_alloc);
|
|
4902
|
+
/**
|
|
4903
|
+
* Document-method: pattern
|
|
4904
|
+
* @return [String]
|
|
4905
|
+
*/
|
|
4906
|
+
rb_define_method(rb_cLlamaModelTensorOverride, "pattern", RUBY_METHOD_FUNC(llama_model_tensor_override_get_pattern), 0);
|
|
4907
|
+
/**
|
|
4908
|
+
* Document-method: type
|
|
4909
|
+
* @return [Integer]
|
|
4910
|
+
*/
|
|
4911
|
+
rb_define_method(rb_cLlamaModelTensorOverride, "type", RUBY_METHOD_FUNC(llama_model_tensor_override_get_type), 0);
|
|
4912
|
+
|
|
4913
|
+
/**
|
|
4914
|
+
* Document-class: LlamaCpp::LlamaModelImatrixData
|
|
4915
|
+
* "struct llama_model_imatrix_data" wrapper class
|
|
4916
|
+
*/
|
|
4917
|
+
rb_cLlamaModelImatrixData = rb_define_class_under(rb_mLlamaCpp, "LlamaModelImatrixData", rb_cObject);
|
|
4918
|
+
rb_define_alloc_func(rb_cLlamaModelImatrixData, llama_model_imatrix_data_alloc);
|
|
4919
|
+
/**
|
|
4920
|
+
* Document-method: name
|
|
4921
|
+
* @return [String]
|
|
4922
|
+
*/
|
|
4923
|
+
rb_define_method(rb_cLlamaModelImatrixData, "name", RUBY_METHOD_FUNC(llama_model_imatrix_data_get_name), 0);
|
|
4924
|
+
/**
|
|
4925
|
+
* Document-method: size
|
|
4926
|
+
* @return [Integer]
|
|
4927
|
+
*/
|
|
4928
|
+
rb_define_method(rb_cLlamaModelImatrixData, "size", RUBY_METHOD_FUNC(llama_model_imatrix_data_get_size), 0);
|
|
4929
|
+
/**
|
|
4930
|
+
* Document-method: data
|
|
4931
|
+
* @return [Array<Float>]
|
|
4932
|
+
*/
|
|
4933
|
+
rb_define_method(rb_cLlamaModelImatrixData, "data", RUBY_METHOD_FUNC(llama_model_imatrix_data_get_data), 0);
|
|
4934
|
+
|
|
4814
4935
|
/**
|
|
4815
4936
|
* Document-class: LlamaCpp::LlamaModelQuantizeParams
|
|
4816
4937
|
* "struct llama_model_quantize_params" wrapper class
|
|
@@ -4927,10 +5048,10 @@ void Init_llama_cpp(void) {
|
|
|
4927
5048
|
* @return [Boolean]
|
|
4928
5049
|
*/
|
|
4929
5050
|
rb_define_method(rb_cLlamaModelQuantizeParams, "dry_run=", RUBY_METHOD_FUNC(llama_model_quantize_params_set_dry_run), 1);
|
|
4930
|
-
/* TODO:
|
|
4931
|
-
/* TODO:
|
|
4932
|
-
/* TODO:
|
|
4933
|
-
/* TODO:
|
|
5051
|
+
/* TODO: const struct llama_model_imatrix_data* imatrix */
|
|
5052
|
+
/* TODO: const struct llama_model_kv_override* kv_overrides */
|
|
5053
|
+
/* TODO: const struct llama_model_tensor_override* tt_overrides */
|
|
5054
|
+
/* TODO: const int32_t* prune_layers */
|
|
4934
5055
|
|
|
4935
5056
|
/**
|
|
4936
5057
|
* Document-class: LlamaCpp::LlamaLogitBias
|
|
@@ -5022,12 +5143,6 @@ void Init_llama_cpp(void) {
|
|
|
5022
5143
|
/* llama_free */
|
|
5023
5144
|
rb_define_module_function(rb_mLlamaCpp, "llama_free", rb_llama_free, 1);
|
|
5024
5145
|
|
|
5025
|
-
/* llama_params_fit_status */
|
|
5026
|
-
/* Document-const: LlamaCpp::LLAMA_PARAMS_FIT_STATUS_SUCCESS */
|
|
5027
|
-
rb_define_const(rb_mLlamaCpp, "LLAMA_PARAMS_FIT_STATUS_SUCCESS", INT2NUM(LLAMA_PARAMS_FIT_STATUS_SUCCESS));
|
|
5028
|
-
rb_define_const(rb_mLlamaCpp, "LLAMA_PARAMS_FIT_STATUS_FAILURE", INT2NUM(LLAMA_PARAMS_FIT_STATUS_FAILURE));
|
|
5029
|
-
rb_define_const(rb_mLlamaCpp, "LLAMA_PARAMS_FIT_STATUS_ERROR", INT2NUM(LLAMA_PARAMS_FIT_STATUS_ERROR));
|
|
5030
|
-
|
|
5031
5146
|
/* TODO: llama_params_fit */
|
|
5032
5147
|
|
|
5033
5148
|
/* llama_time_us */
|
|
@@ -5564,9 +5679,6 @@ void Init_llama_cpp(void) {
|
|
|
5564
5679
|
/* llama_perf_sampler_reset */
|
|
5565
5680
|
rb_define_module_function(rb_mLlamaCpp, "llama_perf_sampler_reset", rb_llama_perf_sampler_reset, 1);
|
|
5566
5681
|
|
|
5567
|
-
/* llama_memory_breakdown_print */
|
|
5568
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_memory_breakdown_print", rb_llama_memory_breakdown_print, 1);
|
|
5569
|
-
|
|
5570
5682
|
/* TODO: typedef bool (*llama_opt_param_filter) */
|
|
5571
5683
|
/* TODO: bool llama_opt_param_filter_all */
|
|
5572
5684
|
/* TODO: struct llama_opt_params */
|
data/lib/llama_cpp/version.rb
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
|
4
4
|
module LlamaCpp
|
|
5
5
|
# The version of llama_cpp.rb you install.
|
|
6
|
-
VERSION = '0.24.2'
|
|
6
|
+
VERSION = '0.25.0'
|
|
7
7
|
|
|
8
8
|
# The supported version of llama.cpp.
|
|
9
|
-
LLAMA_CPP_VERSION = 'b8340'
|
|
9
|
+
LLAMA_CPP_VERSION = 'b8920'
|
|
10
10
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llama_cpp
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.24.2
|
|
4
|
+
version: 0.25.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yoshoku
|
|
@@ -33,7 +33,7 @@ metadata:
|
|
|
33
33
|
homepage_uri: https://github.com/yoshoku/llama_cpp.rb
|
|
34
34
|
source_code_uri: https://github.com/yoshoku/llama_cpp.rb
|
|
35
35
|
changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
|
|
36
|
-
documentation_uri: https://gemdocs.org/gems/llama_cpp/0.24.2/
|
|
36
|
+
documentation_uri: https://gemdocs.org/gems/llama_cpp/0.25.0/
|
|
37
37
|
rubygems_mfa_required: 'true'
|
|
38
38
|
rdoc_options: []
|
|
39
39
|
require_paths:
|
|
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
49
49
|
- !ruby/object:Gem::Version
|
|
50
50
|
version: '0'
|
|
51
51
|
requirements: []
|
|
52
|
-
rubygems_version: 4.0.
|
|
52
|
+
rubygems_version: 4.0.6
|
|
53
53
|
specification_version: 4
|
|
54
54
|
summary: Ruby bindings for the llama.cpp.
|
|
55
55
|
test_files: []
|