llama_cpp 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/llama_cpp/extconf.rb +3 -11
- data/ext/llama_cpp/llama_cpp.cpp +147 -3
- data/ext/llama_cpp/src/ggml-cuda.cu +288 -92
- data/ext/llama_cpp/src/ggml-impl.h +237 -0
- data/ext/llama_cpp/src/ggml-metal.m +58 -37
- data/ext/llama_cpp/src/ggml-metal.metal +162 -34
- data/ext/llama_cpp/src/{k_quants.c → ggml-quants.c} +3329 -1099
- data/ext/llama_cpp/src/{k_quants.h → ggml-quants.h} +81 -22
- data/ext/llama_cpp/src/ggml.c +939 -3333
- data/ext/llama_cpp/src/ggml.h +25 -4
- data/ext/llama_cpp/src/llama.cpp +1819 -2554
- data/ext/llama_cpp/src/llama.h +32 -12
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +23 -2
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dae7507ce41f18e3fd0fb2d7445275a387a3914068aa9eef922f260de699970a
|
4
|
+
data.tar.gz: d66cc2629aeca3285bc10988f8c410fb8cf5b7f1fe6f835b5dc60e9dcab4be9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e3e92aa38413877620947ec7996494cd720a3c211fcdf1973ce0d7a9a7e8803e293e2ce2f601b11e35858c5b4ef6b00d716069e322ea8d6b4c93412990fd746
|
7
|
+
data.tar.gz: 20a1e9e0e5812da9b00787afbf0f3aa0b762c8168f54ce3b7f2f25ff5b61cca5b2e7ab5faa065fbc3e266468d1c5747b8e0779fc7e073cc66240d1f3085e71c7
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## [[0.9.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.0...v0.9.1)] - 2023-11-03
|
2
|
+
|
3
|
+
- Bump bundled llama.cpp from b1429 to b1472
|
4
|
+
- Rename `kv_cache_tokens_rm` method to `kv_cache_clear` in Context.
|
5
|
+
- Add `sample_min_p` method to Context.
|
6
|
+
- Add `rope_scaling_type`, `rope_freq_base`, `rope_freq_scale`, `yarn_ext_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`, and `yarn_orig_ctx` to ContextParams.
|
7
|
+
- Add `pure` to ModelQuantizeParams.
|
8
|
+
- Add constants for RoPE scaling type.
|
9
|
+
|
1
10
|
## [[0.9.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.8.0...v0.9.0)] - 2023-10-28
|
2
11
|
|
3
12
|
- Fix missing object file for ggml-backend when building with metal and cublas options.
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
|
|
5
5
|
|
6
6
|
abort 'libstdc++ is not found.' unless have_library('stdc++')
|
7
7
|
|
8
|
-
$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c llama.cpp llama_cpp.cpp]
|
8
|
+
$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c llama.cpp llama_cpp.cpp]
|
9
9
|
$srcs << 'ggml-opencl.cpp' if with_config('clblast')
|
10
10
|
$srcs << 'ggml-mpi.c' if with_config('mpi')
|
11
11
|
$CFLAGS << ' -w -DNDEBUG'
|
@@ -18,12 +18,6 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
|
|
18
18
|
$CXXFLAGS << ' -pthread'
|
19
19
|
end
|
20
20
|
|
21
|
-
unless with_config('no_k_quants')
|
22
|
-
$CFLAGS << ' -DGGML_USE_K_QUANTS'
|
23
|
-
$CXXFLAGS << ' -DGGML_USE_K_QUANTS'
|
24
|
-
$srcs << 'k_quants.c'
|
25
|
-
end
|
26
|
-
|
27
21
|
if with_config('qkk_64')
|
28
22
|
$CFLAGS << ' -DGGML_QKK_64'
|
29
23
|
$CXXFLAGS << ' -DGGML_QKK_64'
|
@@ -53,16 +47,14 @@ if with_config('metal')
|
|
53
47
|
$CFLAGS << ' -DGGML_USE_METAL'
|
54
48
|
$CXXFLAGS << ' -DGGML_USE_METAL'
|
55
49
|
$LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
56
|
-
$objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
|
57
|
-
$objs << 'k_quants.o' unless with_config('no_k_quants')
|
50
|
+
$objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-metal.o llama.o llama_cpp.o]
|
58
51
|
end
|
59
52
|
|
60
53
|
if with_config('cublas')
|
61
54
|
$CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
|
62
55
|
$CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
|
63
56
|
$LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
|
64
|
-
$objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
|
65
|
-
$objs << 'k_quants.o' unless with_config('no_k_quants')
|
57
|
+
$objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-cuda.o llama.o llama_cpp.o]
|
66
58
|
end
|
67
59
|
|
68
60
|
if with_config('clblast')
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -796,10 +796,22 @@ public:
|
|
796
796
|
rb_define_method(rb_cLLaMAContextParams, "n_threads", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads), 0);
|
797
797
|
rb_define_method(rb_cLLaMAContextParams, "n_threads_batch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_threads_batch), 1);
|
798
798
|
rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
|
799
|
+
rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
|
800
|
+
rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
|
799
801
|
rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
|
800
802
|
rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
|
801
803
|
rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
|
802
804
|
rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
|
805
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_ext_factor), 1);
|
806
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_ext_factor), 0);
|
807
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_attn_factor), 1);
|
808
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_attn_factor), 0);
|
809
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_fast), 1);
|
810
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_fast), 0);
|
811
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_slow), 1);
|
812
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
|
813
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
|
814
|
+
rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
|
803
815
|
rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
|
804
816
|
rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
|
805
817
|
rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
|
@@ -883,6 +895,18 @@ private:
|
|
883
895
|
return INT2NUM(ptr->params.n_threads_batch);
|
884
896
|
}
|
885
897
|
|
898
|
+
// rope_scaling_type
|
899
|
+
static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
|
900
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
901
|
+
ptr->params.rope_scaling_type = NUM2INT(scaling_type);
|
902
|
+
return INT2NUM(ptr->params.rope_scaling_type);
|
903
|
+
}
|
904
|
+
|
905
|
+
static VALUE _llama_context_params_get_rope_scaling_type(VALUE self) {
|
906
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
907
|
+
return INT2NUM(ptr->params.rope_scaling_type);
|
908
|
+
}
|
909
|
+
|
886
910
|
// rope_freq_base
|
887
911
|
static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
|
888
912
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
@@ -907,6 +931,66 @@ private:
|
|
907
931
|
return DBL2NUM(ptr->params.rope_freq_scale);
|
908
932
|
}
|
909
933
|
|
934
|
+
// yarn_ext_factor
|
935
|
+
static VALUE _llama_context_params_set_yarn_ext_factor(VALUE self, VALUE yarn_ext_factor) {
|
936
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
937
|
+
ptr->params.yarn_ext_factor = NUM2DBL(yarn_ext_factor);
|
938
|
+
return DBL2NUM(ptr->params.yarn_ext_factor);
|
939
|
+
}
|
940
|
+
|
941
|
+
static VALUE _llama_context_params_get_yarn_ext_factor(VALUE self) {
|
942
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
943
|
+
return DBL2NUM(ptr->params.yarn_ext_factor);
|
944
|
+
}
|
945
|
+
|
946
|
+
// yarn_attn_factor
|
947
|
+
static VALUE _llama_context_params_set_yarn_attn_factor(VALUE self, VALUE yarn_attn_factor) {
|
948
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
949
|
+
ptr->params.yarn_attn_factor = NUM2DBL(yarn_attn_factor);
|
950
|
+
return DBL2NUM(ptr->params.yarn_attn_factor);
|
951
|
+
}
|
952
|
+
|
953
|
+
static VALUE _llama_context_params_get_yarn_attn_factor(VALUE self) {
|
954
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
955
|
+
return DBL2NUM(ptr->params.yarn_attn_factor);
|
956
|
+
}
|
957
|
+
|
958
|
+
// yarn_beta_fast
|
959
|
+
static VALUE _llama_context_params_set_yarn_beta_fast(VALUE self, VALUE yarn_beta_fast) {
|
960
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
961
|
+
ptr->params.yarn_beta_fast = NUM2DBL(yarn_beta_fast);
|
962
|
+
return DBL2NUM(ptr->params.yarn_beta_fast);
|
963
|
+
}
|
964
|
+
|
965
|
+
static VALUE _llama_context_params_get_yarn_beta_fast(VALUE self) {
|
966
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
967
|
+
return DBL2NUM(ptr->params.yarn_beta_fast);
|
968
|
+
}
|
969
|
+
|
970
|
+
// yarn_beta_slow
|
971
|
+
static VALUE _llama_context_params_set_yarn_beta_slow(VALUE self, VALUE yarn_beta_slow) {
|
972
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
973
|
+
ptr->params.yarn_beta_slow = NUM2DBL(yarn_beta_slow);
|
974
|
+
return DBL2NUM(ptr->params.yarn_beta_slow);
|
975
|
+
}
|
976
|
+
|
977
|
+
static VALUE _llama_context_params_get_yarn_beta_slow(VALUE self) {
|
978
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
979
|
+
return DBL2NUM(ptr->params.yarn_beta_slow);
|
980
|
+
}
|
981
|
+
|
982
|
+
// yarn_orig_ctx
|
983
|
+
static VALUE _llama_context_params_set_yarn_orig_ctx(VALUE self, VALUE yarn_orig_ctx) {
|
984
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
985
|
+
ptr->params.yarn_orig_ctx = NUM2UINT(yarn_orig_ctx);
|
986
|
+
return UINT2NUM(ptr->params.yarn_orig_ctx);
|
987
|
+
}
|
988
|
+
|
989
|
+
static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
|
990
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
991
|
+
return UINT2NUM(ptr->params.yarn_orig_ctx);
|
992
|
+
}
|
993
|
+
|
910
994
|
// mul_mat_q
|
911
995
|
static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
|
912
996
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
@@ -1011,6 +1095,8 @@ public:
|
|
1011
1095
|
rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
|
1012
1096
|
rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
|
1013
1097
|
rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
|
1098
|
+
rb_define_method(rb_cLLaMAModelQuantizeParams, "pure=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_pure), 1);
|
1099
|
+
rb_define_method(rb_cLLaMAModelQuantizeParams, "pure", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_pure), 0);
|
1014
1100
|
}
|
1015
1101
|
|
1016
1102
|
private:
|
@@ -1083,6 +1169,18 @@ private:
|
|
1083
1169
|
LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
|
1084
1170
|
return ptr->params.only_copy ? Qtrue : Qfalse;
|
1085
1171
|
}
|
1172
|
+
|
1173
|
+
// pure
|
1174
|
+
static VALUE _llama_model_quantize_params_set_pure(VALUE self, VALUE pure) {
|
1175
|
+
LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
|
1176
|
+
ptr->params.pure = RTEST(pure) ? true : false;
|
1177
|
+
return ptr->params.pure ? Qtrue : Qfalse;
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
static VALUE _llama_model_quantize_params_get_pure(VALUE self) {
|
1181
|
+
LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
|
1182
|
+
return ptr->params.pure ? Qtrue : Qfalse;
|
1183
|
+
}
|
1086
1184
|
};
|
1087
1185
|
|
1088
1186
|
const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
|
@@ -1741,7 +1839,7 @@ public:
|
|
1741
1839
|
rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
|
1742
1840
|
rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
|
1743
1841
|
rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
|
1744
|
-
rb_define_method(rb_cLLaMAContext, "
|
1842
|
+
rb_define_method(rb_cLLaMAContext, "kv_cache_clear", RUBY_METHOD_FUNC(_llama_context_kv_cache_clear), 0);
|
1745
1843
|
rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
|
1746
1844
|
rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
|
1747
1845
|
rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
|
@@ -1754,6 +1852,7 @@ public:
|
|
1754
1852
|
rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
|
1755
1853
|
rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
|
1756
1854
|
rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
|
1855
|
+
rb_define_method(rb_cLLaMAContext, "sample_min_p", RUBY_METHOD_FUNC(_llama_context_sample_min_p), -1);
|
1757
1856
|
rb_define_method(rb_cLLaMAContext, "sample_tail_free", RUBY_METHOD_FUNC(_llama_context_sample_tail_free), -1);
|
1758
1857
|
rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
|
1759
1858
|
rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
|
@@ -2032,13 +2131,13 @@ private:
|
|
2032
2131
|
return INT2NUM(llama_get_kv_cache_token_count(ptr->ctx));
|
2033
2132
|
}
|
2034
2133
|
|
2035
|
-
static VALUE
|
2134
|
+
static VALUE _llama_context_kv_cache_clear(VALUE self) {
|
2036
2135
|
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2037
2136
|
if (ptr->ctx == NULL) {
|
2038
2137
|
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2039
2138
|
return Qnil;
|
2040
2139
|
}
|
2041
|
-
|
2140
|
+
llama_kv_cache_clear(ptr->ctx);
|
2042
2141
|
return Qnil;
|
2043
2142
|
}
|
2044
2143
|
|
@@ -2386,6 +2485,45 @@ private:
|
|
2386
2485
|
return Qnil;
|
2387
2486
|
}
|
2388
2487
|
|
2488
|
+
static VALUE _llama_context_sample_min_p(int argc, VALUE* argv, VALUE self) {
|
2489
|
+
VALUE kw_args = Qnil;
|
2490
|
+
ID kw_table[2] = { rb_intern("prob"), rb_intern("min_keep") };
|
2491
|
+
VALUE kw_values[2] = { Qundef, Qundef };
|
2492
|
+
VALUE candidates = Qnil;
|
2493
|
+
rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
|
2494
|
+
rb_get_kwargs(kw_args, kw_table, 1, 1, kw_values);
|
2495
|
+
|
2496
|
+
if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
|
2497
|
+
rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
|
2498
|
+
return Qnil;
|
2499
|
+
}
|
2500
|
+
if (!RB_FLOAT_TYPE_P(kw_values[0])) {
|
2501
|
+
rb_raise(rb_eArgError, "prob must be a float");
|
2502
|
+
return Qnil;
|
2503
|
+
}
|
2504
|
+
if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
|
2505
|
+
rb_raise(rb_eArgError, "min_keep must be an integer");
|
2506
|
+
return Qnil;
|
2507
|
+
}
|
2508
|
+
|
2509
|
+
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
2510
|
+
if (ctx_ptr->ctx == NULL) {
|
2511
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2512
|
+
return Qnil;
|
2513
|
+
}
|
2514
|
+
LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
|
2515
|
+
if (cnd_ptr->array.data == nullptr) {
|
2516
|
+
rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
|
2517
|
+
return Qnil;
|
2518
|
+
}
|
2519
|
+
const float prob = NUM2DBL(kw_values[0]);
|
2520
|
+
const size_t min_keep = kw_values[1] != Qundef ? NUM2SIZET(kw_values[1]) : 1;
|
2521
|
+
|
2522
|
+
llama_sample_min_p(ctx_ptr->ctx, &(cnd_ptr->array), prob, min_keep);
|
2523
|
+
|
2524
|
+
return Qnil;
|
2525
|
+
}
|
2526
|
+
|
2389
2527
|
static VALUE _llama_context_sample_tail_free(int argc, VALUE* argv, VALUE self) {
|
2390
2528
|
VALUE kw_args = Qnil;
|
2391
2529
|
ID kw_table[2] = { rb_intern("z"), rb_intern("min_keep") };
|
@@ -2881,6 +3019,12 @@ extern "C" void Init_llama_cpp(void) {
|
|
2881
3019
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
|
2882
3020
|
rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
|
2883
3021
|
|
3022
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
|
3023
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
|
3024
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
|
3025
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
|
3026
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
|
3027
|
+
|
2884
3028
|
std::stringstream ss_magic;
|
2885
3029
|
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
|
2886
3030
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
|