llama_cpp 0.9.0 → 0.9.1
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/llama_cpp/extconf.rb +3 -11
- data/ext/llama_cpp/llama_cpp.cpp +147 -3
- data/ext/llama_cpp/src/ggml-cuda.cu +288 -92
- data/ext/llama_cpp/src/ggml-impl.h +237 -0
- data/ext/llama_cpp/src/ggml-metal.m +58 -37
- data/ext/llama_cpp/src/ggml-metal.metal +162 -34
- data/ext/llama_cpp/src/{k_quants.c → ggml-quants.c} +3329 -1099
- data/ext/llama_cpp/src/{k_quants.h → ggml-quants.h} +81 -22
- data/ext/llama_cpp/src/ggml.c +939 -3333
- data/ext/llama_cpp/src/ggml.h +25 -4
- data/ext/llama_cpp/src/llama.cpp +1819 -2554
- data/ext/llama_cpp/src/llama.h +32 -12
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +23 -2
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dae7507ce41f18e3fd0fb2d7445275a387a3914068aa9eef922f260de699970a
+  data.tar.gz: d66cc2629aeca3285bc10988f8c410fb8cf5b7f1fe6f835b5dc60e9dcab4be9d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3e3e92aa38413877620947ec7996494cd720a3c211fcdf1973ce0d7a9a7e8803e293e2ce2f601b11e35858c5b4ef6b00d716069e322ea8d6b4c93412990fd746
+  data.tar.gz: 20a1e9e0e5812da9b00787afbf0f3aa0b762c8168f54ce3b7f2f25ff5b61cca5b2e7ab5faa065fbc3e266468d1c5747b8e0779fc7e073cc66240d1f3085e71c7
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
+## [[0.9.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.0...v0.9.1)] - 2023-11-03
+
+- Bump bundled llama.cpp from b1429 to b1472.
+- Rename `kv_cache_tokens_rm` method to `kv_cache_clear` in Context.
+- Add `sample_min_p` method to Context.
+- Add `rope_scaling_type`, `rope_freq_base`, `rope_freq_scale`, `yarn_ext_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`, and `yarn_orig_ctx` to ContextParams.
+- Add `pure` to ModelQuantizeParams.
+- Add constants for RoPE scaling type.
+
 ## [[0.9.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.8.0...v0.9.0)] - 2023-10-28
 
 - Fix missing object file for ggml-backend when building with metal and cublas options.
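For callers, the user-visible changes reduce to one rename and one new sampler. A minimal migration sketch, assuming an initialized `LLaMACpp::Context` named `context` and a populated `LLaMACpp::TokenDataArray` named `candidates` (both names hypothetical):

```ruby
context.kv_cache_clear                        # 0.9.0 spelled this kv_cache_tokens_rm
context.sample_min_p(candidates, prob: 0.05)  # new: keeps tokens with p >= prob * p(best)
```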
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
 
 abort 'libstdc++ is not found.' unless have_library('stdc++')
 
-$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c llama.cpp llama_cpp.cpp]
+$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
 $srcs << 'ggml-mpi.c' if with_config('mpi')
 $CFLAGS << ' -w -DNDEBUG'
@@ -18,12 +18,6 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
   $CXXFLAGS << ' -pthread'
 end
 
-unless with_config('no_k_quants')
-  $CFLAGS << ' -DGGML_USE_K_QUANTS'
-  $CXXFLAGS << ' -DGGML_USE_K_QUANTS'
-  $srcs << 'k_quants.c'
-end
-
 if with_config('qkk_64')
   $CFLAGS << ' -DGGML_QKK_64'
   $CXXFLAGS << ' -DGGML_QKK_64'
@@ -53,16 +47,14 @@ if with_config('metal')
   $CFLAGS << ' -DGGML_USE_METAL'
   $CXXFLAGS << ' -DGGML_USE_METAL'
   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
-  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
-  $objs << 'k_quants.o' unless with_config('no_k_quants')
+  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-metal.o llama.o llama_cpp.o]
 end
 
 if with_config('cublas')
   $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
-  $objs << 'k_quants.o' unless with_config('no_k_quants')
+  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-cuda.o llama.o llama_cpp.o]
 end
 
 if with_config('clblast')
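With the k-quant kernels folded into the unconditionally compiled `ggml-quants.c`, the `no_k_quants` option disappears; the remaining toggles still flow through mkmf's `with_config`, which reads flags passed to the extension after `--` (e.g. `gem install llama_cpp -- --with-metal`). A standalone sketch of that mechanism, not the gem's actual extconf:

```ruby
require 'mkmf'

# ggml-quants.c is now always compiled; only the GPU backends remain optional.
$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c llama.cpp llama_cpp.cpp]
if with_config('metal') # truthy when configured with --with-metal
  $CFLAGS << ' -DGGML_USE_METAL'
  $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
end
create_makefile('llama_cpp/llama_cpp')
```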
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -796,10 +796,22 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "n_threads", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads), 0);
     rb_define_method(rb_cLLaMAContextParams, "n_threads_batch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_threads_batch), 1);
     rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
+    rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
+    rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_ext_factor), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_ext_factor), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_attn_factor), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_attn_factor), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_fast), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_fast), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_slow), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
     rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
     rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
     rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -883,6 +895,18 @@ private:
     return INT2NUM(ptr->params.n_threads_batch);
   }
 
+  // rope_scaling_type
+  static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.rope_scaling_type = NUM2INT(scaling_type);
+    return INT2NUM(ptr->params.rope_scaling_type);
+  }
+
+  static VALUE _llama_context_params_get_rope_scaling_type(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.rope_scaling_type);
+  }
+
   // rope_freq_base
   static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -907,6 +931,66 @@ private:
     return DBL2NUM(ptr->params.rope_freq_scale);
   }
 
+  // yarn_ext_factor
+  static VALUE _llama_context_params_set_yarn_ext_factor(VALUE self, VALUE yarn_ext_factor) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_ext_factor = NUM2DBL(yarn_ext_factor);
+    return DBL2NUM(ptr->params.yarn_ext_factor);
+  }
+
+  static VALUE _llama_context_params_get_yarn_ext_factor(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_ext_factor);
+  }
+
+  // yarn_attn_factor
+  static VALUE _llama_context_params_set_yarn_attn_factor(VALUE self, VALUE yarn_attn_factor) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_attn_factor = NUM2DBL(yarn_attn_factor);
+    return DBL2NUM(ptr->params.yarn_attn_factor);
+  }
+
+  static VALUE _llama_context_params_get_yarn_attn_factor(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_attn_factor);
+  }
+
+  // yarn_beta_fast
+  static VALUE _llama_context_params_set_yarn_beta_fast(VALUE self, VALUE yarn_beta_fast) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_beta_fast = NUM2DBL(yarn_beta_fast);
+    return DBL2NUM(ptr->params.yarn_beta_fast);
+  }
+
+  static VALUE _llama_context_params_get_yarn_beta_fast(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_beta_fast);
+  }
+
+  // yarn_beta_slow
+  static VALUE _llama_context_params_set_yarn_beta_slow(VALUE self, VALUE yarn_beta_slow) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_beta_slow = NUM2DBL(yarn_beta_slow);
+    return DBL2NUM(ptr->params.yarn_beta_slow);
+  }
+
+  static VALUE _llama_context_params_get_yarn_beta_slow(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_beta_slow);
+  }
+
+  // yarn_orig_ctx
+  static VALUE _llama_context_params_set_yarn_orig_ctx(VALUE self, VALUE yarn_orig_ctx) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_orig_ctx = NUM2UINT(yarn_orig_ctx);
+    return UINT2NUM(ptr->params.yarn_orig_ctx);
+  }
+
+  static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return UINT2NUM(ptr->params.yarn_orig_ctx);
+  }
+
   // mul_mat_q
   static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
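Through these bindings the YaRN parameters become plain Ruby attributes. A minimal sketch (the values are illustrative, not tuning advice):

```ruby
params = LLaMACpp::ContextParams.new
params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_YARN
params.yarn_orig_ctx    = 4096 # stored via NUM2UINT, so pass a non-negative Integer
params.yarn_ext_factor  = 1.0  # the yarn_* factors go through NUM2DBL, so pass Floats
params.yarn_attn_factor = 1.0
params.yarn_beta_fast   = 32.0
params.yarn_beta_slow   = 1.0
```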
@@ -1011,6 +1095,8 @@ public:
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "pure=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_pure), 1);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "pure", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_pure), 0);
   }
 
 private:
@@ -1083,6 +1169,18 @@ private:
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.only_copy ? Qtrue : Qfalse;
   }
+
+  // pure
+  static VALUE _llama_model_quantize_params_set_pure(VALUE self, VALUE pure) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    ptr->params.pure = RTEST(pure) ? true : false;
+    return ptr->params.pure ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_quantize_params_get_pure(VALUE self) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    return ptr->params.pure ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
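The flag maps straight onto `llama_model_quantize_params.pure`, which in upstream llama.cpp disables the per-tensor k-quant mix and quantizes every tensor to the default type. A hedged sketch, assuming the gem's module-level `LLaMACpp.model_quantize` entry point and illustrative file paths:

```ruby
params = LLaMACpp::ModelQuantizeParams.new
params.pure = true # any truthy value works; the binding filters it through RTEST

LLaMACpp.model_quantize(input_path: 'model-f16.gguf', output_path: 'model-q4_0.gguf', params: params)
```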
@@ -1741,7 +1839,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
     rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
-    rb_define_method(rb_cLLaMAContext, "
+    rb_define_method(rb_cLLaMAContext, "kv_cache_clear", RUBY_METHOD_FUNC(_llama_context_kv_cache_clear), 0);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
@@ -1754,6 +1852,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
+    rb_define_method(rb_cLLaMAContext, "sample_min_p", RUBY_METHOD_FUNC(_llama_context_sample_min_p), -1);
     rb_define_method(rb_cLLaMAContext, "sample_tail_free", RUBY_METHOD_FUNC(_llama_context_sample_tail_free), -1);
     rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
     rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
@@ -2032,13 +2131,13 @@ private:
     return INT2NUM(llama_get_kv_cache_token_count(ptr->ctx));
   }
 
-  static VALUE
+  static VALUE _llama_context_kv_cache_clear(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
-
+    llama_kv_cache_clear(ptr->ctx);
     return Qnil;
   }
 
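Unlike its predecessor, the rewritten binding takes no arguments and delegates to `llama_kv_cache_clear`, wiping the whole cache instead of removing a token range. A usage sketch, with `context` assumed initialized:

```ruby
# Drop all cached key/value entries, e.g. before evaluating an unrelated prompt.
context.kv_cache_clear
# Partial invalidation remains available through the sequence-level methods,
# e.g. kv_cache_seq_rm(seq_id, p0, p1), bound with arity 3 above.
```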
@@ -2386,6 +2485,45 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_sample_min_p(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("prob"), rb_intern("min_keep") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE candidates = Qnil;
+    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 1, kw_values);
+
+    if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
+      rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
+      return Qnil;
+    }
+    if (!RB_FLOAT_TYPE_P(kw_values[0])) {
+      rb_raise(rb_eArgError, "prob must be a float");
+      return Qnil;
+    }
+    if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "min_keep must be an integer");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
+    if (cnd_ptr->array.data == nullptr) {
+      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
+      return Qnil;
+    }
+    const float prob = NUM2DBL(kw_values[0]);
+    const size_t min_keep = kw_values[1] != Qundef ? NUM2SIZET(kw_values[1]) : 1;
+
+    llama_sample_min_p(ctx_ptr->ctx, &(cnd_ptr->array), prob, min_keep);
+
+    return Qnil;
+  }
+
   static VALUE _llama_context_sample_tail_free(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[2] = { rb_intern("z"), rb_intern("min_keep") };
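The binding mirrors `sample_top_p`: a positional `TokenDataArray` plus keyword arguments, where `prob` is required and must be a Float, and `min_keep` is an optional Integer defaulting to 1. A calling sketch, with `context` and a populated `candidates` assumed:

```ruby
context.sample_min_p(candidates, prob: 0.05, min_keep: 1)
# The array is filtered in place; chain further samplers or pick a token next,
# e.g. with the gem's sample_token(candidates).
```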
@@ -2881,6 +3019,12 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
   rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
+
   std::stringstream ss_magic;
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
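These constants pair with the new `ContextParams#rope_scaling_type` accessor; a short sketch (the frequency scale value is illustrative):

```ruby
params = LLaMACpp::ContextParams.new
params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_LINEAR
params.rope_freq_scale = 0.5 # e.g. run a 4k-trained model at up to 8k positions
```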