llama_cpp 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/examples/chat.rb +8 -6
- data/ext/llama_cpp/extconf.rb +3 -11
- data/ext/llama_cpp/llama_cpp.cpp +228 -165
- data/ext/llama_cpp/src/ggml-cuda.cu +441 -77
- data/ext/llama_cpp/src/ggml-impl.h +237 -0
- data/ext/llama_cpp/src/ggml-metal.m +71 -42
- data/ext/llama_cpp/src/ggml-metal.metal +171 -35
- data/ext/llama_cpp/src/ggml-opencl.cpp +161 -169
- data/ext/llama_cpp/src/{k_quants.c → ggml-quants.c} +3329 -1099
- data/ext/llama_cpp/src/{k_quants.h → ggml-quants.h} +81 -22
- data/ext/llama_cpp/src/ggml.c +1303 -3419
- data/ext/llama_cpp/src/ggml.h +33 -11
- data/ext/llama_cpp/src/llama.cpp +1925 -2655
- data/ext/llama_cpp/src/llama.h +48 -33
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +4 -4
- data/sig/llama_cpp.rbs +34 -14
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dae7507ce41f18e3fd0fb2d7445275a387a3914068aa9eef922f260de699970a
+  data.tar.gz: d66cc2629aeca3285bc10988f8c410fb8cf5b7f1fe6f835b5dc60e9dcab4be9d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3e3e92aa38413877620947ec7996494cd720a3c211fcdf1973ce0d7a9a7e8803e293e2ce2f601b11e35858c5b4ef6b00d716069e322ea8d6b4c93412990fd746
+  data.tar.gz: 20a1e9e0e5812da9b00787afbf0f3aa0b762c8168f54ce3b7f2f25ff5b61cca5b2e7ab5faa065fbc3e266468d1c5747b8e0779fc7e073cc66240d1f3085e71c7
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
+## [[0.9.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.0...v0.9.1)] - 2023-11-03
+
+- Bump bundled llama.cpp from b1429 to b1472.
+- Rename `kv_cache_tokens_rm` method to `kv_cache_clear` in Context.
+- Add `sample_min_p` method to Context.
+- Add `rope_scaling_type`, `rope_freq_base`, `rope_freq_scale`, `yarn_ext_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`, and `yarn_orig_ctx` to ContextParams.
+- Add `pure` to ModelQuantizeParams.
+- Add constants for RoPE scaling type.
+
+## [[0.9.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.8.0...v0.9.0)] - 2023-10-28
+
+- Fix missing object file for ggml-backend when building with metal and cublas options.
+
+**Breaking Changes**
+- Bump bundled llama.cpp from b1405 to b1429.
+- Move the following methods from Context to Model:
+  - text, score, type, token_bos, token_eos, token_nl, token_prefix, token_middle, token_suffix, and token_eot.
+- Add `sample_repetition_penalties` method, which integrates the sample_frequency_and_presence_penalties and sample_repetition_penalty methods.
+
 ## [[0.8.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.7.1...v0.8.0)] - 2023-10-21
 
 **Breaking Changes**
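Taken together, the 0.9.1 entries describe a small Ruby-visible surface. A minimal sketch of the renamed and added Context API (the `context` and `candidates` objects are assumed to be set up as in examples/chat.rb below; all numeric values are illustrative):

```ruby
# Renamed in 0.9.1: clears the whole KV cache (was kv_cache_tokens_rm).
context.kv_cache_clear

# New in 0.9.1: min-p sampling over a LLaMACpp::TokenDataArray.
context.sample_min_p(candidates, prob: 0.05)

# New ContextParams accessors for RoPE/YaRN scaling.
params = LLaMACpp::ContextParams.new
params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_YARN
params.yarn_orig_ctx = 4096
```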
data/examples/chat.rb
CHANGED
@@ -83,10 +83,12 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
 candidates = LLaMACpp::TokenDataArray.new(base_candidates)
 
 last_n_repeat = [last_n_tokens.size, options[:repeat_last_n], n_ctx].min
-context.
-
-
-
+context.sample_repetition_penalties(
+  candidates,
+  last_n_tokens[-last_n_repeat..],
+  penalty_repeat: options[:repeat_penalty],
+  penalty_freq: options[:frequency_penalty],
+  penalty_present: options[:presence_penalty]
 )
 
 context.sample_top_k(candidates, k: options[:top_k])
@@ -99,8 +101,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
 last_n_tokens.shift
 last_n_tokens.push(id)
 
-if id == context.token_eos
-  id = context.token_nl
+if id == context.model.token_eos
+  id = context.model.token_nl
   unless antiprompt.empty?
     first_antiprompt = context.model.tokenize(text: antiprompt, add_bos: false)
     embd_input.concat(first_antiprompt)
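For callers migrating this pattern by hand: the two pre-0.9.0 sampler calls collapse into one, and the keyword names change. A sketch, where `recent = last_n_tokens[-last_n_repeat..]` (the removed 0.8.x keywords `frequency:` and `presence:` are visible in the C code deleted later in this diff; `penalty:` is inferred from it):

```ruby
# 0.8.x (removed):
#   context.sample_repetition_penalty(candidates, recent, penalty: 1.1)
#   context.sample_frequency_and_presence_penalties(candidates, recent,
#                                                   frequency: 0.0, presence: 0.0)
# 0.9.x: one call; all three keywords are required floats.
context.sample_repetition_penalties(candidates, recent,
                                    penalty_repeat: 1.1,
                                    penalty_freq: 0.0,
                                    penalty_present: 0.0)
```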
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
 
 abort 'libstdc++ is not found.' unless have_library('stdc++')
 
-$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c llama.cpp llama_cpp.cpp]
+$srcs = %w[ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
 $srcs << 'ggml-mpi.c' if with_config('mpi')
 $CFLAGS << ' -w -DNDEBUG'
@@ -18,12 +18,6 @@ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>',
   $CXXFLAGS << ' -pthread'
 end
 
-unless with_config('no_k_quants')
-  $CFLAGS << ' -DGGML_USE_K_QUANTS'
-  $CXXFLAGS << ' -DGGML_USE_K_QUANTS'
-  $srcs << 'k_quants.c'
-end
-
 if with_config('qkk_64')
   $CFLAGS << ' -DGGML_QKK_64'
   $CXXFLAGS << ' -DGGML_QKK_64'
@@ -53,16 +47,14 @@ if with_config('metal')
   $CFLAGS << ' -DGGML_USE_METAL'
   $CXXFLAGS << ' -DGGML_USE_METAL'
   $LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
-  $objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
-  $objs << 'k_quants.o' unless with_config('no_k_quants')
+  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-metal.o llama.o llama_cpp.o]
 end
 
 if with_config('cublas')
   $CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
   $LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
-  $objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
-  $objs << 'k_quants.o' unless with_config('no_k_quants')
+  $objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-quants.o ggml-cuda.o llama.o llama_cpp.o]
 end
 
 if with_config('clblast')
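Build-flag implication of this file: `--with-no_k_quants` is gone along with `k_quants.c`; the k-quant kernels moved into `ggml-quants.c` and are now always compiled. The remaining mkmf options (`--with-qkk_64`, `--with-metal`, `--with-cublas`, `--with-clblast`, `--with-mpi`) behave as before, passed in the usual way, e.g. `gem install llama_cpp -- --with-metal`.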
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -796,10 +796,22 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "n_threads", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads), 0);
     rb_define_method(rb_cLLaMAContextParams, "n_threads_batch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_threads_batch), 1);
     rb_define_method(rb_cLLaMAContextParams, "n_threads_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads_batch), 0);
+    rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_scaling_type), 1);
+    rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_ext_factor), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_ext_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_ext_factor), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_attn_factor), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_attn_factor", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_attn_factor), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_fast), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_fast", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_fast), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_beta_slow), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_beta_slow", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_beta_slow), 0);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_yarn_orig_ctx), 1);
+    rb_define_method(rb_cLLaMAContextParams, "yarn_orig_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_yarn_orig_ctx), 0);
     rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
     rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
     rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -883,6 +895,18 @@ private:
     return INT2NUM(ptr->params.n_threads_batch);
   }
 
+  // rope_scaling_type
+  static VALUE _llama_context_params_set_rope_scaling_type(VALUE self, VALUE scaling_type) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.rope_scaling_type = NUM2INT(scaling_type);
+    return INT2NUM(ptr->params.rope_scaling_type);
+  }
+
+  static VALUE _llama_context_params_get_rope_scaling_type(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.rope_scaling_type);
+  }
+
   // rope_freq_base
   static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -907,6 +931,66 @@ private:
     return DBL2NUM(ptr->params.rope_freq_scale);
   }
 
+  // yarn_ext_factor
+  static VALUE _llama_context_params_set_yarn_ext_factor(VALUE self, VALUE yarn_ext_factor) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_ext_factor = NUM2DBL(yarn_ext_factor);
+    return DBL2NUM(ptr->params.yarn_ext_factor);
+  }
+
+  static VALUE _llama_context_params_get_yarn_ext_factor(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_ext_factor);
+  }
+
+  // yarn_attn_factor
+  static VALUE _llama_context_params_set_yarn_attn_factor(VALUE self, VALUE yarn_attn_factor) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_attn_factor = NUM2DBL(yarn_attn_factor);
+    return DBL2NUM(ptr->params.yarn_attn_factor);
+  }
+
+  static VALUE _llama_context_params_get_yarn_attn_factor(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_attn_factor);
+  }
+
+  // yarn_beta_fast
+  static VALUE _llama_context_params_set_yarn_beta_fast(VALUE self, VALUE yarn_beta_fast) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_beta_fast = NUM2DBL(yarn_beta_fast);
+    return DBL2NUM(ptr->params.yarn_beta_fast);
+  }
+
+  static VALUE _llama_context_params_get_yarn_beta_fast(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_beta_fast);
+  }
+
+  // yarn_beta_slow
+  static VALUE _llama_context_params_set_yarn_beta_slow(VALUE self, VALUE yarn_beta_slow) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_beta_slow = NUM2DBL(yarn_beta_slow);
+    return DBL2NUM(ptr->params.yarn_beta_slow);
+  }
+
+  static VALUE _llama_context_params_get_yarn_beta_slow(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return DBL2NUM(ptr->params.yarn_beta_slow);
+  }
+
+  // yarn_orig_ctx
+  static VALUE _llama_context_params_set_yarn_orig_ctx(VALUE self, VALUE yarn_orig_ctx) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.yarn_orig_ctx = NUM2UINT(yarn_orig_ctx);
+    return UINT2NUM(ptr->params.yarn_orig_ctx);
+  }
+
+  static VALUE _llama_context_params_get_yarn_orig_ctx(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return UINT2NUM(ptr->params.yarn_orig_ctx);
+  }
+
   // mul_mat_q
   static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
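The accessors above are thin wrappers over the new llama.cpp context fields: the float knobs round-trip through NUM2DBL/DBL2NUM and `yarn_orig_ctx` through NUM2UINT/UINT2NUM, so from Ruby they are plain numerics. A minimal sketch (values are illustrative, not recommendations):

```ruby
params = LLaMACpp::ContextParams.new
params.rope_scaling_type = LLaMACpp::LLAMA_ROPE_SCALING_YARN
params.yarn_orig_ctx    = 4096   # training context the scaling extrapolates from
params.yarn_ext_factor  = 1.0
params.yarn_attn_factor = 1.0
params.yarn_beta_fast   = 32.0
params.yarn_beta_slow   = 1.0
params.rope_freq_scale  = 0.5    # pre-existing knob, still available
```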
@@ -1011,6 +1095,8 @@ public:
     rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_only_copy), 1);
     rb_define_method(rb_cLLaMAModelQuantizeParams, "only_copy", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_only_copy), 0);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "pure=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_pure), 1);
+    rb_define_method(rb_cLLaMAModelQuantizeParams, "pure", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_pure), 0);
   }
 
 private:
@@ -1083,6 +1169,18 @@ private:
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.only_copy ? Qtrue : Qfalse;
   }
+
+  // pure
+  static VALUE _llama_model_quantize_params_set_pure(VALUE self, VALUE pure) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    ptr->params.pure = RTEST(pure) ? true : false;
+    return ptr->params.pure ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_quantize_params_get_pure(VALUE self) {
+    LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
+    return ptr->params.pure ? Qtrue : Qfalse;
+  }
 };
 
 const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
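`pure` mirrors llama.cpp's quantize parameter of the same name (disable k-quant mixtures and quantize all tensors to the same type); the setter accepts any Ruby value and reduces it through RTEST:

```ruby
qparams = LLaMACpp::ModelQuantizeParams.new
qparams.pure = 1      # any truthy value becomes C true
qparams.pure          # => true
```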
@@ -1148,6 +1246,16 @@ public:
     rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
     rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
     rb_define_method(rb_cLLaMAModel, "n_params", RUBY_METHOD_FUNC(_llama_model_get_model_n_params), 0);
+    rb_define_method(rb_cLLaMAModel, "text", RUBY_METHOD_FUNC(_llama_model_get_text), 1);
+    rb_define_method(rb_cLLaMAModel, "score", RUBY_METHOD_FUNC(_llama_model_get_score), 1);
+    rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_type), 1);
+    rb_define_method(rb_cLLaMAModel, "token_bos", RUBY_METHOD_FUNC(_llama_model_token_bos), 0);
+    rb_define_method(rb_cLLaMAModel, "token_eos", RUBY_METHOD_FUNC(_llama_model_token_eos), 0);
+    rb_define_method(rb_cLLaMAModel, "token_nl", RUBY_METHOD_FUNC(_llama_model_token_nl), 0);
+    rb_define_method(rb_cLLaMAModel, "token_prefix", RUBY_METHOD_FUNC(_llama_model_token_prefix), 0);
+    rb_define_method(rb_cLLaMAModel, "token_middle", RUBY_METHOD_FUNC(_llama_model_token_middle), 0);
+    rb_define_method(rb_cLLaMAModel, "token_suffix", RUBY_METHOD_FUNC(_llama_model_token_suffix), 0);
+    rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
   }
 
 private:
@@ -1396,6 +1504,62 @@ private:
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return UINT2NUM(llama_model_n_params(ptr->model));
   }
+
+  static VALUE _llama_model_get_text(VALUE self, VALUE token_) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    const llama_token token = NUM2INT(token_);
+    const char* text = llama_token_get_text(ptr->model, token);
+    return rb_utf8_str_new_cstr(text);
+  }
+
+  static VALUE _llama_model_get_score(VALUE self, VALUE token_) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    const llama_token token = NUM2INT(token_);
+    const float score = llama_token_get_score(ptr->model, token);
+    return DBL2NUM(score);
+  }
+
+  static VALUE _llama_model_get_type(VALUE self, VALUE token_) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    const llama_token token = NUM2INT(token_);
+    const int type = llama_token_get_type(ptr->model, token);
+    return INT2NUM(type);
+  }
+
+  static VALUE _llama_model_token_bos(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_bos(ptr->model));
+  }
+
+  static VALUE _llama_model_token_eos(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_eos(ptr->model));
+  }
+
+  static VALUE _llama_model_token_nl(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_nl(ptr->model));
+  }
+
+  static VALUE _llama_model_token_prefix(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_prefix(ptr->model));
+  }
+
+  static VALUE _llama_model_token_middle(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_middle(ptr->model));
+  }
+
+  static VALUE _llama_model_token_suffix(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_suffix(ptr->model));
+  }
+
+  static VALUE _llama_model_token_eot(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_token_eot(ptr->model));
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
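With these implementations the vocabulary helpers operate on the model alone, so they no longer require a live context. A sketch (constructor keywords follow the gem's 0.9.x API; the model path is hypothetical):

```ruby
model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf',
                            params: LLaMACpp::ModelParams.new)
model.token_bos               # => Integer token id
model.text(model.token_bos)   # => String form of that token
model.score(model.token_eos)  # => Float
```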
@@ -1670,22 +1834,12 @@ public:
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
-    rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
-    rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
-    rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
-    rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
-    rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
-    rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
-    rb_define_method(rb_cLLaMAContext, "token_prefix", RUBY_METHOD_FUNC(_llama_context_token_prefix), 0);
-    rb_define_method(rb_cLLaMAContext, "token_middle", RUBY_METHOD_FUNC(_llama_context_token_middle), 0);
-    rb_define_method(rb_cLLaMAContext, "token_suffix", RUBY_METHOD_FUNC(_llama_context_token_suffix), 0);
-    rb_define_method(rb_cLLaMAContext, "token_eot", RUBY_METHOD_FUNC(_llama_context_token_eot), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
     rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
-    rb_define_method(rb_cLLaMAContext, "
+    rb_define_method(rb_cLLaMAContext, "kv_cache_clear", RUBY_METHOD_FUNC(_llama_context_kv_cache_clear), 0);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_rm", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_rm), 3);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_cp", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_cp), 4);
     rb_define_method(rb_cLLaMAContext, "kv_cache_seq_keep", RUBY_METHOD_FUNC(_llama_context_kv_cache_seq_keep), 1);
@@ -1693,12 +1847,12 @@ public:
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
     rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
-    rb_define_method(rb_cLLaMAContext, "
-    rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
+    rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
     rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
     rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
+    rb_define_method(rb_cLLaMAContext, "sample_min_p", RUBY_METHOD_FUNC(_llama_context_sample_min_p), -1);
     rb_define_method(rb_cLLaMAContext, "sample_tail_free", RUBY_METHOD_FUNC(_llama_context_sample_tail_free), -1);
     rb_define_method(rb_cLLaMAContext, "sample_typical", RUBY_METHOD_FUNC(_llama_context_sample_typical), -1);
     rb_define_method(rb_cLLaMAContext, "sample_temp", RUBY_METHOD_FUNC(_llama_context_sample_temp), -1);
@@ -1927,102 +2081,6 @@ private:
     return output;
   }
 
-  static VALUE _llama_context_text(VALUE self, VALUE token_) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    const llama_token token = NUM2INT(token_);
-    const char* text = llama_token_get_text(ptr->ctx, token);
-    return rb_utf8_str_new_cstr(text);
-  }
-
-  static VALUE _llama_context_score(VALUE self, VALUE token_) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    const llama_token token = NUM2INT(token_);
-    const float score = llama_token_get_score(ptr->ctx, token);
-    return DBL2NUM(score);
-  }
-
-  static VALUE _llama_context_type(VALUE self, VALUE token_) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    const llama_token token = NUM2INT(token_);
-    const int type = llama_token_get_type(ptr->ctx, token);
-    return INT2NUM(type);
-  }
-
-  static VALUE _llama_context_token_bos(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_bos(ptr->ctx));
-  }
-
-  static VALUE _llama_context_token_eos(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_eos(ptr->ctx));
-  }
-
-  static VALUE _llama_context_token_nl(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_nl(ptr->ctx));
-  }
-
-  static VALUE _llama_context_token_prefix(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_prefix(ptr->ctx));
-  }
-
-  static VALUE _llama_context_token_middle(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_middle(ptr->ctx));
-  }
-
-  static VALUE _llama_context_token_suffix(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_suffix(ptr->ctx));
-  }
-
-  static VALUE _llama_context_token_eot(VALUE self) {
-    LLaMAContextWrapper* ptr = get_llama_context(self);
-    if (ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
-      return Qnil;
-    }
-    return INT2NUM(llama_token_eot(ptr->ctx));
-  }
-
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -2073,13 +2131,13 @@ private:
     return INT2NUM(llama_get_kv_cache_token_count(ptr->ctx));
   }
 
-  static VALUE
+  static VALUE _llama_context_kv_cache_clear(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
-
+    llama_kv_cache_clear(ptr->ctx);
     return Qnil;
   }
 
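The rename tracks upstream: in the bundled llama.cpp range, `llama_kv_cache_clear` superseded the token-range removal call this method previously wrapped; the Ruby side keeps the zero-argument, nil-returning shape.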
@@ -2231,14 +2289,14 @@ private:
     return Qnil;
   }
 
-  static VALUE
+  static VALUE _llama_context_sample_repetition_penalties(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
-    ID kw_table[
-    VALUE kw_values[
+    ID kw_table[3] = { rb_intern("penalty_repeat"), rb_intern("penalty_freq"), rb_intern("penalty_present") };
+    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
     VALUE candidates = Qnil;
     VALUE last_n_tokens = Qnil;
     rb_scan_args(argc, argv, "2:", &candidates, &last_n_tokens, &kw_args);
-    rb_get_kwargs(kw_args, kw_table,
+    rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
 
     if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
       rb_raise(rb_eArgError, "candidates must be a TokenDataArray");
@@ -2249,56 +2307,15 @@ private:
       return Qnil;
     }
     if (!RB_FLOAT_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "
-      return Qnil;
-    }
-
-    const size_t last_tokens_size = RARRAY_LEN(last_n_tokens);
-    std::vector<llama_token> last_n_tokens_data(last_tokens_size);
-    for (size_t i = 0; i < last_tokens_size; i++) {
-      last_n_tokens_data[i] = NUM2INT(rb_ary_entry(last_n_tokens, i));
-    }
-
-    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    if (ctx_ptr->ctx == NULL) {
-      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      rb_raise(rb_eArgError, "penalty_repeat must be a float");
       return Qnil;
     }
-
-
-      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
-      return Qnil;
-    }
-    const float penalty = NUM2DBL(kw_values[0]);
-
-    llama_sample_repetition_penalty(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size, penalty);
-
-    return Qnil;
-  }
-
-  static VALUE _llama_context_sample_frequency_and_presence_penalties(int argc, VALUE* argv, VALUE self) {
-    VALUE kw_args = Qnil;
-    ID kw_table[2] = { rb_intern("frequency"), rb_intern("presence") };
-    VALUE kw_values[2] = { Qundef, Qundef };
-    VALUE candidates = Qnil;
-    VALUE last_n_tokens = Qnil;
-    rb_scan_args(argc, argv, "2:", &candidates, &last_n_tokens, &kw_args);
-    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
-
-    if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
-      rb_raise(rb_eArgError, "candidates must be a TokenDataArray");
-      return Qnil;
-    }
-    if (!RB_TYPE_P(last_n_tokens, T_ARRAY)) {
-      rb_raise(rb_eArgError, "last_n_tokens must be an Array");
-      return Qnil;
-    }
-    if (!RB_FLOAT_TYPE_P(kw_values[0])) {
-      rb_raise(rb_eArgError, "frequency must be a float");
+    if (!RB_FLOAT_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "penalty_freq must be a float");
       return Qnil;
     }
-    if (!RB_FLOAT_TYPE_P(kw_values[
-      rb_raise(rb_eArgError, "
+    if (!RB_FLOAT_TYPE_P(kw_values[2])) {
+      rb_raise(rb_eArgError, "penalty_present must be a float");
       return Qnil;
     }
 
@@ -2318,11 +2335,12 @@ private:
       rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
       return Qnil;
     }
+    const float penalty_repeat = NUM2DBL(kw_values[0]);
+    const float penalty_freq = NUM2DBL(kw_values[1]);
+    const float penalty_present = NUM2DBL(kw_values[2]);
 
-
-
-
-    llama_sample_frequency_and_presence_penalties(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size, alpha_frequency, alpha_presence);
+    llama_sample_repetition_penalties(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size,
+                                      penalty_repeat, penalty_freq, penalty_present);
 
     return Qnil;
   }
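Note the mapping: upstream `llama_sample_repetition_penalties` merges the two older sampler entry points, and the three Ruby keywords pass through positionally as its trailing float arguments.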
@@ -2467,6 +2485,45 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_sample_min_p(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("prob"), rb_intern("min_keep") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE candidates = Qnil;
+    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 1, kw_values);
+
+    if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
+      rb_raise(rb_eArgError, "1st argument must be a TokenDataArray");
+      return Qnil;
+    }
+    if (!RB_FLOAT_TYPE_P(kw_values[0])) {
+      rb_raise(rb_eArgError, "prob must be a float");
+      return Qnil;
+    }
+    if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "min_keep must be an integer");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
+    if (cnd_ptr->array.data == nullptr) {
+      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
+      return Qnil;
+    }
+    const float prob = NUM2DBL(kw_values[0]);
+    const size_t min_keep = kw_values[1] != Qundef ? NUM2SIZET(kw_values[1]) : 1;
+
+    llama_sample_min_p(ctx_ptr->ctx, &(cnd_ptr->array), prob, min_keep);
+
+    return Qnil;
+  }
+
   static VALUE _llama_context_sample_tail_free(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[2] = { rb_intern("z"), rb_intern("min_keep") };
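`prob` is the min-p threshold expressed relative to the most probable candidate (tokens below `prob` times the top probability are dropped), and `min_keep` falls back to 1 when omitted, matching the keyword handling above. Usage sketch (threshold values are illustrative):

```ruby
context.sample_min_p(candidates, prob: 0.05)
context.sample_min_p(candidates, prob: 0.02, min_keep: 3)
```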
@@ -2962,6 +3019,12 @@ extern "C" void Init_llama_cpp(void) {
     rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_RNG_UPPER", INT2NUM(LLAMA_GRETYPE_CHAR_RNG_UPPER));
     rb_define_const(rb_mLLaMACpp, "LLAMA_GRETYPE_CHAR_ALT", INT2NUM(LLAMA_GRETYPE_CHAR_ALT));
 
+    rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_UNSPECIFIED", INT2NUM(LLAMA_ROPE_SCALING_UNSPECIFIED));
+    rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_NONE", INT2NUM(LLAMA_ROPE_SCALING_NONE));
+    rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_LINEAR", INT2NUM(LLAMA_ROPE_SCALING_LINEAR));
+    rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_YARN", INT2NUM(LLAMA_ROPE_SCALING_YARN));
+    rb_define_const(rb_mLLaMACpp, "LLAMA_ROPE_SCALING_MAX_VALUE", INT2NUM(LLAMA_ROPE_SCALING_MAX_VALUE));
+
     std::stringstream ss_magic;
     ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
     rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
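These five constants mirror upstream's `enum llama_rope_scaling_type` and are the intended values for the `rope_scaling_type=` accessor added earlier in this file (see the ContextParams sketch above).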