llama_cpp 0.17.1 → 0.17.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +114 -5
- data/ext/llama_cpp/llama_cpp.h +1 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +13 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b3da0d7b5c81ad7e21d2761f4d78fd8f892abea918a05c4e37a1a0e7d84f65a5
+  data.tar.gz: 5bc2d81ecf2c722084ee6cb44aab2a851283962780b5a963004c4ff4e4a85051
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 119188683fdb32b0dce2664038b1fe05a7e4e75df64f2316e50dc19706ee300ac90b59e3a5cce33995fc5d8511f5b3bb8bdf918ce9da51d0d0a81fbde2f6bc58
+  data.tar.gz: c2699dbcefaf135ee8e0520014bba3c6f671569c617c7ff69bb3aff057d16aeff330fd58750c8c9815c7def686bd6e2876c760944f2ae8045042f203056e5cdb
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
+## [[0.17.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.1...v0.17.2)] - 2024-07-14
+
+- Change supported llama.cpp version to b3358.
+- Add vocabulary pre-tokenization type constants.
+- Add attention type constants.
+- Add `attention_type` accessor to `ContextParams`.
+- Add `lstrip` and `special` keyword arguments to `token_to_piece` method in `Model`.
+- Add `has_encoder?`, `decoder_start_token`, and `detokenize` methods to `Model`.
+- Add `encode` method to `Context`.
+
 ## [[0.17.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.0...v0.17.1)] - 2024-07-06
 
 - Update usage section on README.
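The changelog entries above map onto new Ruby-level methods. A minimal usage sketch follows; the model path and prompt are placeholders, and the `ModelParams`/`Model.new(model_path:, params:)` constructors are taken from the gem's README for the 0.17 series, while the new method names come from the diff and RBS signatures below:

```ruby
require 'llama_cpp'

# Hypothetical GGUF path; any model readable by llama.cpp b3358 should work.
model_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: model_params)

# New in 0.17.2: attention_type accessor and LLAMA_ATTENTION_TYPE_* constants.
context_params = LLaMACpp::ContextParams.new
context_params.attention_type = LLaMACpp::LLAMA_ATTENTION_TYPE_CAUSAL

# token_to_piece now accepts lstrip/special, and detokenize reverses tokenize.
tokens = model.tokenize(text: 'Hello, World.', add_bos: true)
pieces = tokens.map { |t| model.token_to_piece(t, lstrip: 0, special: false) }
text   = model.detokenize(tokens, remove_special: false, unparse_special: false)

# Encoder-related queries for encoder-decoder architectures (e.g. T5).
puts model.decoder_start_token if model.has_encoder?
```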
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
     rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
+    rb_define_method(rb_cLLaMAContextParams, "attention_type=", RUBY_METHOD_FUNC(_llama_context_params_set_attention_type), 1);
+    rb_define_method(rb_cLLaMAContextParams, "attention_type", RUBY_METHOD_FUNC(_llama_context_params_get_attention_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -1129,6 +1131,18 @@ private:
     return INT2NUM(ptr->params.pooling_type);
   }
 
+  // attention_type
+  static VALUE _llama_context_params_set_attention_type(VALUE self, VALUE scaling_type) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.attention_type = static_cast<enum llama_attention_type>(NUM2INT(scaling_type));
+    return INT2NUM(ptr->params.attention_type);
+  }
+
+  static VALUE _llama_context_params_get_attention_type(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.attention_type);
+  }
+
   // rope_freq_base
   static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1516,7 +1530,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
     rb_define_method(rb_cLLaMAModel, "n_layer", RUBY_METHOD_FUNC(_llama_model_get_model_n_layer), 0);
     rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
-    rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
+    rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), -1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
     rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
     rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
@@ -1538,6 +1552,9 @@ public:
     rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
     rb_define_method(rb_cLLaMAModel, "token_is_eog?", RUBY_METHOD_FUNC(_llama_model_token_is_eog), 1);
     rb_define_method(rb_cLLaMAModel, "token_is_control?", RUBY_METHOD_FUNC(_llama_model_token_is_control), 1);
+    rb_define_method(rb_cLLaMAModel, "has_encoder?", RUBY_METHOD_FUNC(_llama_model_has_encoder), 0);
+    rb_define_method(rb_cLLaMAModel, "decoder_start_token", RUBY_METHOD_FUNC(_llama_model_decoder_start_token), 0);
+    rb_define_method(rb_cLLaMAModel, "detokenize", RUBY_METHOD_FUNC(_llama_model_detokenize), -1);
   }
 
 private:
@@ -1677,18 +1694,33 @@ private:
     return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
   }
 
-  static VALUE _llama_model_token_to_piece(VALUE self, VALUE token_) {
+  static VALUE _llama_model_token_to_piece(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("lstrip"), rb_intern("special") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE token_ = Qnil;
+    rb_scan_args(argc, argv, "1:", &token_, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
     if (!RB_INTEGER_TYPE_P(token_)) {
       rb_raise(rb_eArgError, "token must be an integer");
       return Qnil;
     }
+    if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+      rb_raise(rb_eArgError, "lstrip must be an integer");
+      return Qnil;
+    }
+
     const llama_token token = NUM2INT(token_);
+    const int32_t lstrip = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 0;
+    const bool special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
     LLaMAModelWrapper* ptr = get_llama_model(self);
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(),
+    const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
     if (n_tokens < 0) {
       result.resize(-n_tokens);
-      const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(),
+      const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
       if (check != -n_tokens) {
         rb_raise(rb_eRuntimeError, "failed to convert");
         return Qnil;
@@ -1865,6 +1897,58 @@ private:
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return llama_token_is_control(ptr->model, token) ? Qtrue : Qfalse;
   }
+
+  static VALUE _llama_model_has_encoder(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return llama_model_has_encoder(ptr->model) ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_decoder_start_token(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_model_decoder_start_token(ptr->model));
+  }
+
+  static VALUE _llama_model_detokenize(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("remove_special"), rb_intern("unparse_special") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE tokens_ = Qnil;
+    rb_scan_args(argc, argv, "1:", &tokens_, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+    if (!RB_TYPE_P(tokens_, T_ARRAY)) {
+      rb_raise(rb_eArgError, "tokens must be an array");
+      return Qnil;
+    }
+
+    const int32_t n_tokens = RARRAY_LEN(tokens_);
+    llama_token* tokens = ALLOCA_N(llama_token, n_tokens);
+    for (int32_t i = 0; i < n_tokens; i++) {
+      tokens[i] = NUM2INT(rb_ary_entry(tokens_, i));
+    }
+
+    std::string text;
+    text.resize(std::max(text.capacity(), static_cast<unsigned long>(n_tokens)));
+    const int32_t text_len_max = text.size();
+
+    bool remove_special = kw_values[0] != Qundef ? RTEST(kw_values[0]) : false;
+    bool unparse_special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    std::string result;
+    int32_t n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+    if (n_chars < 0) {
+      text.resize(-n_chars);
+      n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+      if (n_chars <= text.size()) {
+        rb_raise(rb_eRuntimeError, "Failed to detokenize");
+        return Qnil;
+      }
+    }
+
+    text.resize(n_chars);
+    return rb_utf8_str_new_cstr(text.c_str());
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -2134,6 +2218,7 @@ public:
     rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
     rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
+    rb_define_method(rb_cLLaMAContext, "encode", RUBY_METHOD_FUNC(_llama_context_encode), 1);
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2228,6 +2313,24 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_encode(VALUE self, VALUE batch) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (!rb_obj_is_kind_of(batch, rb_cLLaMABatch)) {
+      rb_raise(rb_eArgError, "batch must be a Batch");
+      return Qnil;
+    }
+    LLaMABatchWrapper* batch_ptr = RbLLaMABatch::get_llama_batch(batch);
+    if (llama_encode(ptr->ctx, batch_ptr->batch) < 0) {
+      rb_raise(rb_eRuntimeError, "Failed to encode");
+      return Qnil;
+    }
+    return Qnil;
+  }
+
   static VALUE _llama_context_decode(VALUE self, VALUE batch) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -2774,7 +2877,7 @@ private:
     ID kw_table[3] = { rb_intern("logits"), rb_intern("logits_guidance"), rb_intern("scale") };
     VALUE kw_values[3] = { Qundef, Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table,
+    rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
 
     if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
       rb_raise(rb_eArgError, "logits must be an Array");
@@ -3513,6 +3616,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_JAIS", INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS));
 
@@ -3594,6 +3699,10 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
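The `encode` binding added above mirrors the existing `decode`: it takes a `Batch`, runs `llama_encode`, and raises a `RuntimeError` on failure. A sketch of how it might be called from Ruby; the model path is a placeholder and the `Batch.get_one` helper and its keyword arguments are assumptions based on the gem's existing batch API, not confirmed by this diff:

```ruby
require 'llama_cpp'

# Hypothetical encoder-decoder GGUF model (e.g. a T5 conversion); path is a placeholder.
model = LLaMACpp::Model.new(model_path: '/path/to/t5.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

tokens = model.tokenize(text: 'translate English to German: Hello.', add_bos: false)

# Assumed helper: wraps the token list in a single-sequence batch.
batch = LLaMACpp::Batch.get_one(tokens: tokens, n_tokens: tokens.size, pos_zero: 0, seq_id: 0)

if model.has_encoder?
  context.encode(batch)               # new in 0.17.2; raises RuntimeError if llama_encode fails
  first = model.decoder_start_token   # token to seed the decoder with
end
```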
data/ext/llama_cpp/llama_cpp.h
CHANGED
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.17.1'
+  VERSION = '0.17.2'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b3358'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -33,6 +33,8 @@ module LLaMACpp
   LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
   LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
   LLAMA_VOCAB_PRE_TYPE_PORO: Integer
+  LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
+  LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
   LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
   LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
 
@@ -104,6 +106,10 @@ module LLaMACpp
   LLAMA_POOLING_TYPE_CLS: Integer
   LLAMA_POOLING_TYPE_LAST: Integer
 
+  LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
+  LLAMA_ATTENTION_TYPE_CAUSAL: Integer
+  LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
+
   LLAMA_SPLIT_MODE_NONE: Integer
   LLAMA_SPLIT_MODE_LAYER: Integer
   LLAMA_SPLIT_MODE_ROW: Integer
@@ -158,7 +164,7 @@ module LLaMACpp
     def n_embd: () -> Integer
     def n_layer: () -> Integer
     def rope_freq_scale_train: () -> Float
-    def token_to_piece: (Integer) -> String
+    def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
     def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
     def desc: () -> String
     def size: () -> Integer
@@ -180,6 +186,9 @@ module LLaMACpp
     def token_eot: () -> Integer
     def token_is_eog?: (Integer) -> bool
     def token_is_control?: (Integer) -> bool
+    def has_encoder?: () -> bool
+    def decoder_start_token: () -> Integer
+    def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
   end
 
   class Timings
@@ -261,6 +270,7 @@ module LLaMACpp
     def embeddings: () -> Array[Float]
     def embeddings_ith: (Integer) -> Array[Float]
     def embeddings_seq: (Integer) -> Array[Float]
+    def encode: (::LLaMACpp::Batch) -> void
     def decode: (::LLaMACpp::Batch) -> void
     def logits: () -> Array[Float]
     def set_embeddings: (bool) -> void
@@ -330,6 +340,8 @@ module LLaMACpp
     def rope_scaling_type: () -> Integer
     def pooling_type=: (Integer) -> Integer
     def pooling_type: () -> Integer
+    def attention_type=: (Integer) -> Integer
+    def attention_type: () -> Integer
     def rope_freq_base=: (Float) -> Float
     def rope_freq_base: () -> Float
     def rope_freq_scale=: (Float) -> Float
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.17.1
+  version: 0.17.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-07-
+date: 2024-07-14 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: