llama_cpp 0.17.1 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +114 -5
- data/ext/llama_cpp/llama_cpp.h +1 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +13 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b3da0d7b5c81ad7e21d2761f4d78fd8f892abea918a05c4e37a1a0e7d84f65a5
+  data.tar.gz: 5bc2d81ecf2c722084ee6cb44aab2a851283962780b5a963004c4ff4e4a85051
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 119188683fdb32b0dce2664038b1fe05a7e4e75df64f2316e50dc19706ee300ac90b59e3a5cce33995fc5d8511f5b3bb8bdf918ce9da51d0d0a81fbde2f6bc58
+  data.tar.gz: c2699dbcefaf135ee8e0520014bba3c6f671569c617c7ff69bb3aff057d16aeff330fd58750c8c9815c7def686bd6e2876c760944f2ae8045042f203056e5cdb
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
+## [[0.17.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.1...v0.17.2)] - 2024-07-14
+
+- Change supported llama.cpp version to b3358.
+- Add vocabulary pre-tokenization type constants.
+- Add attention type constants.
+- Add `attention_type` accessor to `ContextParams`.
+- Add `lstrip` and `special` keyword arguments to `token_to_piece` method in `Model`.
+- Add `has_encoder?`, `decoder_start_token`, and `detokenize` methods to `Model`.
+- Add `encode` method to `Context`.
+
 ## [[0.17.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.0...v0.17.1)] - 2024-07-06
 
 - Update usage section on README.
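The 0.17.2 entries above amount to a small new API surface: attention-type constants, an `attention_type` accessor on `ContextParams`, encoder-related `Model` methods, and a `Context#encode` counterpart to `decode`. As a quick orientation, here is a minimal sketch of the `ContextParams` part, based only on the constants and accessors defined in this diff; it assumes `ContextParams.new` behaves as in earlier 0.17.x releases.

require 'llama_cpp'

# Minimal sketch; assumes ContextParams.new works as in prior 0.17.x releases.
params = LLaMACpp::ContextParams.new

# The new accessor maps to llama_context_params.attention_type in llama.cpp b3358.
params.attention_type = LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL

# The getter returns the integer constant that was set (UNSPECIFIED is the default).
puts params.attention_type == LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL # => true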
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -978,6 +978,8 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
     rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
+    rb_define_method(rb_cLLaMAContextParams, "attention_type=", RUBY_METHOD_FUNC(_llama_context_params_set_attention_type), 1);
+    rb_define_method(rb_cLLaMAContextParams, "attention_type", RUBY_METHOD_FUNC(_llama_context_params_get_attention_type), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
     rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -1129,6 +1131,18 @@ private:
     return INT2NUM(ptr->params.pooling_type);
   }
 
+  // attention_type
+  static VALUE _llama_context_params_set_attention_type(VALUE self, VALUE scaling_type) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.attention_type = static_cast<enum llama_attention_type>(NUM2INT(scaling_type));
+    return INT2NUM(ptr->params.attention_type);
+  }
+
+  static VALUE _llama_context_params_get_attention_type(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.attention_type);
+  }
+
   // rope_freq_base
   static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1516,7 +1530,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
     rb_define_method(rb_cLLaMAModel, "n_layer", RUBY_METHOD_FUNC(_llama_model_get_model_n_layer), 0);
     rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
-    rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
+    rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), -1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
     rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
     rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
@@ -1538,6 +1552,9 @@ public:
     rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
     rb_define_method(rb_cLLaMAModel, "token_is_eog?", RUBY_METHOD_FUNC(_llama_model_token_is_eog), 1);
     rb_define_method(rb_cLLaMAModel, "token_is_control?", RUBY_METHOD_FUNC(_llama_model_token_is_control), 1);
+    rb_define_method(rb_cLLaMAModel, "has_encoder?", RUBY_METHOD_FUNC(_llama_model_has_encoder), 0);
+    rb_define_method(rb_cLLaMAModel, "decoder_start_token", RUBY_METHOD_FUNC(_llama_model_decoder_start_token), 0);
+    rb_define_method(rb_cLLaMAModel, "detokenize", RUBY_METHOD_FUNC(_llama_model_detokenize), -1);
   }
 
 private:
@@ -1677,18 +1694,33 @@ private:
     return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
   }
 
-  static VALUE _llama_model_token_to_piece(VALUE
+  static VALUE _llama_model_token_to_piece(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("lstrip"), rb_intern("special") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE token_ = Qnil;
+    rb_scan_args(argc, argv, "1:", &token_, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
     if (!RB_INTEGER_TYPE_P(token_)) {
       rb_raise(rb_eArgError, "token must be an integer");
       return Qnil;
     }
+    if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+      rb_raise(rb_eArgError, "lstrip must be an integer");
+      return Qnil;
+    }
+
     const llama_token token = NUM2INT(token_);
+    const int32_t lstrip = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 0;
+    const bool special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
     LLaMAModelWrapper* ptr = get_llama_model(self);
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(),
+    const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
     if (n_tokens < 0) {
       result.resize(-n_tokens);
-      const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(),
+      const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
       if (check != -n_tokens) {
         rb_raise(rb_eRuntimeError, "failed to convert");
         return Qnil;
@@ -1865,6 +1897,58 @@ private:
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return llama_token_is_control(ptr->model, token) ? Qtrue : Qfalse;
   }
+
+  static VALUE _llama_model_has_encoder(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return llama_model_has_encoder(ptr->model) ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_decoder_start_token(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_model_decoder_start_token(ptr->model));
+  }
+
+  static VALUE _llama_model_detokenize(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("remove_special"), rb_intern("unparse_special") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE tokens_ = Qnil;
+    rb_scan_args(argc, argv, "1:", &tokens_, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+    if (!RB_TYPE_P(tokens_, T_ARRAY)) {
+      rb_raise(rb_eArgError, "tokens must be an array");
+      return Qnil;
+    }
+
+    const int32_t n_tokens = RARRAY_LEN(tokens_);
+    llama_token* tokens = ALLOCA_N(llama_token, n_tokens);
+    for (int32_t i = 0; i < n_tokens; i++) {
+      tokens[i] = NUM2INT(rb_ary_entry(tokens_, i));
+    }
+
+    std::string text;
+    text.resize(std::max(text.capacity(), static_cast<unsigned long>(n_tokens)));
+    const int32_t text_len_max = text.size();
+
+    bool remove_special = kw_values[0] != Qundef ? RTEST(kw_values[0]) : false;
+    bool unparse_special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    std::string result;
+    int32_t n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+    if (n_chars < 0) {
+      text.resize(-n_chars);
+      n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+      if (n_chars <= text.size()) {
+        rb_raise(rb_eRuntimeError, "Failed to detokenize");
+        return Qnil;
+      }
+    }
+
+    text.resize(n_chars);
+    return rb_utf8_str_new_cstr(text.c_str());
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -2134,6 +2218,7 @@ public:
     rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
     rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
+    rb_define_method(rb_cLLaMAContext, "encode", RUBY_METHOD_FUNC(_llama_context_encode), 1);
     rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2228,6 +2313,24 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_encode(VALUE self, VALUE batch) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (!rb_obj_is_kind_of(batch, rb_cLLaMABatch)) {
+      rb_raise(rb_eArgError, "batch must be a Batch");
+      return Qnil;
+    }
+    LLaMABatchWrapper* batch_ptr = RbLLaMABatch::get_llama_batch(batch);
+    if (llama_encode(ptr->ctx, batch_ptr->batch) < 0) {
+      rb_raise(rb_eRuntimeError, "Failed to encode");
+      return Qnil;
+    }
+    return Qnil;
+  }
+
   static VALUE _llama_context_decode(VALUE self, VALUE batch) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -2774,7 +2877,7 @@ private:
     ID kw_table[3] = { rb_intern("logits"), rb_intern("logits_guidance"), rb_intern("scale") };
     VALUE kw_values[3] = { Qundef, Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table,
+    rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
 
     if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
       rb_raise(rb_eArgError, "logits must be an Array");
@@ -3513,6 +3616,8 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
   rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_JAIS", INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS));
 
@@ -3594,6 +3699,10 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
   rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
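The extension changes above are easier to follow next to the Ruby-level calls they enable. The sketch below exercises the new Model and Context methods; the GGUF path is a placeholder, and the Model/Context constructor keywords (model_path:, params:, model:) are assumptions carried over from the gem's README rather than something introduced in this diff.

require 'llama_cpp'

# Placeholder model path; constructor keywords assumed from the gem's README.
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

tokens = model.tokenize(text: 'Hello, world!', add_bos: true)

# token_to_piece now accepts lstrip/special keyword arguments (defaults: 0 and false).
first_piece = model.token_to_piece(tokens.first, lstrip: 0, special: false)

# detokenize converts a token array back into a string; special-token handling is opt-in.
round_trip = model.detokenize(tokens, remove_special: false, unparse_special: false)

# Encoder/decoder support: encode is only meaningful for models that have an encoder.
if model.has_encoder?
  start_token = model.decoder_start_token
  # context.encode(batch) would be called here with a LLaMACpp::Batch holding the
  # encoder-side tokens; generation then continues from start_token via context.decode.
  # Batch construction is unchanged by this release, so it is omitted here.
end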
data/ext/llama_cpp/llama_cpp.h
CHANGED
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.17.
+  VERSION = '0.17.2'
 
   # The supported version of llama.cpp.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b3358'
 end
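Both constants are readable at runtime, which is the quickest way to confirm which llama.cpp revision a given gem build targets:

require 'llama_cpp'

puts LLaMACpp::VERSION           # => "0.17.2"
puts LLaMACpp::LLAMA_CPP_VERSION # => "b3358"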
data/sig/llama_cpp.rbs
CHANGED
@@ -33,6 +33,8 @@ module LLaMACpp
   LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
   LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
   LLAMA_VOCAB_PRE_TYPE_PORO: Integer
+  LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
+  LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
   LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
   LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
 
@@ -104,6 +106,10 @@ module LLaMACpp
   LLAMA_POOLING_TYPE_CLS: Integer
   LLAMA_POOLING_TYPE_LAST: Integer
 
+  LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
+  LLAMA_ATTENTION_TYPE_CAUSAL: Integer
+  LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
+
   LLAMA_SPLIT_MODE_NONE: Integer
   LLAMA_SPLIT_MODE_LAYER: Integer
   LLAMA_SPLIT_MODE_ROW: Integer
@@ -158,7 +164,7 @@ module LLaMACpp
     def n_embd: () -> Integer
     def n_layer: () -> Integer
     def rope_freq_scale_train: () -> Float
-    def token_to_piece: (Integer) -> String
+    def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
     def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
     def desc: () -> String
     def size: () -> Integer
@@ -180,6 +186,9 @@ module LLaMACpp
     def token_eot: () -> Integer
     def token_is_eog?: (Integer) -> bool
     def token_is_control?: (Integer) -> bool
+    def has_encoder?: () -> bool
+    def decoder_start_token: () -> Integer
+    def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
   end
 
   class Timings
@@ -261,6 +270,7 @@ module LLaMACpp
     def embeddings: () -> Array[Float]
     def embeddings_ith: (Integer) -> Array[Float]
     def embeddings_seq: (Integer) -> Array[Float]
+    def encode: (::LLaMACpp::Batch) -> void
     def decode: (::LLaMACpp::Batch) -> void
     def logits: () -> Array[Float]
     def set_embeddings: (bool) -> void
@@ -330,6 +340,8 @@ module LLaMACpp
     def rope_scaling_type: () -> Integer
     def pooling_type=: (Integer) -> Integer
     def pooling_type: () -> Integer
+    def attention_type=: (Integer) -> Integer
+    def attention_type: () -> Integer
     def rope_freq_base=: (Float) -> Float
     def rope_freq_base: () -> Float
     def rope_freq_scale=: (Float) -> Float
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.17.
+  version: 0.17.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-07-
+date: 2024-07-14 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: