llama_cpp 0.6.0 → 0.7.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +49 -3
- data/ext/llama_cpp/src/ggml-cuda.cu +122 -72
- data/ext/llama_cpp/src/ggml-metal.m +4 -5
- data/ext/llama_cpp/src/ggml-metal.metal +9 -2
- data/ext/llama_cpp/src/ggml-opencl.cpp +119 -53
- data/ext/llama_cpp/src/ggml.c +755 -320
- data/ext/llama_cpp/src/ggml.h +13 -0
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/llama.cpp +779 -113
- data/ext/llama_cpp/src/llama.h +22 -6
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +5 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 144a7130adb5ac32d31699bce809a6de6c3a6ecf8cfccca36ebdee436c28b645
+  data.tar.gz: d00b2c2db583e6e38d472033c7348f22e9614febdb633c4e454ca49e00d2fec6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2c30854fef304e0258250d9285bac8ab3ea014950d1638e88682029763a3e90eae36da1b3757b2441ff5a7a798401ee1e731bcfc014e7e651811726d7afea224
+  data.tar.gz: 10ea5bb5bf5d85a7e7030b514e2eb38650e9ce8a97ab339f63538b637d3c85293b406fea66c055a00f919c457a9a2af5c8f5710d0d31d702fe7e6f703b52933d
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
+## [[0.7.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.6.0...v0.7.0)] - 2023-10-07
+
+- Bump bundled llama.cpp from b1292 to b1334.
+- Refactor `generate` module function.
+
+**Breaking Changes**
+- Change to return UTF-8 String on `token_to_piece` and `desc` methods in `Model` and `text` method in `Context`.
+
 ## [[0.6.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.3...v0.6.0)] - 2023-09-30
 
 **Breaking Changes**
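Note on the breaking change above: strings returned by these methods are now tagged as UTF-8. A minimal sketch of what upgrading callers can expect — the model path, token id, and `ModelParams` setup are placeholder assumptions, not part of this diff:

```ruby
require 'llama_cpp'

# Hypothetical setup; adjust the path and token id for your environment.
model = LLaMACpp::Model.new(model_path: './model.gguf', params: LLaMACpp::ModelParams.new)

model.desc.encoding               # => #<Encoding:UTF-8> as of 0.7.0
model.token_to_piece(42).encoding # => #<Encoding:UTF-8>
```

Code that previously re-encoded or force-encoded these return values can likely drop that step after upgrading.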
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -1127,6 +1127,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
+    rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
     rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
     rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
@@ -1288,6 +1289,11 @@ private:
     return INT2NUM(llama_n_embd(ptr->model));
   }
 
+  static VALUE _llama_model_rope_freq_scale_train(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
+  }
+
   static VALUE _llama_model_token_to_piece(VALUE self, VALUE token_) {
     if (!RB_INTEGER_TYPE_P(token_)) {
       rb_raise(rb_eArgError, "token must be an integer");
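On the Ruby side this surfaces as a reader on `Model`. A one-line sketch (model construction elided; the return value shown is illustrative, not from this diff):

```ruby
model.rope_freq_scale_train # => 1.0 (the RoPE frequency scale the model was trained with)
```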
@@ -1308,7 +1314,7 @@ private:
       result.resize(n_tokens);
     }
     std::string ret(result.data(), result.size());
-    return
+    return rb_utf8_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_model_tokenize(int argc, VALUE* argv, VALUE self) {
@@ -1358,7 +1364,7 @@ private:
     LLaMAModelWrapper* ptr = get_llama_model(self);
     char buf[128];
     llama_model_desc(ptr->model, buf, sizeof(buf));
-    return
+    return rb_utf8_str_new_cstr(buf);
   }
 
   static VALUE _llama_model_get_model_size(VALUE self) {
@@ -1650,6 +1656,10 @@ public:
     rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
     rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
     rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
+    rb_define_method(rb_cLLaMAContext, "token_prefix", RUBY_METHOD_FUNC(_llama_context_token_prefix), 0);
+    rb_define_method(rb_cLLaMAContext, "token_middle", RUBY_METHOD_FUNC(_llama_context_token_middle), 0);
+    rb_define_method(rb_cLLaMAContext, "token_suffix", RUBY_METHOD_FUNC(_llama_context_token_suffix), 0);
+    rb_define_method(rb_cLLaMAContext, "token_eot", RUBY_METHOD_FUNC(_llama_context_token_eot), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
@@ -1905,7 +1915,7 @@ private:
     }
     const llama_token token = NUM2INT(token_);
     const char* text = llama_token_get_text(ptr->ctx, token);
-    return
+    return rb_utf8_str_new_cstr(text);
   }
 
   static VALUE _llama_context_score(VALUE self, VALUE token_) {
@@ -1957,6 +1967,42 @@ private:
     return INT2NUM(llama_token_nl(ptr->ctx));
   }
 
+  static VALUE _llama_context_token_prefix(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_prefix(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_middle(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_middle(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_suffix(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_suffix(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_eot(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_eot(ptr->ctx));
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {