llama_cpp 0.6.0 → 0.7.0
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +49 -3
- data/ext/llama_cpp/src/ggml-cuda.cu +122 -72
- data/ext/llama_cpp/src/ggml-metal.m +4 -5
- data/ext/llama_cpp/src/ggml-metal.metal +9 -2
- data/ext/llama_cpp/src/ggml-opencl.cpp +119 -53
- data/ext/llama_cpp/src/ggml.c +755 -320
- data/ext/llama_cpp/src/ggml.h +13 -0
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/llama.cpp +779 -113
- data/ext/llama_cpp/src/llama.h +22 -6
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +5 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 144a7130adb5ac32d31699bce809a6de6c3a6ecf8cfccca36ebdee436c28b645
+  data.tar.gz: d00b2c2db583e6e38d472033c7348f22e9614febdb633c4e454ca49e00d2fec6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2c30854fef304e0258250d9285bac8ab3ea014950d1638e88682029763a3e90eae36da1b3757b2441ff5a7a798401ee1e731bcfc014e7e651811726d7afea224
+  data.tar.gz: 10ea5bb5bf5d85a7e7030b514e2eb38650e9ce8a97ab339f63538b637d3c85293b406fea66c055a00f919c457a9a2af5c8f5710d0d31d702fe7e6f703b52933d
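These digests can be checked against a local copy of the gem. A minimal verification sketch: a `.gem` file is a tar archive whose `metadata.gz` and `data.tar.gz` members are exactly what `checksums.yaml` digests; the unpack directory below is hypothetical.

    require 'digest'

    %w[metadata.gz data.tar.gz].each do |member|
      path = File.join('llama_cpp-0.7.0', member) # hypothetical unpack directory
      puts "#{member} SHA256: #{Digest::SHA256.file(path).hexdigest}"
      puts "#{member} SHA512: #{Digest::SHA512.file(path).hexdigest}"
    end
    # The SHA256 line for data.tar.gz should print
    # d00b2c2db583e6e38d472033c7348f22e9614febdb633c4e454ca49e00d2fec6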
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
+## [[0.7.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.6.0...v0.7.0)] - 2023-10-07
+
+- Bump bundled llama.cpp from b1292 to b1334.
+- Refactor `generate` module function.
+
+**Breaking Changes**
+- Change to return UTF-8 String on `token_to_piece` and `desc` methods in `Model` and `text` method in `Context`.
+
 ## [[0.6.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.3...v0.6.0)] - 2023-09-30
 
 **Breaking Changes**
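The refactored `generate` module function keeps its public shape. A minimal usage sketch, assuming the README-style API of this gem; the model path is hypothetical, and keyword names such as `n_predict` may differ by version:

    require 'llama_cpp'

    model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf',
                                params: LLaMACpp::ModelParams.new)
    context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

    # generate drives tokenization and sampling internally and returns the
    # completion text, which as of 0.7.0 is a UTF-8 String.
    puts LLaMACpp.generate(context, 'Hello, World.', n_predict: 32)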
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -1127,6 +1127,7 @@ public:
     rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_model_n_vocab), 0);
     rb_define_method(rb_cLLaMAModel, "n_ctx_train", RUBY_METHOD_FUNC(_llama_model_get_model_n_ctx_train), 0);
     rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
+    rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
     rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
     rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
     rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
@@ -1288,6 +1289,11 @@ private:
     return INT2NUM(llama_n_embd(ptr->model));
   }
 
+  static VALUE _llama_model_rope_freq_scale_train(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
+  }
+
   static VALUE _llama_model_token_to_piece(VALUE self, VALUE token_) {
     if (!RB_INTEGER_TYPE_P(token_)) {
       rb_raise(rb_eArgError, "token must be an integer");
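The new binding exposes llama.cpp's `llama_rope_freq_scale_train` as a reader on `Model`, returning the RoPE frequency scale the model was trained with as a Float (via `DBL2NUM`). A minimal sketch, assuming `model` is a `LLaMACpp::Model` loaded as in the example above:

    # Reports the trained RoPE frequency scale, useful when choosing
    # rope_freq_scale overrides for extended-context inference.
    puts model.rope_freq_scale_train # e.g. 1.0 for an unscaled model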
@@ -1308,7 +1314,7 @@ private:
       result.resize(n_tokens);
     }
     std::string ret(result.data(), result.size());
-    return rb_str_new_cstr(ret.c_str());
+    return rb_utf8_str_new_cstr(ret.c_str());
   }
 
   static VALUE _llama_model_tokenize(int argc, VALUE* argv, VALUE self) {
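This hunk implements the breaking change from the changelog: `token_to_piece` now builds the result with `rb_utf8_str_new_cstr`, so the returned String is tagged UTF-8 rather than ASCII-8BIT. A sketch of the observable difference, assuming `model` as above and some valid Integer `token_id`:

    piece = model.token_to_piece(token_id)
    # 0.6.0: piece.encoding #=> #<Encoding:ASCII-8BIT>; callers had to force_encoding.
    # 0.7.0: piece.encoding #=> #<Encoding:UTF-8>
    puts piece.encoding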
@@ -1358,7 +1364,7 @@ private:
     LLaMAModelWrapper* ptr = get_llama_model(self);
     char buf[128];
     llama_model_desc(ptr->model, buf, sizeof(buf));
-    return rb_str_new_cstr(buf);
+    return rb_utf8_str_new_cstr(buf);
   }
 
   static VALUE _llama_model_get_model_size(VALUE self) {
@@ -1650,6 +1656,10 @@ public:
     rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
     rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
     rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
+    rb_define_method(rb_cLLaMAContext, "token_prefix", RUBY_METHOD_FUNC(_llama_context_token_prefix), 0);
+    rb_define_method(rb_cLLaMAContext, "token_middle", RUBY_METHOD_FUNC(_llama_context_token_middle), 0);
+    rb_define_method(rb_cLLaMAContext, "token_suffix", RUBY_METHOD_FUNC(_llama_context_token_suffix), 0);
+    rb_define_method(rb_cLLaMAContext, "token_eot", RUBY_METHOD_FUNC(_llama_context_token_eot), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
@@ -1905,7 +1915,7 @@ private:
     }
     const llama_token token = NUM2INT(token_);
     const char* text = llama_token_get_text(ptr->ctx, token);
-    return rb_str_new_cstr(text);
+    return rb_utf8_str_new_cstr(text);
   }
 
   static VALUE _llama_context_score(VALUE self, VALUE token_) {
@@ -1957,6 +1967,42 @@ private:
     return INT2NUM(llama_token_nl(ptr->ctx));
   }
 
+  static VALUE _llama_context_token_prefix(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_prefix(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_middle(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_middle(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_suffix(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_suffix(ptr->ctx));
+  }
+
+  static VALUE _llama_context_token_eot(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return INT2NUM(llama_token_eot(ptr->ctx));
+  }
+
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {