llama_cpp 0.9.2 → 0.9.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/ext/llama_cpp/src/ggml-alloc.c +378 -208
- data/ext/llama_cpp/src/ggml-alloc.h +68 -16
- data/ext/llama_cpp/src/ggml-backend-impl.h +87 -0
- data/ext/llama_cpp/src/ggml-backend.c +578 -13
- data/ext/llama_cpp/src/ggml-backend.h +70 -77
- data/ext/llama_cpp/src/ggml-cuda.cu +194 -8
- data/ext/llama_cpp/src/ggml-impl.h +13 -7
- data/ext/llama_cpp/src/ggml-metal.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.m +113 -32
- data/ext/llama_cpp/src/ggml-metal.metal +107 -1
- data/ext/llama_cpp/src/ggml-quants.c +173 -73
- data/ext/llama_cpp/src/ggml.c +826 -1482
- data/ext/llama_cpp/src/ggml.h +63 -45
- data/ext/llama_cpp/src/llama.cpp +364 -38
- data/ext/llama_cpp/src/llama.h +6 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2491ee80a5e822375f140b7d465a6783be62ef9f98aa510495723bd2d80b3f81
+  data.tar.gz: ad9ddbda1470602b976231edae030efd1ef0d854b41e0ce509e9b07ec78113e9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8a82ed440ae2bbe20f2c3818f22f88f1c5cab659060ad085a43ee657d1e60919acb74b9aac9b1d027fe84ddb30d170efc0e3799d33deddc59b4d34300332a798
+  data.tar.gz: 164b4356580f0d2f17582fb84d59f0fbb9f816ac18921ea67d7cdda7f484620b605fdb88111ee32c1a42400c0770c520841304f7c2230ba577f4df1e5db453a0
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
+## [[0.9.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.2...v0.9.3)] - 2023-11-18
+
+- Bump bundled llama.cpp from b1500 to b1523.
+- Add `add_bos_token?` method to Model.
+- Add `add_eos_token?` method to Model.
+
 ## [[0.9.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.9.1...v0.9.2)] - 2023-11-11
 
 - Bump bundled llama.cpp from b1472 to b1500.
@@ -6,7 +12,7 @@
 
 - Bump bundled llama.cpp from b1429 to b1472
 - Rename `kv_cahe_tokens_rm` method to `kv_cahce_clear` in Context.
-- Add `sample_min_p method
+- Add `sample_min_p` method to Context.
 - Add `rope_scaling_type`, `rope_freq_base`, `rope_freq_scale`, `yarn_ext_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`, and `yarn_orig_ctx` to ContextParams.
 - Add `pure` to ModelQuantizeParams.
 - Add contstants for RoPE scaling type.
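For reference, a minimal Ruby sketch of the two predicates this release adds to Model. The model path is a placeholder, and the constructor keywords are assumed from the gem's 0.9-era API rather than taken from this diff.

require 'llama_cpp'

# Load a model; the path and params are illustrative only.
model = LLaMACpp::Model.new(model_path: 'path/to/model.gguf',
                            params: LLaMACpp::ModelParams.new)

# Both predicates report tokenizer metadata from the model file:
# whether a BOS/EOS token should be added when tokenizing text.
p model.add_bos_token?  # => true or false, depending on the model
p model.add_eos_token?  # => true or false, depending on the model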
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -1252,6 +1252,8 @@ public:
     rb_define_method(rb_cLLaMAModel, "token_bos", RUBY_METHOD_FUNC(_llama_model_token_bos), 0);
     rb_define_method(rb_cLLaMAModel, "token_eos", RUBY_METHOD_FUNC(_llama_model_token_eos), 0);
     rb_define_method(rb_cLLaMAModel, "token_nl", RUBY_METHOD_FUNC(_llama_model_token_nl), 0);
+    rb_define_method(rb_cLLaMAModel, "add_bos_token?", RUBY_METHOD_FUNC(_llama_model_add_bos_token), 0);
+    rb_define_method(rb_cLLaMAModel, "add_eos_token?", RUBY_METHOD_FUNC(_llama_model_add_eos_token), 0);
     rb_define_method(rb_cLLaMAModel, "token_prefix", RUBY_METHOD_FUNC(_llama_model_token_prefix), 0);
     rb_define_method(rb_cLLaMAModel, "token_middle", RUBY_METHOD_FUNC(_llama_model_token_middle), 0);
     rb_define_method(rb_cLLaMAModel, "token_suffix", RUBY_METHOD_FUNC(_llama_model_token_suffix), 0);
@@ -1541,6 +1543,16 @@ private:
     return INT2NUM(llama_token_nl(ptr->model));
   }
 
+  static VALUE _llama_model_add_bos_token(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return llama_add_bos_token(ptr->model) ? Qtrue : Qfalse;
+  }
+
+  static VALUE _llama_model_add_eos_token(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return llama_add_eos_token(ptr->model) ? Qtrue : Qfalse;
+  }
+
   static VALUE _llama_model_token_prefix(VALUE self) {
     LLaMAModelWrapper* ptr = get_llama_model(self);
     return INT2NUM(llama_token_prefix(ptr->model));
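A note on the binding pattern in this hunk: Ruby permits `?` in method names, so `rb_define_method` can register `add_bos_token?` and `add_eos_token?` directly, and each wrapper converts the C return value of `llama_add_bos_token` / `llama_add_eos_token` into a proper Ruby boolean via `Qtrue`/`Qfalse` rather than exposing the raw value.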