llama_cpp 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +60 -6
- data/ext/llama_cpp/src/ggml-cuda.h +2 -0
- data/ext/llama_cpp/src/ggml-opencl.c +246 -133
- data/ext/llama_cpp/src/ggml.c +362 -137
- data/ext/llama_cpp/src/ggml.h +13 -3
- data/ext/llama_cpp/src/llama-util.h +23 -23
- data/ext/llama_cpp/src/llama.cpp +173 -102
- data/ext/llama_cpp/src/llama.h +30 -17
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -0
- data/sig/llama_cpp.rbs +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1fe968c9231c20e614fafe89bc521c313ab68401fedd2d803743b18ccc234a28
+  data.tar.gz: a4916ec0f52b3e131175141f30bd3a70f37859207e732948d2fe7baac98a4b0c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fa99138a7a591a7e602e6aa040ccec057dcad09e52c6646edd0def9c0e3ea1aee6796bc32fa05dc9c384af1b8c72a3f5c2077de918d2e0a229901c97732023c1
+  data.tar.gz: 1e4399f4b75fcbe69da61ce23d2cf45594e5502e7d6ea6f9b7f0930ca155bcfb4481f81944496031e79c8ef0e48be20a6797d8f9b41967404e2a54330a93c261
```
data/CHANGELOG.md
CHANGED
```diff
@@ -1,5 +1,12 @@
 ## [Unreleased]
 
+## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+**Breaking Changes**
+
+- Bump bundled llama.cpp from master-6986c78 to master-265db98
+- bump LLAMA_FILE_VERSION to 3
+
 ## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
 
 - Add load_session_file method to Context
```
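Because the file-format version bump is breaking, model files converted for earlier versions may be rejected at load time. A minimal sketch of checking the bundled format version from Ruby (the constant is exposed as a string, as the llama_cpp.cpp diff below shows):

```ruby
require 'llama_cpp'

# Reports the llama.cpp file-format version bundled with the gem;
# "3" as of 0.1.2. Models converted for older format versions must
# be re-converted with matching llama.cpp tooling.
puts LLaMACpp::LLAMA_FILE_VERSION
```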
data/ext/llama_cpp/extconf.rb
CHANGED
```diff
@@ -24,6 +24,13 @@ if with_config('openblas')
   $CFLAGS << ' -DGGML_USE_OPENBLAS'
 end
 
+if with_config('blis')
+  abort 'libblis is not found.' unless have_library('blis')
+  abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+  $CFLAGS << ' -DGGML_USE_OPENBLAS'
+end
+
 if with_config('accelerate')
   abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
```
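This adds BLIS as an alternative BLAS backend at build time. Since the check uses mkmf's `with_config('blis')`, passing the matching flag at install time, e.g. `gem install llama_cpp -- --with-blis`, should enable it. Note that the BLIS path reuses the same `-DGGML_USE_OPENBLAS` define as the OpenBLAS path, since BLIS exposes a CBLAS-compatible interface (hence the `cblas.h` check).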
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
```diff
@@ -524,7 +524,14 @@ private:
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
     LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
```
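The effect on the Ruby side: a `std::runtime_error` thrown inside `llama_init_from_file` (for example, on a model file with an unsupported format version) now surfaces as a catchable `RuntimeError` instead of terminating the process. A minimal sketch, assuming the 0.1.x keyword arguments `model_path:` and `params:` and a hypothetical model path:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
begin
  # Loading errors raised inside the bundled llama.cpp now surface
  # as RuntimeError rather than aborting the Ruby process.
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
rescue RuntimeError => e
  warn "failed to load model: #{e.message}"
end
```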
```diff
@@ -788,7 +795,14 @@ private:
 
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
```
```diff
@@ -1407,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
 // module functions
 
+static VALUE rb_llama_llama_init_backend(VALUE self) {
+  llama_init_backend();
+  return Qnil;
+}
+
 static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   VALUE kw_args = Qnil;
   ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
```
```diff
@@ -1477,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
   RbLLaMAContext::define_class(rb_mLLaMACpp);
   RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+  rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
```
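Together, the two hunks above expose llama.cpp's `llama_init_backend()` to Ruby as a zero-argument module function. A minimal usage sketch, calling it once before any context is created:

```ruby
require 'llama_cpp'

# One-time backend initialization, newly exposed in 0.1.2;
# forwards to llama.cpp's llama_init_backend().
LLaMACpp.init_backend
```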
```diff
@@ -1494,11 +1514,45 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   std::stringstream ss_magic;
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
-
-
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
```
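With these constants registered, the individual file magics and the session version all become visible from Ruby as strings. A quick sketch that lists them (exact values depend on the bundled llama.cpp headers):

```ruby
require 'llama_cpp'

# The magic constants are hex-formatted strings (std::showbase/std::hex);
# the version constants are decimal strings (std::to_string).
%w[
  LLAMA_FILE_MAGIC LLAMA_FILE_MAGIC_GGJT LLAMA_FILE_MAGIC_GGLA
  LLAMA_FILE_MAGIC_GGMF LLAMA_FILE_MAGIC_GGML LLAMA_FILE_MAGIC_GGSN
  LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_SESSION_MAGIC
  LLAMA_FILE_VERSION LLAMA_SESSION_VERSION
].each { |name| puts "#{name} = #{LLaMACpp.const_get(name)}" }
```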
data/ext/llama_cpp/src/ggml-cuda.h
CHANGED
```diff
@@ -6,6 +6,7 @@ extern "C" {
 
 void ggml_init_cublas(void);
 
+void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
```
```diff
@@ -15,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
 void ggml_cuda_host_free(void * ptr);
 
 void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
 
 #ifdef __cplusplus
 }
```
|