llama_cpp 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/extconf.rb +8 -2
- data/ext/llama_cpp/llama_cpp.cpp +60 -6
- data/ext/llama_cpp/src/ggml-cuda.h +2 -0
- data/ext/llama_cpp/src/ggml-opencl.cpp +1034 -0
- data/ext/llama_cpp/src/ggml-opencl.h +8 -10
- data/ext/llama_cpp/src/ggml.c +398 -184
- data/ext/llama_cpp/src/ggml.h +14 -3
- data/ext/llama_cpp/src/llama-util.h +23 -23
- data/ext/llama_cpp/src/llama.cpp +191 -92
- data/ext/llama_cpp/src/llama.h +30 -17
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -0
- data/sig/llama_cpp.rbs +1 -0
- metadata +3 -3
- data/ext/llama_cpp/src/ggml-opencl.c +0 -361
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ed569e816938dfca79c345228cf592eb81827c04acfeae3c8e26e0635bbc518b
+  data.tar.gz: f4a899df0cf450370d7dc75e486a17617f1af0cbcacd9d9a8c7d3bde10016441
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0f3d38eed6628e8d68efc741fe00024fb0c5199fb2e1a33d6f04d9299e1c59deb969e3eafe36190ade84522e70ddca50956fbee9b6406edc5d613f654889a83a
+  data.tar.gz: 0b1705a8d70564a59ad6472b03dc0241727766d4121e26a2e9c3c0d4725ddf2ccf65cb8f4a862688661ea9fa2b1c8858cd6e5e722821e6c2c30c91401475ef74
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
 ## [Unreleased]
 
+## [[0.1.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.2...v0.1.3)] - 2023-05-27
+
+- Bump bundled llama.cpp from master-265db98 to master-66874d4
+
+## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+**Breaking Changes**
+
+- Bump bundled llama.cpp from master-6986c78 to master-265db98
+- bump LLAMA_FILE_VERSION to 3
+
 ## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
 
 - Add load_session_file method to Context
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,8 +5,7 @@ require 'mkmf'
 abort 'libstdc++ is not found.' unless have_library('stdc++')
 
 $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
-$srcs << 'ggml-opencl.c' if with_config('clblast')
-
+$srcs << 'ggml-opencl.cpp' if with_config('clblast')
 $CFLAGS << ' -w'
 $CXXFLAGS << ' -std=c++11'
 $INCFLAGS << ' -I$(srcdir)/src'
@@ -24,6 +23,13 @@ if with_config('openblas')
   $CFLAGS << ' -DGGML_USE_OPENBLAS'
 end
 
+if with_config('blis')
+  abort 'libblis is not found.' unless have_library('blis')
+  abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+  $CFLAGS << ' -DGGML_USE_OPENBLAS'
+end
+
 if with_config('accelerate')
   abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
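Both acceleration switches go through mkmf's with_config, so they are enabled at install time in the usual flag form, e.g. gem install llama_cpp -- --with-blis, or --with-clblast for the OpenCL path (flag spelling assumed from the standard mkmf mapping, not stated in this diff). Note that the blis branch defines the same GGML_USE_OPENBLAS macro as the OpenBLAS branch, so ggml treats BLIS as a drop-in CBLAS provider.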
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -524,7 +524,14 @@ private:
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
     LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
@@ -788,7 +795,14 @@ private:
 
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
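Both construction paths above now catch the std::runtime_error that llama_init_from_file may throw and re-raise it as a Ruby RuntimeError rather than letting a C++ exception unwind through the extension. A minimal caller-side sketch of the resulting behavior, assuming the constructor keywords are model_path and params (the extract shows only the positional kw_values):

  require 'llama_cpp'

  params = LLaMACpp::ContextParams.new
  begin
    context = LLaMACpp::Context.new(model_path: 'path/to/ggml-model.bin', params: params)
  rescue RuntimeError => e
    # a std::runtime_error raised inside llama_init_from_file lands here
    warn "model load failed: #{e.message}"
  end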
@@ -1407,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
 // module functions
 
+static VALUE rb_llama_llama_init_backend(VALUE self) {
+  llama_init_backend();
+  return Qnil;
+}
+
 static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   VALUE kw_args = Qnil;
   ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
@@ -1477,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
   RbLLaMAContext::define_class(rb_mLLaMACpp);
   RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+  rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
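The new init_backend module function binds llama.cpp's llama_init_backend, which upstream documents as one-time initialization of the llama/ggml backend, to be called once at program start before any context is created:

  require 'llama_cpp'

  LLaMACpp.init_backend  # one-time global backend setup; returns nil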
@@ -1494,11 +1514,45 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   std::stringstream ss_magic;
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-
-
-
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
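The file and session magic numbers are now exported as hex strings (formatted with std::showbase << std::hex), alongside the existing LLAMA_FILE_VERSION and the new LLAMA_SESSION_VERSION. A quick inspection sketch; the commented values follow upstream llama.h at this revision, where 'ggjt' is 0x67676a74, 'ggsn' is 0x6767736e, and the session version is 1:

  require 'llama_cpp'

  puts LLaMACpp::LLAMA_FILE_MAGIC_GGJT   # => "0x67676a74" ('ggjt')
  puts LLaMACpp::LLAMA_SESSION_MAGIC     # => "0x6767736e" ('ggsn')
  puts LLaMACpp::LLAMA_FILE_VERSION      # => "3"
  puts LLaMACpp::LLAMA_SESSION_VERSION   # => "1"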
data/ext/llama_cpp/src/ggml-cuda.h
CHANGED
@@ -6,6 +6,7 @@ extern "C" {
 
 void ggml_init_cublas(void);
 
+void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
@@ -15,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
 void ggml_cuda_host_free(void * ptr);
 
 void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
 
 #ifdef __cplusplus
 }