llama_cpp 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/ext/llama_cpp/extconf.rb +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +153 -21
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-opencl.c +291 -215
- data/ext/llama_cpp/src/ggml.c +4428 -2143
- data/ext/llama_cpp/src/ggml.h +216 -13
- data/ext/llama_cpp/src/llama-util.h +23 -23
- data/ext/llama_cpp/src/llama.cpp +300 -149
- data/ext/llama_cpp/src/llama.h +38 -25
- data/lib/llama_cpp/client.rb +1 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -0
- data/sig/llama_cpp.rbs +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fe968c9231c20e614fafe89bc521c313ab68401fedd2d803743b18ccc234a28
|
4
|
+
data.tar.gz: a4916ec0f52b3e131175141f30bd3a70f37859207e732948d2fe7baac98a4b0c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa99138a7a591a7e602e6aa040ccec057dcad09e52c6646edd0def9c0e3ea1aee6796bc32fa05dc9c384af1b8c72a3f5c2077de918d2e0a229901c97732023c1
|
7
|
+
data.tar.gz: 1e4399f4b75fcbe69da61ce23d2cf45594e5502e7d6ea6f9b7f0930ca155bcfb4481f81944496031e79c8ef0e48be20a6797d8f9b41967404e2a54330a93c261
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,22 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
|
4
|
+
|
5
|
+
**Breaking Changes**
|
6
|
+
|
7
|
+
- Bump bundled llama.cpp from master-6986c78 to master-265db98
|
8
|
+
- bump LLAMA_FILE_VERSION to 3
|
9
|
+
|
10
|
+
## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
|
11
|
+
|
12
|
+
- Add load_session_file method to Context
|
13
|
+
- Add save_session_file method to Context
|
14
|
+
|
15
|
+
**Breaking Changes**
|
16
|
+
|
17
|
+
- Bump bundled llama.cpp from master-173d0e6 to master-6986c78
|
18
|
+
- bump LLAMA_FILE_VERSION to 2
|
19
|
+
|
3
20
|
## [[0.1.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.7...v0.1.0)] - 2023-05-20
|
4
21
|
|
5
22
|
**Breaking Changes**
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -24,6 +24,13 @@ if with_config('openblas')
|
|
24
24
|
$CFLAGS << ' -DGGML_USE_OPENBLAS'
|
25
25
|
end
|
26
26
|
|
27
|
+
if with_config('blis')
|
28
|
+
abort 'libblis is not found.' unless have_library('blis')
|
29
|
+
abort 'cblas.h is not found.' unless have_header('cblas.h')
|
30
|
+
|
31
|
+
$CFLAGS << ' -DGGML_USE_OPENBLAS'
|
32
|
+
end
|
33
|
+
|
27
34
|
if with_config('accelerate')
|
28
35
|
abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
|
29
36
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -292,8 +292,6 @@ public:
|
|
292
292
|
// rb_define_method(rb_cLLaMAContextParams, "initialize", RUBY_METHOD_FUNC(_llama_context_params_init), 0);
|
293
293
|
rb_define_method(rb_cLLaMAContextParams, "n_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_n_ctx), 1);
|
294
294
|
rb_define_method(rb_cLLaMAContextParams, "n_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_n_ctx), 0);
|
295
|
-
rb_define_method(rb_cLLaMAContextParams, "n_parts=", RUBY_METHOD_FUNC(_llama_context_params_set_n_parts), 1);
|
296
|
-
rb_define_method(rb_cLLaMAContextParams, "n_parts", RUBY_METHOD_FUNC(_llama_context_params_get_n_parts), 0);
|
297
295
|
rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
|
298
296
|
rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
|
299
297
|
rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
|
@@ -331,18 +329,6 @@ private:
|
|
331
329
|
return INT2NUM(ptr->params.n_ctx);
|
332
330
|
};
|
333
331
|
|
334
|
-
// n_parts
|
335
|
-
static VALUE _llama_context_params_set_n_parts(VALUE self, VALUE n_parts) {
|
336
|
-
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
337
|
-
ptr->params.n_parts = NUM2INT(n_parts);
|
338
|
-
return INT2NUM(ptr->params.n_parts);
|
339
|
-
};
|
340
|
-
|
341
|
-
static VALUE _llama_context_params_get_n_parts(VALUE self) {
|
342
|
-
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
343
|
-
return INT2NUM(ptr->params.n_parts);
|
344
|
-
};
|
345
|
-
|
346
332
|
// seed
|
347
333
|
static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
|
348
334
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
@@ -494,6 +480,8 @@ public:
|
|
494
480
|
rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
|
495
481
|
rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
|
496
482
|
rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
|
483
|
+
rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
|
484
|
+
rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
|
497
485
|
rb_define_method(rb_cLLaMAContext, "sample_repetition_penalty", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalty), -1);
|
498
486
|
rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
|
499
487
|
rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
|
@@ -536,7 +524,14 @@ private:
|
|
536
524
|
VALUE filename = kw_values[0];
|
537
525
|
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
|
538
526
|
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
539
|
-
|
527
|
+
|
528
|
+
try {
|
529
|
+
ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
|
530
|
+
} catch (const std::runtime_error& e) {
|
531
|
+
rb_raise(rb_eRuntimeError, "%s", e.what());
|
532
|
+
return Qnil;
|
533
|
+
}
|
534
|
+
|
540
535
|
if (ctx_ptr->ctx == NULL) {
|
541
536
|
rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
|
542
537
|
return Qnil;
|
@@ -800,7 +795,14 @@ private:
|
|
800
795
|
|
801
796
|
VALUE filename = kw_values[0];
|
802
797
|
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
|
803
|
-
|
798
|
+
|
799
|
+
try {
|
800
|
+
ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
|
801
|
+
} catch (const std::runtime_error& e) {
|
802
|
+
rb_raise(rb_eRuntimeError, "%s", e.what());
|
803
|
+
return Qnil;
|
804
|
+
}
|
805
|
+
|
804
806
|
if (ctx_ptr->ctx == NULL) {
|
805
807
|
rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
|
806
808
|
return Qnil;
|
@@ -870,6 +872,97 @@ private:
|
|
870
872
|
return Qnil;
|
871
873
|
};
|
872
874
|
|
875
|
+
static VALUE _llama_context_load_session_file(int argc, VALUE* argv, VALUE self) {
|
876
|
+
VALUE kw_args = Qnil;
|
877
|
+
ID kw_table[1] = { rb_intern("session_path") };
|
878
|
+
VALUE kw_values[1] = { Qundef };
|
879
|
+
VALUE candidates = Qnil;
|
880
|
+
VALUE last_n_tokens = Qnil;
|
881
|
+
rb_scan_args(argc, argv, ":", &kw_args);
|
882
|
+
rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
|
883
|
+
|
884
|
+
if (!RB_TYPE_P(kw_values[0], T_STRING)) {
|
885
|
+
rb_raise(rb_eArgError, "session_path must be a String");
|
886
|
+
return Qnil;
|
887
|
+
}
|
888
|
+
|
889
|
+
VALUE filename = kw_values[0];
|
890
|
+
|
891
|
+
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
892
|
+
if (ctx_ptr->ctx == NULL) {
|
893
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
894
|
+
return Qnil;
|
895
|
+
}
|
896
|
+
|
897
|
+
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
|
898
|
+
const int n_ctx = prms_ptr->params.n_ctx;
|
899
|
+
|
900
|
+
std::vector<llama_token> session_tokens(n_ctx);
|
901
|
+
size_t n_token_count_out = 0;
|
902
|
+
|
903
|
+
try {
|
904
|
+
bool res = llama_load_session_file(ctx_ptr->ctx, StringValueCStr(filename), session_tokens.data(), session_tokens.capacity(), &n_token_count_out);
|
905
|
+
if (!res) {
|
906
|
+
rb_raise(rb_eRuntimeError, "Failed to load session file");
|
907
|
+
return Qnil;
|
908
|
+
}
|
909
|
+
session_tokens.resize(n_token_count_out);
|
910
|
+
} catch (const std::runtime_error& e) {
|
911
|
+
rb_raise(rb_eRuntimeError, "%s", e.what());
|
912
|
+
return Qnil;
|
913
|
+
}
|
914
|
+
|
915
|
+
VALUE ary_session_tokens = rb_ary_new2(n_token_count_out);
|
916
|
+
for (size_t i = 0; i < n_token_count_out; i++) {
|
917
|
+
rb_ary_store(ary_session_tokens, i, INT2NUM(session_tokens[i]));
|
918
|
+
}
|
919
|
+
|
920
|
+
RB_GC_GUARD(filename);
|
921
|
+
return ary_session_tokens;
|
922
|
+
}
|
923
|
+
|
924
|
+
static VALUE _llama_context_save_session_file(int argc, VALUE* argv, VALUE self) {
|
925
|
+
VALUE kw_args = Qnil;
|
926
|
+
ID kw_table[2] = { rb_intern("session_path"), rb_intern("session_tokens") };
|
927
|
+
VALUE kw_values[2] = { Qundef, Qundef };
|
928
|
+
VALUE candidates = Qnil;
|
929
|
+
VALUE last_n_tokens = Qnil;
|
930
|
+
rb_scan_args(argc, argv, ":", &kw_args);
|
931
|
+
rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
|
932
|
+
|
933
|
+
if (!RB_TYPE_P(kw_values[0], T_STRING)) {
|
934
|
+
rb_raise(rb_eArgError, "session_path must be a String");
|
935
|
+
return Qnil;
|
936
|
+
}
|
937
|
+
if (!RB_TYPE_P(kw_values[1], T_ARRAY)) {
|
938
|
+
rb_raise(rb_eArgError, "session_tokens must be an Array");
|
939
|
+
return Qnil;
|
940
|
+
}
|
941
|
+
|
942
|
+
VALUE filename = kw_values[0];
|
943
|
+
const size_t sz_session_tokens = RARRAY_LEN(kw_values[1]);
|
944
|
+
std::vector<llama_token> session_tokens(sz_session_tokens);
|
945
|
+
for (size_t i = 0; i < sz_session_tokens; i++) {
|
946
|
+
session_tokens[i] = NUM2INT(rb_ary_entry(kw_values[1], i));
|
947
|
+
}
|
948
|
+
|
949
|
+
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
950
|
+
if (ctx_ptr->ctx == NULL) {
|
951
|
+
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
952
|
+
return Qnil;
|
953
|
+
}
|
954
|
+
|
955
|
+
bool res = llama_save_session_file(ctx_ptr->ctx, StringValueCStr(filename), session_tokens.data(), sz_session_tokens);
|
956
|
+
|
957
|
+
if (!res) {
|
958
|
+
rb_raise(rb_eRuntimeError, "Failed to save session file");
|
959
|
+
return Qnil;
|
960
|
+
}
|
961
|
+
|
962
|
+
RB_GC_GUARD(filename);
|
963
|
+
return Qnil;
|
964
|
+
}
|
965
|
+
|
873
966
|
static VALUE _llama_context_sample_repetition_penalty(int argc, VALUE* argv, VALUE self) {
|
874
967
|
VALUE kw_args = Qnil;
|
875
968
|
ID kw_table[1] = { rb_intern("penalty") };
|
@@ -1328,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
|
|
1328
1421
|
|
1329
1422
|
// module functions
|
1330
1423
|
|
1424
|
+
static VALUE rb_llama_llama_init_backend(VALUE self) {
|
1425
|
+
llama_init_backend();
|
1426
|
+
return Qnil;
|
1427
|
+
}
|
1428
|
+
|
1331
1429
|
static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
|
1332
1430
|
VALUE kw_args = Qnil;
|
1333
1431
|
ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
|
@@ -1398,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
1398
1496
|
RbLLaMAContext::define_class(rb_mLLaMACpp);
|
1399
1497
|
RbLLaMAContextParams::define_class(rb_mLLaMACpp);
|
1400
1498
|
|
1499
|
+
rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
|
1401
1500
|
rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
|
1402
1501
|
rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
|
1403
1502
|
rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
|
@@ -1411,16 +1510,49 @@ extern "C" void Init_llama_cpp(void) {
|
|
1411
1510
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
|
1412
1511
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1));
|
1413
1512
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16));
|
1414
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_2", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_2));
|
1415
1513
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q8_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q8_0));
|
1416
1514
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
|
1417
1515
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
|
1418
1516
|
|
1419
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
|
1420
1517
|
std::stringstream ss_magic;
|
1518
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
|
1519
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
|
1520
|
+
|
1521
|
+
ss_magic.str("");
|
1522
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1523
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
|
1524
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
|
1525
|
+
|
1526
|
+
ss_magic.str("");
|
1527
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1528
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
|
1529
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
|
1530
|
+
|
1531
|
+
ss_magic.str("");
|
1532
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1533
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
|
1534
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
|
1535
|
+
|
1536
|
+
ss_magic.str("");
|
1537
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1538
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
|
1539
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
|
1540
|
+
|
1541
|
+
ss_magic.str("");
|
1542
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1421
1543
|
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
|
1422
1544
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
|
1423
|
-
|
1424
|
-
|
1425
|
-
|
1545
|
+
|
1546
|
+
ss_magic.str("");
|
1547
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1548
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
|
1549
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
|
1550
|
+
|
1551
|
+
ss_magic.str("");
|
1552
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1553
|
+
ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
|
1554
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
|
1555
|
+
|
1556
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
|
1557
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
|
1426
1558
|
}
|
@@ -6,6 +6,7 @@ extern "C" {
|
|
6
6
|
|
7
7
|
void ggml_init_cublas(void);
|
8
8
|
|
9
|
+
void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
9
10
|
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
10
11
|
size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
11
12
|
void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
@@ -14,6 +15,9 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
|
|
14
15
|
void * ggml_cuda_host_malloc(size_t size);
|
15
16
|
void ggml_cuda_host_free(void * ptr);
|
16
17
|
|
18
|
+
void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
|
19
|
+
void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
|
20
|
+
|
17
21
|
#ifdef __cplusplus
|
18
22
|
}
|
19
23
|
#endif
|