llama_cpp 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/ext/llama_cpp/extconf.rb +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +153 -21
- data/ext/llama_cpp/src/ggml-cuda.h +4 -0
- data/ext/llama_cpp/src/ggml-opencl.c +291 -215
- data/ext/llama_cpp/src/ggml.c +4428 -2143
- data/ext/llama_cpp/src/ggml.h +216 -13
- data/ext/llama_cpp/src/llama-util.h +23 -23
- data/ext/llama_cpp/src/llama.cpp +300 -149
- data/ext/llama_cpp/src/llama.h +38 -25
- data/lib/llama_cpp/client.rb +1 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -0
- data/sig/llama_cpp.rbs +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fe968c9231c20e614fafe89bc521c313ab68401fedd2d803743b18ccc234a28
|
4
|
+
data.tar.gz: a4916ec0f52b3e131175141f30bd3a70f37859207e732948d2fe7baac98a4b0c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa99138a7a591a7e602e6aa040ccec057dcad09e52c6646edd0def9c0e3ea1aee6796bc32fa05dc9c384af1b8c72a3f5c2077de918d2e0a229901c97732023c1
|
7
|
+
data.tar.gz: 1e4399f4b75fcbe69da61ce23d2cf45594e5502e7d6ea6f9b7f0930ca155bcfb4481f81944496031e79c8ef0e48be20a6797d8f9b41967404e2a54330a93c261
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,22 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
|
4
|
+
|
5
|
+
**Breaking Changes**
|
6
|
+
|
7
|
+
- Bump bundled llama.cpp from master-6986c78 to master-265db98
|
8
|
+
- bump LLAMA_FILE_VERSION to 3
|
9
|
+
|
10
|
+
## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
|
11
|
+
|
12
|
+
- Add load_session_file method to Context
|
13
|
+
- Add save_session_file method to Context
|
14
|
+
|
15
|
+
**Breaking Changes**
|
16
|
+
|
17
|
+
- Bump bundled llama.cpp from master-173d0e6 to master-6986c78
|
18
|
+
- bump LLAMA_FILE_VERSION to 2
|
19
|
+
|
3
20
|
## [[0.1.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.7...v0.1.0)] - 2023-05-20
|
4
21
|
|
5
22
|
**Breaking Changes**
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -24,6 +24,13 @@ if with_config('openblas')
|
|
24
24
|
$CFLAGS << ' -DGGML_USE_OPENBLAS'
|
25
25
|
end
|
26
26
|
|
27
|
+
# --with-blis: require libblis and its cblas.h header, then define the same
# GGML_USE_OPENBLAS macro that the openblas option defines.
if with_config('blis')
  have_library('blis') || abort('libblis is not found.')
  have_header('cblas.h') || abort('cblas.h is not found.')

  $CFLAGS << ' -DGGML_USE_OPENBLAS'
end
|
33
|
+
|
27
34
|
if with_config('accelerate')
|
28
35
|
abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
|
29
36
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -292,8 +292,6 @@ public:
|
|
292
292
|
// rb_define_method(rb_cLLaMAContextParams, "initialize", RUBY_METHOD_FUNC(_llama_context_params_init), 0);
|
293
293
|
rb_define_method(rb_cLLaMAContextParams, "n_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_n_ctx), 1);
|
294
294
|
rb_define_method(rb_cLLaMAContextParams, "n_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_n_ctx), 0);
|
295
|
-
rb_define_method(rb_cLLaMAContextParams, "n_parts=", RUBY_METHOD_FUNC(_llama_context_params_set_n_parts), 1);
|
296
|
-
rb_define_method(rb_cLLaMAContextParams, "n_parts", RUBY_METHOD_FUNC(_llama_context_params_get_n_parts), 0);
|
297
295
|
rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
|
298
296
|
rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
|
299
297
|
rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
|
@@ -331,18 +329,6 @@ private:
|
|
331
329
|
return INT2NUM(ptr->params.n_ctx);
|
332
330
|
};
|
333
331
|
|
334
|
-
// n_parts
|
335
|
-
static VALUE _llama_context_params_set_n_parts(VALUE self, VALUE n_parts) {
|
336
|
-
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
337
|
-
ptr->params.n_parts = NUM2INT(n_parts);
|
338
|
-
return INT2NUM(ptr->params.n_parts);
|
339
|
-
};
|
340
|
-
|
341
|
-
static VALUE _llama_context_params_get_n_parts(VALUE self) {
|
342
|
-
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
343
|
-
return INT2NUM(ptr->params.n_parts);
|
344
|
-
};
|
345
|
-
|
346
332
|
// seed
|
347
333
|
static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
|
348
334
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
@@ -494,6 +480,8 @@ public:
|
|
494
480
|
rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
|
495
481
|
rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
|
496
482
|
rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
|
483
|
+
rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
|
484
|
+
rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
|
497
485
|
rb_define_method(rb_cLLaMAContext, "sample_repetition_penalty", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalty), -1);
|
498
486
|
rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
|
499
487
|
rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
|
@@ -536,7 +524,14 @@ private:
|
|
536
524
|
VALUE filename = kw_values[0];
|
537
525
|
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
|
538
526
|
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
539
|
-
|
527
|
+
|
528
|
+
try {
|
529
|
+
ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
|
530
|
+
} catch (const std::runtime_error& e) {
|
531
|
+
rb_raise(rb_eRuntimeError, "%s", e.what());
|
532
|
+
return Qnil;
|
533
|
+
}
|
534
|
+
|
540
535
|
if (ctx_ptr->ctx == NULL) {
|
541
536
|
rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
|
542
537
|
return Qnil;
|
@@ -800,7 +795,14 @@ private:
|
|
800
795
|
|
801
796
|
VALUE filename = kw_values[0];
|
802
797
|
LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
|
803
|
-
|
798
|
+
|
799
|
+
try {
|
800
|
+
ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
|
801
|
+
} catch (const std::runtime_error& e) {
|
802
|
+
rb_raise(rb_eRuntimeError, "%s", e.what());
|
803
|
+
return Qnil;
|
804
|
+
}
|
805
|
+
|
804
806
|
if (ctx_ptr->ctx == NULL) {
|
805
807
|
rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
|
806
808
|
return Qnil;
|
@@ -870,6 +872,97 @@ private:
|
|
870
872
|
return Qnil;
|
871
873
|
};
|
872
874
|
|
875
|
+
// Context#load_session_file(session_path:)
//
// Calls llama_load_session_file on this context with the file at `session_path`
// and returns the tokens it wrote into the output buffer as an Array of Integer.
// The buffer holds n_ctx tokens, where n_ctx is read from the @params object.
//
// Raises ArgumentError when session_path is not a String, and RuntimeError when
// the context is not initialized, when llama_load_session_file returns false,
// or when a C++ exception is thrown while loading.
//
// rb_raise leaves this function with a non-local jump instead of C++ stack
// unwinding, so destructors of live C++ objects would be skipped. Every raise
// below therefore happens only after the std::vector and any caught exception
// have gone out of scope.
static VALUE _llama_context_load_session_file(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[1] = { rb_intern("session_path") };
  VALUE kw_values[1] = { Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
  rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);

  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
    rb_raise(rb_eArgError, "session_path must be a String");
  }

  VALUE filename = kw_values[0];

  LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  }

  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
  const int n_ctx = prms_ptr->params.n_ctx;

  // StringValueCStr can raise (e.g. embedded NUL byte); resolve the path while
  // no C++ object with a destructor exists yet.
  const char* session_path = StringValueCStr(filename);

  bool loaded = false;
  VALUE error_message = Qnil;
  VALUE ary_session_tokens = Qnil;

  try {
    // Constructed inside the try block so that allocation failures
    // (std::bad_alloc, std::length_error for a negative n_ctx) are reported
    // as Ruby errors as well.
    std::vector<llama_token> session_tokens(n_ctx);
    size_t n_token_count_out = 0;

    // size(), not capacity(): only the first size() elements are valid storage.
    loaded = llama_load_session_file(ctx_ptr->ctx, session_path, session_tokens.data(), session_tokens.size(), &n_token_count_out);
    if (loaded) {
      session_tokens.resize(n_token_count_out);
      // The Ruby calls below can only raise on memory exhaustion.
      ary_session_tokens = rb_ary_new2(n_token_count_out);
      for (size_t i = 0; i < n_token_count_out; i++) {
        rb_ary_store(ary_session_tokens, i, INT2NUM(session_tokens[i]));
      }
    }
  } catch (const std::exception& e) {
    // Keep only the message; the raise is deferred until the handler is left.
    error_message = rb_str_new2(e.what());
  }

  // session_path points into filename, so keep it reachable until here.
  RB_GC_GUARD(filename);

  if (!NIL_P(error_message)) {
    rb_raise(rb_eRuntimeError, "%s", StringValueCStr(error_message));
  }
  if (!loaded) {
    rb_raise(rb_eRuntimeError, "Failed to load session file");
  }

  return ary_session_tokens;
}
|
923
|
+
|
924
|
+
// Context#save_session_file(session_path:, session_tokens:)
//
// Converts `session_tokens` (an Array whose elements are converted with
// NUM2INT) to llama_token values and passes them, together with this context
// and `session_path`, to llama_save_session_file. Returns nil.
//
// Raises ArgumentError when session_path is not a String or session_tokens is
// not an Array, whatever NUM2INT raises for an element it cannot convert, and
// RuntimeError when the context is not initialized, when
// llama_save_session_file returns false, or when a C++ exception is thrown
// while saving.
//
// rb_raise (and any Ruby API call that raises) leaves this function with a
// non-local jump instead of C++ stack unwinding, so destructors of live C++
// objects would be skipped. All calls that can raise on bad input therefore
// run before the std::vector exists or after it has been destroyed.
static VALUE _llama_context_save_session_file(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[2] = { rb_intern("session_path"), rb_intern("session_tokens") };
  VALUE kw_values[2] = { Qundef, Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
  rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);

  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
    rb_raise(rb_eArgError, "session_path must be a String");
  }
  if (!RB_TYPE_P(kw_values[1], T_ARRAY)) {
    rb_raise(rb_eArgError, "session_tokens must be an Array");
  }

  VALUE filename = kw_values[0];
  const size_t sz_session_tokens = RARRAY_LEN(kw_values[1]);

  // First pass: validate and normalize every element into a private Array of
  // Integers. NUM2INT may raise (or run user code through to_int) here, where
  // no C++ object is alive yet.
  VALUE normalized_tokens = rb_ary_new2(sz_session_tokens);
  for (size_t i = 0; i < sz_session_tokens; i++) {
    rb_ary_store(normalized_tokens, i, INT2NUM(NUM2INT(rb_ary_entry(kw_values[1], i))));
  }

  LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  }

  // StringValueCStr can raise (e.g. embedded NUL byte); resolve it up front.
  const char* session_path = StringValueCStr(filename);

  bool saved = false;
  VALUE error_message = Qnil;

  try {
    std::vector<llama_token> session_tokens(sz_session_tokens);
    // Second pass: the elements are Integers that already fit in an int, so
    // NUM2INT cannot raise here.
    for (size_t i = 0; i < sz_session_tokens; i++) {
      session_tokens[i] = NUM2INT(rb_ary_entry(normalized_tokens, i));
    }
    saved = llama_save_session_file(ctx_ptr->ctx, session_path, session_tokens.data(), sz_session_tokens);
  } catch (const std::exception& e) {
    // Keep only the message; the raise is deferred until the handler is left.
    error_message = rb_str_new2(e.what());
  }

  // session_path points into filename; normalized_tokens is referenced only
  // from this frame. Keep both reachable until here.
  RB_GC_GUARD(filename);
  RB_GC_GUARD(normalized_tokens);

  if (!NIL_P(error_message)) {
    rb_raise(rb_eRuntimeError, "%s", StringValueCStr(error_message));
  }
  if (!saved) {
    rb_raise(rb_eRuntimeError, "Failed to save session file");
  }

  return Qnil;
}
|
965
|
+
|
873
966
|
static VALUE _llama_context_sample_repetition_penalty(int argc, VALUE* argv, VALUE self) {
|
874
967
|
VALUE kw_args = Qnil;
|
875
968
|
ID kw_table[1] = { rb_intern("penalty") };
|
@@ -1328,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
|
|
1328
1421
|
|
1329
1422
|
// module functions
|
1330
1423
|
|
1424
|
+
// Module function taking no Ruby arguments: forwards to llama_init_backend()
// and always returns nil.
static VALUE rb_llama_llama_init_backend(VALUE self) {
  (void)self;  // the receiver is not used
  llama_init_backend();
  return Qnil;
}
|
1428
|
+
|
1331
1429
|
static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
|
1332
1430
|
VALUE kw_args = Qnil;
|
1333
1431
|
ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
|
@@ -1398,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
|
|
1398
1496
|
RbLLaMAContext::define_class(rb_mLLaMACpp);
|
1399
1497
|
RbLLaMAContextParams::define_class(rb_mLLaMACpp);
|
1400
1498
|
|
1499
|
+
rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
|
1401
1500
|
rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
|
1402
1501
|
rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
|
1403
1502
|
rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
|
@@ -1411,16 +1510,49 @@ extern "C" void Init_llama_cpp(void) {
|
|
1411
1510
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
|
1412
1511
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1));
|
1413
1512
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16));
|
1414
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_2", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_2));
|
1415
1513
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q8_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q8_0));
|
1416
1514
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
|
1417
1515
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
|
1418
1516
|
|
1419
|
-
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
|
1420
1517
|
std::stringstream ss_magic;
|
1518
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
|
1519
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
|
1520
|
+
|
1521
|
+
ss_magic.str("");
|
1522
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1523
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
|
1524
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
|
1525
|
+
|
1526
|
+
ss_magic.str("");
|
1527
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1528
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
|
1529
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
|
1530
|
+
|
1531
|
+
ss_magic.str("");
|
1532
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1533
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
|
1534
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
|
1535
|
+
|
1536
|
+
ss_magic.str("");
|
1537
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1538
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
|
1539
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
|
1540
|
+
|
1541
|
+
ss_magic.str("");
|
1542
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1421
1543
|
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
|
1422
1544
|
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
|
1423
|
-
|
1424
|
-
|
1425
|
-
|
1545
|
+
|
1546
|
+
ss_magic.str("");
|
1547
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1548
|
+
ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
|
1549
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
|
1550
|
+
|
1551
|
+
ss_magic.str("");
|
1552
|
+
ss_magic.clear(std::stringstream::goodbit);
|
1553
|
+
ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
|
1554
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
|
1555
|
+
|
1556
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
|
1557
|
+
rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
|
1426
1558
|
}
|
data/ext/llama_cpp/src/ggml-cuda.h
CHANGED
@@ -6,6 +6,7 @@ extern "C" {
|
|
6
6
|
|
7
7
|
void ggml_init_cublas(void);
|
8
8
|
|
9
|
+
void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
9
10
|
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
10
11
|
size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
11
12
|
void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
@@ -14,6 +15,9 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
|
|
14
15
|
void * ggml_cuda_host_malloc(size_t size);
|
15
16
|
void ggml_cuda_host_free(void * ptr);
|
16
17
|
|
18
|
+
void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
|
19
|
+
void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
|
20
|
+
|
17
21
|
#ifdef __cplusplus
|
18
22
|
}
|
19
23
|
#endif
|