llama_cpp 0.1.0 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c4058abcb7afa897554fc75bb368caeea0e77429e01fb5f3a1949191c50f4de5
-  data.tar.gz: 9929e94c02b5d9c21379a9275f08668e835f91d3d7be3570a2da9ab4ecbe6ad1
+  metadata.gz: 1fe968c9231c20e614fafe89bc521c313ab68401fedd2d803743b18ccc234a28
+  data.tar.gz: a4916ec0f52b3e131175141f30bd3a70f37859207e732948d2fe7baac98a4b0c
 SHA512:
-  metadata.gz: ab267defd1769e7bf4599da199f50a7c5cc2355d2281ab7fd2ccd1a5ef196b716350cf8df9522a9185d02c8c3ad6a5d0f46f271fad0951440ab9b3fab4019932
-  data.tar.gz: 16727a2ac2c68f7913749b656c26523e9eee0118b69ff06bbc0935f899eac1874f16395d9e72ed2caa853e9c61fb9f614ad5913fca623e356aa249308b2f3dda
+  metadata.gz: fa99138a7a591a7e602e6aa040ccec057dcad09e52c6646edd0def9c0e3ea1aee6796bc32fa05dc9c384af1b8c72a3f5c2077de918d2e0a229901c97732023c1
+  data.tar.gz: 1e4399f4b75fcbe69da61ce23d2cf45594e5502e7d6ea6f9b7f0930ca155bcfb4481f81944496031e79c8ef0e48be20a6797d8f9b41967404e2a54330a93c261
data/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
 ## [Unreleased]
 
+## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+**Breaking Changes**
+
+- Bump bundled llama.cpp from master-6986c78 to master-265db98
+- bump LLAMA_FILE_VERSION to 3
+
+## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
+
+- Add load_session_file method to Context
+- Add save_session_file method to Context
+
+**Breaking Changes**
+
+- Bump bundled llama.cpp from master-173d0e6 to master-6986c78
+- bump LLAMA_FILE_VERSION to 2
+
 ## [[0.1.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.7...v0.1.0)] - 2023-05-20
 
 **Breaking Changes**
ext/llama_cpp/extconf.rb CHANGED
@@ -24,6 +24,13 @@ if with_config('openblas')
   $CFLAGS << ' -DGGML_USE_OPENBLAS'
 end
 
+if with_config('blis')
+  abort 'libblis is not found.' unless have_library('blis')
+  abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+  $CFLAGS << ' -DGGML_USE_OPENBLAS'
+end
+
 if with_config('accelerate')
   abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
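The new `blis` switch mirrors the `openblas` one above; with mkmf's standard option plumbing it would presumably be enabled at install time via `gem install llama_cpp -- --with-blis`. Reusing the `-DGGML_USE_OPENBLAS` define appears intentional: BLIS ships a CBLAS-compatible interface, and that macro is what gates GGML's CBLAS code path.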
ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -292,8 +292,6 @@ public:
     // rb_define_method(rb_cLLaMAContextParams, "initialize", RUBY_METHOD_FUNC(_llama_context_params_init), 0);
     rb_define_method(rb_cLLaMAContextParams, "n_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_n_ctx), 1);
     rb_define_method(rb_cLLaMAContextParams, "n_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_n_ctx), 0);
-    rb_define_method(rb_cLLaMAContextParams, "n_parts=", RUBY_METHOD_FUNC(_llama_context_params_set_n_parts), 1);
-    rb_define_method(rb_cLLaMAContextParams, "n_parts", RUBY_METHOD_FUNC(_llama_context_params_get_n_parts), 0);
     rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
     rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
     rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -331,18 +329,6 @@ private:
     return INT2NUM(ptr->params.n_ctx);
   };
 
-  // n_parts
-  static VALUE _llama_context_params_set_n_parts(VALUE self, VALUE n_parts) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    ptr->params.n_parts = NUM2INT(n_parts);
-    return INT2NUM(ptr->params.n_parts);
-  };
-
-  static VALUE _llama_context_params_get_n_parts(VALUE self) {
-    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-    return INT2NUM(ptr->params.n_parts);
-  };
-
   // seed
   static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
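For callers, the two hunks above remove `n_parts` from the Ruby-facing `ContextParams` entirely (the bundled llama.cpp no longer splits model files into parts). A minimal migration sketch, using only accessors that remain registered above:

    params = LLaMACpp::ContextParams.new
    params.n_ctx = 512
    params.seed  = 42
    # params.n_parts = 1  # raises NoMethodError as of this release; drop it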
@@ -494,6 +480,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
     rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
+    rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
+    rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "sample_repetition_penalty", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalty), -1);
     rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
     rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
@@ -536,7 +524,14 @@ private:
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
     LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
@@ -800,7 +795,14 @@ private:
 
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
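Both call sites now translate the C++ std::runtime_error thrown by llama_init_from_file (e.g. on a truncated file or a mismatched LLAMA_FILE_VERSION) into a Ruby RuntimeError instead of terminating the process. A hedged usage sketch; the `model_path:`/`params:` keywords follow the gem's README, and the path is a placeholder:

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    begin
      context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model.bin', params: params)
    rescue RuntimeError => e
      warn "model load failed: #{e.message}"
    end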
@@ -870,6 +872,97 @@ private:
     return Qnil;
   };
 
+  static VALUE _llama_context_load_session_file(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[1] = { rb_intern("session_path") };
+    VALUE kw_values[1] = { Qundef };
+    VALUE candidates = Qnil;
+    VALUE last_n_tokens = Qnil;
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "session_path must be a String");
+      return Qnil;
+    }
+
+    VALUE filename = kw_values[0];
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_ctx = prms_ptr->params.n_ctx;
+
+    std::vector<llama_token> session_tokens(n_ctx);
+    size_t n_token_count_out = 0;
+
+    try {
+      bool res = llama_load_session_file(ctx_ptr->ctx, StringValueCStr(filename), session_tokens.data(), session_tokens.capacity(), &n_token_count_out);
+      if (!res) {
+        rb_raise(rb_eRuntimeError, "Failed to load session file");
+        return Qnil;
+      }
+      session_tokens.resize(n_token_count_out);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
+    VALUE ary_session_tokens = rb_ary_new2(n_token_count_out);
+    for (size_t i = 0; i < n_token_count_out; i++) {
+      rb_ary_store(ary_session_tokens, i, INT2NUM(session_tokens[i]));
+    }
+
+    RB_GC_GUARD(filename);
+    return ary_session_tokens;
+  }
+
+  static VALUE _llama_context_save_session_file(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("session_path"), rb_intern("session_tokens") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE candidates = Qnil;
+    VALUE last_n_tokens = Qnil;
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "session_path must be a String");
+      return Qnil;
+    }
+    if (!RB_TYPE_P(kw_values[1], T_ARRAY)) {
+      rb_raise(rb_eArgError, "session_tokens must be an Array");
+      return Qnil;
+    }
+
+    VALUE filename = kw_values[0];
+    const size_t sz_session_tokens = RARRAY_LEN(kw_values[1]);
+    std::vector<llama_token> session_tokens(sz_session_tokens);
+    for (size_t i = 0; i < sz_session_tokens; i++) {
+      session_tokens[i] = NUM2INT(rb_ary_entry(kw_values[1], i));
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+
+    bool res = llama_save_session_file(ctx_ptr->ctx, StringValueCStr(filename), session_tokens.data(), sz_session_tokens);
+
+    if (!res) {
+      rb_raise(rb_eRuntimeError, "Failed to save session file");
+      return Qnil;
+    }
+
+    RB_GC_GUARD(filename);
+    return Qnil;
+  }
+
   static VALUE _llama_context_sample_repetition_penalty(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[1] = { rb_intern("penalty") };
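The pair above round-trips llama.cpp's KV-cache session state through a file: `save_session_file` expects the token ids already evaluated on the context, and `load_session_file` returns the saved ids as a Ruby Array (resized to the count actually read). A minimal sketch; the keyword names come from the kw_tables above, while `context` and `tokens` are assumed to exist from earlier setup:

    # tokens: Array of Integer token ids previously evaluated on this context
    context.save_session_file(session_path: 'session.bin', session_tokens: tokens)

    restored = context.load_session_file(session_path: 'session.bin')
    # => Array of the token ids recorded in the session file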
@@ -1328,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
 // module functions
 
+static VALUE rb_llama_llama_init_backend(VALUE self) {
+  llama_init_backend();
+  return Qnil;
+}
+
 static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   VALUE kw_args = Qnil;
   ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
@@ -1398,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
   RbLLaMAContext::define_class(rb_mLLaMACpp);
   RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+  rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
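The registration above exposes the wrapper as `LLaMACpp.init_backend` (zero arguments), mirroring llama.cpp's one-time llama_init_backend setup. Presumably it should be called once before creating any contexts:

    require 'llama_cpp'

    LLaMACpp.init_backend  # one-time backend initialization
    params = LLaMACpp::ContextParams.new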
@@ -1411,16 +1510,49 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16));
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_2", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_2));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q8_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q8_0));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   std::stringstream ss_magic;
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-  std::stringstream ss_magic_unversioned;
-  ss_magic_unversioned << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic_unversioned.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
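Since each magic is streamed through std::showbase/std::hex, these constants surface in Ruby as hexadecimal strings, while the version constants are decimal strings. Expected values, assuming the usual four-byte ASCII magics from llama.cpp ('ggjt', 'ggsn'):

    LLaMACpp::LLAMA_FILE_VERSION     # => "3"
    LLaMACpp::LLAMA_FILE_MAGIC_GGJT  # => "0x67676a74" ('ggjt')
    LLaMACpp::LLAMA_SESSION_MAGIC    # => "0x6767736e" ('ggsn')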
ext/llama_cpp/src/ggml-cuda.h CHANGED
@@ -6,6 +6,7 @@ extern "C" {
 
 void ggml_init_cublas(void);
 
+void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
@@ -14,6 +15,9 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
 void * ggml_cuda_host_malloc(size_t size);
 void ggml_cuda_host_free(void * ptr);
 
+void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
+
 #ifdef __cplusplus
 }
 #endif