llama_cpp 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: c4058abcb7afa897554fc75bb368caeea0e77429e01fb5f3a1949191c50f4de5
-   data.tar.gz: 9929e94c02b5d9c21379a9275f08668e835f91d3d7be3570a2da9ab4ecbe6ad1
+   metadata.gz: 1fe968c9231c20e614fafe89bc521c313ab68401fedd2d803743b18ccc234a28
+   data.tar.gz: a4916ec0f52b3e131175141f30bd3a70f37859207e732948d2fe7baac98a4b0c
  SHA512:
-   metadata.gz: ab267defd1769e7bf4599da199f50a7c5cc2355d2281ab7fd2ccd1a5ef196b716350cf8df9522a9185d02c8c3ad6a5d0f46f271fad0951440ab9b3fab4019932
-   data.tar.gz: 16727a2ac2c68f7913749b656c26523e9eee0118b69ff06bbc0935f899eac1874f16395d9e72ed2caa853e9c61fb9f614ad5913fca623e356aa249308b2f3dda
+   metadata.gz: fa99138a7a591a7e602e6aa040ccec057dcad09e52c6646edd0def9c0e3ea1aee6796bc32fa05dc9c384af1b8c72a3f5c2077de918d2e0a229901c97732023c1
+   data.tar.gz: 1e4399f4b75fcbe69da61ce23d2cf45594e5502e7d6ea6f9b7f0930ca155bcfb4481f81944496031e79c8ef0e48be20a6797d8f9b41967404e2a54330a93c261
data/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
  ## [Unreleased]
 
+ ## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+ **Breaking Changes**
+
+ - Bump bundled llama.cpp from master-6986c78 to master-265db98
+ - bump LLAMA_FILE_VERSION to 3
+
+ ## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
+
+ - Add load_session_file method to Context
+ - Add save_session_file method to Context
+
+ **Breaking Changes**
+
+ - Bump bundled llama.cpp from master-173d0e6 to master-6986c78
+ - bump LLAMA_FILE_VERSION to 2
+
  ## [[0.1.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.7...v0.1.0)] - 2023-05-20
 
  **Breaking Changes**
data/ext/llama_cpp/extconf.rb CHANGED
@@ -24,6 +24,13 @@ if with_config('openblas')
    $CFLAGS << ' -DGGML_USE_OPENBLAS'
  end
 
+ if with_config('blis')
+   abort 'libblis is not found.' unless have_library('blis')
+   abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+   $CFLAGS << ' -DGGML_USE_OPENBLAS'
+ end
+
  if with_config('accelerate')
    abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
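BLIS exposes a CBLAS-compatible interface, which is presumably why the new branch reuses the existing `-DGGML_USE_OPENBLAS` define rather than introducing a BLIS-specific one. Since the guard is mkmf's `with_config('blis')`, the option should be reachable at install time in the usual mkmf way, e.g. `gem install llama_cpp -- --with-blis`; the exact flag spelling is an inference from mkmf's standard `--with-<name>` convention, not something shown in this diff.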
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -292,8 +292,6 @@ public:
  // rb_define_method(rb_cLLaMAContextParams, "initialize", RUBY_METHOD_FUNC(_llama_context_params_init), 0);
  rb_define_method(rb_cLLaMAContextParams, "n_ctx=", RUBY_METHOD_FUNC(_llama_context_params_set_n_ctx), 1);
  rb_define_method(rb_cLLaMAContextParams, "n_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_n_ctx), 0);
- rb_define_method(rb_cLLaMAContextParams, "n_parts=", RUBY_METHOD_FUNC(_llama_context_params_set_n_parts), 1);
- rb_define_method(rb_cLLaMAContextParams, "n_parts", RUBY_METHOD_FUNC(_llama_context_params_get_n_parts), 0);
  rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
  rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
  rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -331,18 +329,6 @@ private:
  return INT2NUM(ptr->params.n_ctx);
  };
 
- // n_parts
- static VALUE _llama_context_params_set_n_parts(VALUE self, VALUE n_parts) {
-   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   ptr->params.n_parts = NUM2INT(n_parts);
-   return INT2NUM(ptr->params.n_parts);
- };
-
- static VALUE _llama_context_params_get_n_parts(VALUE self) {
-   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-   return INT2NUM(ptr->params.n_parts);
- };
-
  // seed
  static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -494,6 +480,8 @@ public:
  rb_define_method(rb_cLLaMAContext, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_context_apply_lora_from_file), -1);
  rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
  rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
+ rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
+ rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
  rb_define_method(rb_cLLaMAContext, "sample_repetition_penalty", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalty), -1);
  rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
  rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
@@ -536,7 +524,14 @@ private:
  VALUE filename = kw_values[0];
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
  LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+ try {
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ } catch (const std::runtime_error& e) {
+   rb_raise(rb_eRuntimeError, "%s", e.what());
+   return Qnil;
+ }
+
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
    return Qnil;
@@ -800,7 +795,14 @@ private:
 
  VALUE filename = kw_values[0];
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
- ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+ try {
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ } catch (const std::runtime_error& e) {
+   rb_raise(rb_eRuntimeError, "%s", e.what());
+   return Qnil;
+ }
+
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
    return Qnil;
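Both hunks above make the same change: the call to llama_init_from_file is wrapped in try/catch so that a C++ std::runtime_error thrown during loading (for example, by a model file in an unsupported format version) is re-raised as a Ruby RuntimeError rather than terminating the process. On the Ruby side such failures become rescuable. A minimal sketch, assuming the Ruby-facing classes are LLaMACpp::Context and LLaMACpp::ContextParams and that the constructor takes model_path: and params: keywords (those names are not visible in these hunks):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
begin
  # A missing or version-incompatible model file now raises RuntimeError
  # (re-raised from the C++ std::runtime_error) instead of aborting the process.
  context = LLaMACpp::Context.new(model_path: 'path/to/model.bin', params: params)
rescue RuntimeError => e
  warn "failed to load model: #{e.message}"
end
```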
@@ -870,6 +872,97 @@ private:
  return Qnil;
  };
 
+ static VALUE _llama_context_load_session_file(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[1] = { rb_intern("session_path") };
+   VALUE kw_values[1] = { Qundef };
+   VALUE candidates = Qnil;
+   VALUE last_n_tokens = Qnil;
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
+
+   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+     rb_raise(rb_eArgError, "session_path must be a String");
+     return Qnil;
+   }
+
+   VALUE filename = kw_values[0];
+
+   LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+   if (ctx_ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+
+   LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+   const int n_ctx = prms_ptr->params.n_ctx;
+
+   std::vector<llama_token> session_tokens(n_ctx);
+   size_t n_token_count_out = 0;
+
+   try {
+     bool res = llama_load_session_file(ctx_ptr->ctx, StringValueCStr(filename), session_tokens.data(), session_tokens.capacity(), &n_token_count_out);
+     if (!res) {
+       rb_raise(rb_eRuntimeError, "Failed to load session file");
+       return Qnil;
+     }
+     session_tokens.resize(n_token_count_out);
+   } catch (const std::runtime_error& e) {
+     rb_raise(rb_eRuntimeError, "%s", e.what());
+     return Qnil;
+   }
+
+   VALUE ary_session_tokens = rb_ary_new2(n_token_count_out);
+   for (size_t i = 0; i < n_token_count_out; i++) {
+     rb_ary_store(ary_session_tokens, i, INT2NUM(session_tokens[i]));
+   }
+
+   RB_GC_GUARD(filename);
+   return ary_session_tokens;
+ }
+
+ static VALUE _llama_context_save_session_file(int argc, VALUE* argv, VALUE self) {
+   VALUE kw_args = Qnil;
+   ID kw_table[2] = { rb_intern("session_path"), rb_intern("session_tokens") };
+   VALUE kw_values[2] = { Qundef, Qundef };
+   VALUE candidates = Qnil;
+   VALUE last_n_tokens = Qnil;
+   rb_scan_args(argc, argv, ":", &kw_args);
+   rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+   if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+     rb_raise(rb_eArgError, "session_path must be a String");
+     return Qnil;
+   }
+   if (!RB_TYPE_P(kw_values[1], T_ARRAY)) {
+     rb_raise(rb_eArgError, "session_tokens must be an Array");
+     return Qnil;
+   }
+
+   VALUE filename = kw_values[0];
+   const size_t sz_session_tokens = RARRAY_LEN(kw_values[1]);
+   std::vector<llama_token> session_tokens(sz_session_tokens);
+   for (size_t i = 0; i < sz_session_tokens; i++) {
+     session_tokens[i] = NUM2INT(rb_ary_entry(kw_values[1], i));
+   }
+
+   LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+   if (ctx_ptr->ctx == NULL) {
+     rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+     return Qnil;
+   }
+
+   bool res = llama_save_session_file(ctx_ptr->ctx, StringValueCStr(filename), session_tokens.data(), sz_session_tokens);
+
+   if (!res) {
+     rb_raise(rb_eRuntimeError, "Failed to save session file");
+     return Qnil;
+   }
+
+   RB_GC_GUARD(filename);
+   return Qnil;
+ }
+
  static VALUE _llama_context_sample_repetition_penalty(int argc, VALUE* argv, VALUE self) {
    VALUE kw_args = Qnil;
    ID kw_table[1] = { rb_intern("penalty") };
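The keyword tables above pin down the Ruby-facing shape of the methods announced in the CHANGELOG: Context#load_session_file takes session_path: and returns the Array of tokens stored in the session (resized to n_token_count_out), while Context#save_session_file takes session_path: and session_tokens: and persists the current KV-cache state. A minimal round-trip sketch, assuming `context` is an initialized LLaMACpp::Context and `tokens` holds the token history already evaluated on it:

```ruby
# Persist the evaluated state so a later run can skip re-evaluating the prompt.
context.save_session_file(session_path: 'prompt.session', session_tokens: tokens)

# Later (same model and context settings): restore the KV cache and recover
# the Array of tokens that was stored alongside it.
restored_tokens = context.load_session_file(session_path: 'prompt.session')
```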
@@ -1328,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
  // module functions
 
+ static VALUE rb_llama_llama_init_backend(VALUE self) {
+   llama_init_backend();
+   return Qnil;
+ }
+
  static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
    VALUE kw_args = Qnil;
    ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
@@ -1398,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
  RbLLaMAContext::define_class(rb_mLLaMACpp);
  RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+ rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
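The two hunks above add and register LLaMACpp.init_backend, a thin wrapper over llama.cpp's llama_init_backend. Upstream treats that call as one-time, process-wide initialization to run before loading any model, so presumably the same ordering applies through this binding. A minimal sketch:

```ruby
require 'llama_cpp'

# One-time, process-wide backend initialization (wraps llama_init_backend);
# presumably called once, before any Context is created.
LLaMACpp.init_backend
```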
@@ -1411,16 +1510,49 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_0));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16));
- rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q4_2", INT2NUM(LLAMA_FTYPE_MOSTLY_Q4_2));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q8_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q8_0));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  std::stringstream ss_magic;
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
- std::stringstream ss_magic_unversioned;
- ss_magic_unversioned << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic_unversioned.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
  }
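This rework funnels every magic value through a single reused std::stringstream (reset via str("") and clear between uses) instead of one stream per constant, and adds the session magic and version constants alongside the file ones. Everything is exposed to Ruby as a String: the magics formatted as showbase hexadecimal, the versions via std::to_string. A sketch of the expected shapes; the hex value here is derived from the ASCII bytes of the "ggjt" tag rather than captured from a run:

```ruby
require 'llama_cpp'

LLaMACpp::LLAMA_FILE_MAGIC_GGJT # => "0x67676a74" ("ggjt" as ASCII bytes)
LLaMACpp::LLAMA_FILE_VERSION    # => "3" (a String; bumped in this release per the CHANGELOG)
```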
data/ext/llama_cpp/src/ggml-cuda.h CHANGED
@@ -6,6 +6,7 @@ extern "C" {
 
  void ggml_init_cublas(void);
 
+ void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
@@ -14,6 +15,9 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
  void * ggml_cuda_host_malloc(size_t size);
  void ggml_cuda_host_free(void * ptr);
 
+ void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
+
  #ifdef __cplusplus
  }
  #endif