llama_cpp 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 33b146badd1bebdf9588e48c0adac1f9924a0653aa5ec806fdf5dd288ef665d8
- data.tar.gz: 134606db2b9fb10b51fc82f410d6653a6481b828d9fd05390b1570d6e198526a
+ metadata.gz: ed569e816938dfca79c345228cf592eb81827c04acfeae3c8e26e0635bbc518b
+ data.tar.gz: f4a899df0cf450370d7dc75e486a17617f1af0cbcacd9d9a8c7d3bde10016441
  SHA512:
- metadata.gz: 462d9e00121408c7af3934b0a663b29f99d5ad28f60a3471155509463bf26a14792c484d1fdc6054460941ae011d39b510774e225ad4ec03d60ce20a1dfef667
- data.tar.gz: 4bf447ac55bba2b62d204dc975528de6664fe53af89df8ba4aa4172d4dbff709ac5b14a944326be5c71d64baa2cde00b60f7ba5e916e1fb68123c595f74ce24f
+ metadata.gz: 0f3d38eed6628e8d68efc741fe00024fb0c5199fb2e1a33d6f04d9299e1c59deb969e3eafe36190ade84522e70ddca50956fbee9b6406edc5d613f654889a83a
+ data.tar.gz: 0b1705a8d70564a59ad6472b03dc0241727766d4121e26a2e9c3c0d4725ddf2ccf65cb8f4a862688661ea9fa2b1c8858cd6e5e722821e6c2c30c91401475ef74
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
  ## [Unreleased]
 
+ ## [[0.1.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.2...v0.1.3)] - 2023-05-27
+
+ - Bump bundled llama.cpp from master-265db98 to master-66874d4
+
+ ## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+ **Breaking Changes**
+
+ - Bump bundled llama.cpp from master-6986c78 to master-265db98
+ - bump LLAMA_FILE_VERSION to 3
+
  ## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
 
  - Add load_session_file method to Context
ext/llama_cpp/extconf.rb CHANGED
@@ -5,8 +5,7 @@ require 'mkmf'
  abort 'libstdc++ is not found.' unless have_library('stdc++')
 
  $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
- $srcs << 'ggml-opencl.c' if with_config('clblast')
-
+ $srcs << 'ggml-opencl.cpp' if with_config('clblast')
  $CFLAGS << ' -w'
  $CXXFLAGS << ' -std=c++11'
  $INCFLAGS << ' -I$(srcdir)/src'
@@ -24,6 +23,13 @@ if with_config('openblas')
    $CFLAGS << ' -DGGML_USE_OPENBLAS'
  end
 
+ if with_config('blis')
+   abort 'libblis is not found.' unless have_library('blis')
+   abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+   $CFLAGS << ' -DGGML_USE_OPENBLAS'
+ end
+
  if with_config('accelerate')
    abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
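The with_config switches above map to install-time build flags in the usual mkmf way (e.g. `gem install llama_cpp -- --with-clblast`); note that the new `--with-blis` branch still defines GGML_USE_OPENBLAS, since BLIS exposes the same CBLAS interface. A minimal sketch of the gating pattern, assuming the extension target name `llama_cpp/llama_cpp`:

    # Illustrative extconf.rb fragment, not the gem's full build script.
    require 'mkmf'

    $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]

    # with_config('clblast') is true when the gem is installed with:
    #   gem install llama_cpp -- --with-clblast
    $srcs << 'ggml-opencl.cpp' if with_config('clblast')

    create_makefile('llama_cpp/llama_cpp')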
ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -524,7 +524,14 @@ private:
  VALUE filename = kw_values[0];
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
  LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+ try {
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ } catch (const std::runtime_error& e) {
+   rb_raise(rb_eRuntimeError, "%s", e.what());
+   return Qnil;
+ }
+
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
    return Qnil;
@@ -788,7 +795,14 @@ private:
 
  VALUE filename = kw_values[0];
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
- ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+ try {
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ } catch (const std::runtime_error& e) {
+   rb_raise(rb_eRuntimeError, "%s", e.what());
+   return Qnil;
+ }
+
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
    return Qnil;
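Both patched call sites now translate a C++ std::runtime_error thrown by llama_init_from_file into a Ruby RuntimeError instead of letting the exception unwind through the extension and abort the process. A usage sketch, assuming the model_path:/params: keywords shown in the gem's README:

    require 'llama_cpp'

    params = LLaMACpp::ContextParams.new
    begin
      context = LLaMACpp::Context.new(model_path: '/path/to/model.bin', params: params)
    rescue RuntimeError => e
      # Raised when llama_init_from_file throws (e.g. a missing or
      # incompatible model file) or returns NULL.
      warn "failed to load model: #{e.message}"
    end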
@@ -1407,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
  // module functions
 
+ static VALUE rb_llama_llama_init_backend(VALUE self) {
+   llama_init_backend();
+   return Qnil;
+ }
+
  static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
  VALUE kw_args = Qnil;
  ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
@@ -1477,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
  RbLLaMAContext::define_class(rb_mLLaMACpp);
  RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+ rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
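The new wrapper exposes llama.cpp's one-time global initializer, llama_init_backend, as a module function. It is intended to be called once, before any model or context is created:

    require 'llama_cpp'

    LLaMACpp.init_backend # added in this release; call once at startup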
@@ -1494,11 +1514,45 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  std::stringstream ss_magic;
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
- std::stringstream ss_magic_unversioned;
- ss_magic_unversioned << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic_unversioned.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
  }
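All of the magic constants are exposed to Ruby as hexadecimal strings (formatted with std::showbase << std::hex), and the version constants as decimal strings. What a consumer would see, assuming the values in the llama.cpp headers bundled at this revision:

    require 'llama_cpp'

    LLaMACpp::LLAMA_FILE_MAGIC_GGJT # => "0x67676a74" ("ggjt")
    LLaMACpp::LLAMA_FILE_VERSION    # => "3" (the 0.1.2 breaking bump)
    LLaMACpp::LLAMA_SESSION_VERSION # => "1"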
ext/llama_cpp/src/ggml-cuda.h CHANGED
@@ -6,6 +6,7 @@ extern "C" {
 
  void ggml_init_cublas(void);
 
+ void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
@@ -15,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
  void ggml_cuda_host_free(void * ptr);
 
  void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
 
  #ifdef __cplusplus
  }