llama_cpp 0.1.1 → 0.1.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 33b146badd1bebdf9588e48c0adac1f9924a0653aa5ec806fdf5dd288ef665d8
-   data.tar.gz: 134606db2b9fb10b51fc82f410d6653a6481b828d9fd05390b1570d6e198526a
+   metadata.gz: ed569e816938dfca79c345228cf592eb81827c04acfeae3c8e26e0635bbc518b
+   data.tar.gz: f4a899df0cf450370d7dc75e486a17617f1af0cbcacd9d9a8c7d3bde10016441
  SHA512:
-   metadata.gz: 462d9e00121408c7af3934b0a663b29f99d5ad28f60a3471155509463bf26a14792c484d1fdc6054460941ae011d39b510774e225ad4ec03d60ce20a1dfef667
-   data.tar.gz: 4bf447ac55bba2b62d204dc975528de6664fe53af89df8ba4aa4172d4dbff709ac5b14a944326be5c71d64baa2cde00b60f7ba5e916e1fb68123c595f74ce24f
+   metadata.gz: 0f3d38eed6628e8d68efc741fe00024fb0c5199fb2e1a33d6f04d9299e1c59deb969e3eafe36190ade84522e70ddca50956fbee9b6406edc5d613f654889a83a
+   data.tar.gz: 0b1705a8d70564a59ad6472b03dc0241727766d4121e26a2e9c3c0d4725ddf2ccf65cb8f4a862688661ea9fa2b1c8858cd6e5e722821e6c2c30c91401475ef74
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
  ## [Unreleased]
 
+ ## [[0.1.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.2...v0.1.3)] - 2023-05-27
+
+ - Bump bundled llama.cpp from master-265db98 to master-66874d4
+
+ ## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+ **Breaking Changes**
+
+ - Bump bundled llama.cpp from master-6986c78 to master-265db98
+ - bump LLAMA_FILE_VERSION to 3
+
  ## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
 
  - Add load_session_file method to Context
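Note on the 0.1.2 breaking change: bumping `LLAMA_FILE_VERSION` to 3 tracks the bundled llama.cpp's model file-format revision from this period, so model files quantized for an older format version may need to be re-converted or re-quantized before they load with 0.1.2 and later.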
ext/llama_cpp/extconf.rb CHANGED
@@ -5,8 +5,7 @@ require 'mkmf'
  abort 'libstdc++ is not found.' unless have_library('stdc++')
 
  $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
- $srcs << 'ggml-opencl.c' if with_config('clblast')
-
+ $srcs << 'ggml-opencl.cpp' if with_config('clblast')
  $CFLAGS << ' -w'
  $CXXFLAGS << ' -std=c++11'
  $INCFLAGS << ' -I$(srcdir)/src'
@@ -24,6 +23,13 @@ if with_config('openblas')
    $CFLAGS << ' -DGGML_USE_OPENBLAS'
  end
 
+ if with_config('blis')
+   abort 'libblis is not found.' unless have_library('blis')
+   abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+   $CFLAGS << ' -DGGML_USE_OPENBLAS'
+ end
+
  if with_config('accelerate')
    abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
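Two build-configuration changes land above: the CLBlast branch now compiles `ggml-opencl.cpp` (upstream llama.cpp rewrote that source in C++), and a new `blis` option links BLIS while reusing the `-DGGML_USE_OPENBLAS` define, which works because ggml's OpenBLAS code path only needs a CBLAS-compatible `cblas.h`, and BLIS provides one. With mkmf, these `with_config` switches are driven by flags passed through gem installation; assuming the conventional flag spelling, something like `gem install llama_cpp -- --with-blis` (or `-- --with-clblast`) enables them.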
ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -524,7 +524,14 @@ private:
  VALUE filename = kw_values[0];
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
  LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
- ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+ try {
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ } catch (const std::runtime_error& e) {
+   rb_raise(rb_eRuntimeError, "%s", e.what());
+   return Qnil;
+ }
+
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
    return Qnil;
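This guard matters because the bundled llama.cpp can throw `std::runtime_error` (for example on a file-format mismatch after the version bump noted in the CHANGELOG); letting a C++ exception propagate through Ruby's C API would abort the process, so it is translated into a Ruby `RuntimeError` instead. The same pattern is applied to the second `llama_init_from_file` call site below, followed by a usage sketch.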
@@ -788,7 +795,14 @@ private:
 
  VALUE filename = kw_values[0];
  LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
- ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+ try {
+   ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+ } catch (const std::runtime_error& e) {
+   rb_raise(rb_eRuntimeError, "%s", e.what());
+   return Qnil;
+ }
+
  if (ctx_ptr->ctx == NULL) {
    rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
    return Qnil;
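A minimal sketch of what this buys callers on the Ruby side: a bad or incompatible model file now surfaces as a rescuable `RuntimeError` instead of a crash. The keyword names `model_path:` and `params:` are assumptions based on this gem's 0.1.x API:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
begin
  # Keyword names are assumed from the 0.1.x API.
  context = LLaMACpp::Context.new(model_path: '/path/to/model.bin', params: params)
rescue RuntimeError => e
  # Reached if llama_init_from_file throws (e.g. wrong file version)
  # or returns NULL ("Failed to initialize LLaMA context").
  warn "could not load model: #{e.message}"
end
```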
@@ -1407,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
  // module functions
 
+ static VALUE rb_llama_llama_init_backend(VALUE self) {
+   llama_init_backend();
+   return Qnil;
+ }
+
  static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
    VALUE kw_args = Qnil;
    ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
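The new wrapper exposes upstream's `llama_init_backend()`, a one-time, process-wide initialization; it takes no arguments and returns `nil`. It is registered as `LLaMACpp.init_backend` in the next hunk, so typical callers would run it once at startup:

```ruby
require 'llama_cpp'

# One-time, process-wide setup; call before creating any Context.
LLaMACpp.init_backend
```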
@@ -1477,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
  RbLLaMAContext::define_class(rb_mLLaMACpp);
  RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+ rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
  rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
  rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
  rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
@@ -1494,11 +1514,45 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
  rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
  std::stringstream ss_magic;
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
- std::stringstream ss_magic_unversioned;
- ss_magic_unversioned << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
- rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic_unversioned.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+ ss_magic.str("");
+ ss_magic.clear(std::stringstream::goodbit);
+ ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+ rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
  }
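The restructured block defines every known file magic by reusing a single `std::stringstream`: `ss_magic.str("")` empties the buffer and `clear(std::stringstream::goodbit)` resets the stream's state flags between uses. On the Ruby side all of these arrive as strings; the literal values below are assumptions based on upstream llama.cpp's four-character codes ("ggjt", "ggsn", ...) at this revision:

```ruby
require 'llama_cpp'

# Magics are hex strings, versions are decimal strings (values assumed).
LLaMACpp::LLAMA_FILE_MAGIC_GGJT  # => "0x67676a74"  ("ggjt")
LLaMACpp::LLAMA_SESSION_MAGIC    # => "0x6767736e"  ("ggsn")
LLaMACpp::LLAMA_FILE_VERSION     # => "3"  (see the CHANGELOG entry above)
LLaMACpp::LLAMA_SESSION_VERSION  # => "1"
```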
ext/llama_cpp/src/ggml-cuda.h CHANGED
@@ -6,6 +6,7 @@ extern "C" {
 
  void ggml_init_cublas(void);
 
+ void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
@@ -15,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
  void ggml_cuda_host_free(void * ptr);
 
  void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
 
  #ifdef __cplusplus
  }
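The two `ggml-cuda.h` additions (`ggml_cuda_mul` and `ggml_cuda_load_data`) come in with the bundled llama.cpp update and appear to be internal declarations for upstream's CUDA work on element-wise multiplication and loading tensor data directly from a model file; they are not part of the gem's public Ruby API.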