llama_cpp 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 33b146badd1bebdf9588e48c0adac1f9924a0653aa5ec806fdf5dd288ef665d8
-  data.tar.gz: 134606db2b9fb10b51fc82f410d6653a6481b828d9fd05390b1570d6e198526a
+  metadata.gz: 1fe968c9231c20e614fafe89bc521c313ab68401fedd2d803743b18ccc234a28
+  data.tar.gz: a4916ec0f52b3e131175141f30bd3a70f37859207e732948d2fe7baac98a4b0c
 SHA512:
-  metadata.gz: 462d9e00121408c7af3934b0a663b29f99d5ad28f60a3471155509463bf26a14792c484d1fdc6054460941ae011d39b510774e225ad4ec03d60ce20a1dfef667
-  data.tar.gz: 4bf447ac55bba2b62d204dc975528de6664fe53af89df8ba4aa4172d4dbff709ac5b14a944326be5c71d64baa2cde00b60f7ba5e916e1fb68123c595f74ce24f
+  metadata.gz: fa99138a7a591a7e602e6aa040ccec057dcad09e52c6646edd0def9c0e3ea1aee6796bc32fa05dc9c384af1b8c72a3f5c2077de918d2e0a229901c97732023c1
+  data.tar.gz: 1e4399f4b75fcbe69da61ce23d2cf45594e5502e7d6ea6f9b7f0930ca155bcfb4481f81944496031e79c8ef0e48be20a6797d8f9b41967404e2a54330a93c261
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
 ## [Unreleased]
 
+## [[0.1.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.1...v0.1.2)] - 2023-05-22
+
+**Breaking Changes**
+
+- Bump bundled llama.cpp from master-6986c78 to master-265db98
+- Bump LLAMA_FILE_VERSION to 3
+
 ## [[0.1.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.1.0...v0.1.1)] - 2023-05-21
 
 - Add load_session_file method to Context
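Because LLAMA_FILE_VERSION moves to 3, quantized model files produced for earlier llama.cpp builds generally need to be re-converted before this release can load them. The expected version is exposed as a string constant, so it can be checked at runtime; a minimal sketch using the constants registered later in this diff:

```ruby
require 'llama_cpp'

# "3" as of 0.1.2; compare against the version your model files were
# converted for before attempting to load them.
puts LLaMACpp::LLAMA_FILE_VERSION
```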
data/ext/llama_cpp/extconf.rb CHANGED
@@ -24,6 +24,13 @@ if with_config('openblas')
   $CFLAGS << ' -DGGML_USE_OPENBLAS'
 end
 
+if with_config('blis')
+  abort 'libblis is not found.' unless have_library('blis')
+  abort 'cblas.h is not found.' unless have_header('cblas.h')
+
+  $CFLAGS << ' -DGGML_USE_OPENBLAS'
+end
+
 if with_config('accelerate')
   abort 'Accelerate framework is not found.' unless have_framework('Accelerate')
 
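Note that the new blis branch reuses `-DGGML_USE_OPENBLAS`: BLIS is linked through its CBLAS-compatible interface (hence the `cblas.h` check), so ggml's OpenBLAS code path serves both libraries. Given mkmf's `with_config`, the branch should be selectable at install time with something like `gem install llama_cpp -- --with-blis`; the exact flag spelling is inferred from `with_config('blis')` rather than taken from documented usage.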
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -524,7 +524,14 @@ private:
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
     LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
@@ -788,7 +795,14 @@ private:
 
     VALUE filename = kw_values[0];
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
-    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+
+    try {
+      ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    } catch (const std::runtime_error& e) {
+      rb_raise(rb_eRuntimeError, "%s", e.what());
+      return Qnil;
+    }
+
     if (ctx_ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
       return Qnil;
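Both initialization paths now catch the `std::runtime_error` that the bundled llama.cpp throws for unreadable or format-mismatched model files and re-raise it as a Ruby `RuntimeError`, so a failed load no longer terminates the process. A minimal usage sketch; the `model_path:`/`params:` keywords are assumed from the gem's public API, and the file path is hypothetical:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
begin
  # A model in a stale or corrupt file format now raises a catchable
  # RuntimeError instead of propagating a C++ exception.
  context = LLaMACpp::Context.new(model_path: 'models/llama-7b.bin', params: params)
rescue RuntimeError => e
  warn "model load failed: #{e.message}"
end
```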
@@ -1407,6 +1421,11 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {
 
 // module functions
 
+static VALUE rb_llama_llama_init_backend(VALUE self) {
+  llama_init_backend();
+  return Qnil;
+}
+
 static VALUE rb_llama_model_quantize(int argc, VALUE* argv, VALUE self) {
   VALUE kw_args = Qnil;
   ID kw_table[4] = { rb_intern("input_path"), rb_intern("output_path"), rb_intern("ftype"), rb_intern("n_threads") };
@@ -1477,6 +1496,7 @@ extern "C" void Init_llama_cpp(void) {
   RbLLaMAContext::define_class(rb_mLLaMACpp);
   RbLLaMAContextParams::define_class(rb_mLLaMACpp);
 
+  rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, 0);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
@@ -1494,11 +1514,45 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_0", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_0));
   rb_define_const(rb_mLLaMACpp, "LLAMA_FTYPE_MOSTLY_Q5_1", INT2NUM(LLAMA_FTYPE_MOSTLY_Q5_1));
 
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   std::stringstream ss_magic;
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGJT;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGJT", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGLA;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGMF;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGMF", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGML;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGML", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_GGSN;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_GGSN", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
   ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC;
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC", rb_str_new2(ss_magic.str().c_str()));
-  std::stringstream ss_magic_unversioned;
-  ss_magic_unversioned << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
-  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic_unversioned.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_FILE_MAGIC_UNVERSIONED;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_MAGIC_UNVERSIONED", rb_str_new2(ss_magic.str().c_str()));
+
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
+
+  rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
+  rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
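Each GGML-family file magic is now exported as its own `0x`-prefixed hex-string constant, alongside the session magic and both version numbers. A quick inspection loop (a sketch; the constant names are exactly those registered above):

```ruby
require 'llama_cpp'

%w[LLAMA_FILE_MAGIC LLAMA_FILE_MAGIC_GGJT LLAMA_FILE_MAGIC_GGLA
   LLAMA_FILE_MAGIC_GGMF LLAMA_FILE_MAGIC_GGML LLAMA_FILE_MAGIC_GGSN
   LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_SESSION_MAGIC].each do |name|
  # e.g. LLAMA_FILE_MAGIC_GGJT => "0x67676a74" ("ggjt" in ASCII)
  puts format('%-28s %s', name, LLaMACpp.const_get(name))
end
```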
data/ext/llama_cpp/src/ggml-cuda.h CHANGED
@@ -6,6 +6,7 @@ extern "C" {
 
 void ggml_init_cublas(void);
 
+void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
 void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
@@ -15,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
 void ggml_cuda_host_free(void * ptr);
 
 void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
+void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
 
 #ifdef __cplusplus
 }