llama_cpp 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 991df3df6b16ec98a203a6c6565794988eec04697ccb963faab976f436e1bfcc
- data.tar.gz: dafd3b8274640eb79353e11056f497a02392fef332a46dcc1717878c836f62bd
+ metadata.gz: 545786d4c9308ffe0f7e214a12427beaea0b26bec915ff84b16eed25ef1932a4
+ data.tar.gz: aaa0d4fc1710b13a26163306c8b51e423233c2f7e4b3d6127f94c9b6c4846f9c
  SHA512:
- metadata.gz: 0a54fdf18c5be5273f01d4d991b59975a8e8b6a0a8f54087fb90df3f1a8a7ebad557d01fb119f3d61cafa8f6c59fb81641624779fda27b732eea2868cb4642e8
- data.tar.gz: 6574064078070502e36ad933bd5efb2f479c94f47a1260286fe485e48d35a7b3985274943c6163189326891349b47b1d9815d77c600b015fe111cc3842179392
+ metadata.gz: 12b3ac122fd7ea59b51e2d6ff905ed1a71cf8a8b3650a269d4a3793ae32a0149f6836a792c8f216d0fdb0c39aeb3b47914e73ffc74b574bbe686660e6be84ea1
+ data.tar.gz: 5056b95552f3434692a6c19653810d77bb28ddf9b28abd78712ccfb4ee4f7d836a5d54e283513fcfc617cc79ffa7bb9257d4ac2b6d96ec89158bf94acd4cec86
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
+ ## [[0.3.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.5...v0.3.6)] - 2023-08-04
+
+ - Bump bundled llama.cpp from master-1a94186 to master-468ea24.
+ - Add `mul_mat_q` option to ContextParams.
+
  ## [[0.3.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.4...v0.3.5)] - 2023-07-29
 
  - Bump bundled llama.cpp from master-d924522 to master-1a94186.
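
The new `mul_mat_q` option maps to the llama.cpp context parameter of the same name, which toggles upstream's custom quantized matrix-multiplication kernels. A minimal usage sketch (the `LLaMACpp::ContextParams` class name follows this gem's public API; exact performance effects depend on the bundled llama.cpp build):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.mul_mat_q = true # enable llama.cpp's quantized mat-mul kernels
params.mul_mat_q        # => true
```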
data/README.md CHANGED
@@ -12,11 +12,27 @@ This gem is still under development and may undergo many changes in the future.
 
  Install the gem and add to the application's Gemfile by executing:
 
- $ bundle add llama_cpp
+ ```sh
+ $ bundle add llama_cpp
+ ```
 
  If bundler is not being used to manage dependencies, install the gem by executing:
 
- $ gem install llama_cpp
+ ```sh
+ $ gem install llama_cpp
+ ```
+
+ There are several installation options for improving execution performance:
+
+ ```sh
+ # use OpenBLAS
+ $ gem install llama_cpp -- --with-openblas
+
+ # use Metal on macOS
+ $ gem install llama_cpp -- --with-metal
+ ```
+
+ Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by the `with_config` method.
 
  ## Usage
 
 
data/ext/llama_cpp/extconf.rb CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
 
  abort 'libstdc++ is not found.' unless have_library('stdc++')
 
- $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
+ $srcs = %w[ggml.c ggml-alloc.c llama.cpp llama_cpp.cpp]
  $srcs << 'ggml-opencl.cpp' if with_config('clblast')
  $srcs << 'ggml-mpi.c' if with_config('mpi')
  $CFLAGS << ' -w -DNDEBUG'
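
For context, the flags shown in the README (`--with-openblas`, `--with-metal`) reach extconf.rb through mkmf's `with_config`, exactly as the hunk above does for `clblast` and `mpi`. A minimal, illustrative sketch (not code from this diff; the OpenBLAS flag handling and the `GGML_USE_OPENBLAS` macro are assumptions based on upstream llama.cpp conventions):

```ruby
require 'mkmf'

# `gem install llama_cpp -- --with-openblas` makes with_config('openblas') truthy.
if with_config('openblas')
  abort 'libopenblas is not found.' unless have_library('openblas')
  $CFLAGS << ' -DGGML_USE_OPENBLAS' # macro name per upstream llama.cpp (assumption)
end

create_makefile('llama_cpp/llama_cpp')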
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -414,6 +414,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
  rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
  rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
+ rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
+ rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
  rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
  rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
  rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -527,7 +529,7 @@ private:
  // low_vram
  static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.low_vram = low_vram == Qtrue ? true : false;
+ ptr->params.low_vram = RTEST(low_vram) ? true : false;
  return ptr->params.low_vram ? Qtrue : Qfalse;
  }
 
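The switch from `== Qtrue` to `RTEST` (here and in the setters below) makes each setter accept any truthy Ruby value rather than only the exact `true` object. A sketch of the observable difference, using the class and accessor names as bound in this diff:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
# 1 is truthy in Ruby, but it is not the Qtrue object at the C level,
# so the old `== Qtrue` check silently stored false here.
params.low_vram = 1
params.low_vram # => true with the RTEST-based setter
```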
@@ -536,6 +538,18 @@ private:
  return ptr->params.low_vram ? Qtrue : Qfalse;
  }
 
+ // mul_mat_q
+ static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
+ return ptr->params.mul_mat_q ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return ptr->params.mul_mat_q ? Qtrue : Qfalse;
+ }
+
  // seed
  static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -555,7 +569,7 @@ private:
  // f16_kv
  static VALUE _llama_context_params_set_f16_kv(VALUE self, VALUE f16_kv) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.f16_kv = f16_kv == Qtrue ? true : false;
+ ptr->params.f16_kv = RTEST(f16_kv) ? true : false;
  return ptr->params.f16_kv ? Qtrue : Qfalse;
  }
 
@@ -567,7 +581,7 @@ private:
  // logits_all
  static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.logits_all = logits_all == Qtrue ? true : false;
+ ptr->params.logits_all = RTEST(logits_all) ? true : false;
  return ptr->params.logits_all ? Qtrue : Qfalse;
  }
 
@@ -579,7 +593,7 @@ private:
  // vocab_only
  static VALUE _llama_context_params_set_vocab_only(VALUE self, VALUE vocab_only) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.vocab_only = vocab_only == Qtrue ? true : false;
+ ptr->params.vocab_only = RTEST(vocab_only) ? true : false;
  return ptr->params.vocab_only ? Qtrue : Qfalse;
  }
 
@@ -591,7 +605,7 @@ private:
  // use_mmap
  static VALUE _llama_context_params_set_use_mmap(VALUE self, VALUE use_mmap) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.use_mmap = use_mmap == Qtrue ? true : false;
+ ptr->params.use_mmap = RTEST(use_mmap) ? true : false;
  return ptr->params.use_mmap ? Qtrue : Qfalse;
  }
 
@@ -603,7 +617,7 @@ private:
  // use_mlock
  static VALUE _llama_context_params_set_use_mlock(VALUE self, VALUE use_mlock) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.use_mlock = use_mlock == Qtrue ? true : false;
+ ptr->params.use_mlock = RTEST(use_mlock) ? true : false;
  return ptr->params.use_mlock ? Qtrue : Qfalse;
  }
 
@@ -615,7 +629,7 @@ private:
  // embedding
  static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
- ptr->params.embedding = embedding == Qtrue ? true : false;
+ ptr->params.embedding = RTEST(embedding) ? true : false;
  return ptr->params.embedding ? Qtrue : Qfalse;
  }
 
@@ -2408,7 +2422,7 @@ static VALUE rb_llama_llama_backend_init(int argc, VALUE* argv, VALUE self) {
  rb_scan_args(argc, argv, ":", &kw_args);
  rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
 
- const bool numa = kw_values[0] == Qundef ? false : (RTEST ? true : false);
+ const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
  llama_backend_init(numa);
 
  return Qnil;
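
This hunk fixes a genuine bug: the removed line referenced the `RTEST` macro without an argument, while the replacement applies it to the `numa` keyword value. From Ruby the keyword now follows ordinary truthiness. A usage sketch (the `LLaMACpp.backend_init` method name is inferred from `rb_llama_llama_backend_init` above and this gem's module naming):

```ruby
require 'llama_cpp'

# Any truthy value for the numa: keyword now enables NUMA-aware initialization.
LLaMACpp.backend_init(numa: true)
```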