llama_cpp 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +18 -2
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +22 -8
- data/ext/llama_cpp/src/ggml-alloc.c +541 -0
- data/ext/llama_cpp/src/ggml-alloc.h +22 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +2090 -438
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +17 -16
- data/ext/llama_cpp/src/ggml-metal.metal +4 -1
- data/ext/llama_cpp/src/ggml.c +49 -26
- data/ext/llama_cpp/src/ggml.h +12 -1
- data/ext/llama_cpp/src/k_quants.c +32 -30
- data/ext/llama_cpp/src/llama.cpp +199 -68
- data/ext/llama_cpp/src/llama.h +1 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 545786d4c9308ffe0f7e214a12427beaea0b26bec915ff84b16eed25ef1932a4
+  data.tar.gz: aaa0d4fc1710b13a26163306c8b51e423233c2f7e4b3d6127f94c9b6c4846f9c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 12b3ac122fd7ea59b51e2d6ff905ed1a71cf8a8b3650a269d4a3793ae32a0149f6836a792c8f216d0fdb0c39aeb3b47914e73ffc74b574bbe686660e6be84ea1
+  data.tar.gz: 5056b95552f3434692a6c19653810d77bb28ddf9b28abd78712ccfb4ee4f7d836a5d54e283513fcfc617cc79ffa7bb9257d4ac2b6d96ec89158bf94acd4cec86
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
+## [[0.3.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.5...v0.3.6)] - 2023-08-04
+
+- Bump bundled llama.cpp from master-1a94186 to master-468ea24.
+- Add `mul_mat_q` option to ContextParams.
+
 ## [[0.3.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.4...v0.3.5)] - 2023-07-29
 
 - Bump bundled llama.cpp from master-d924522 to master-1a94186.
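For context, `mul_mat_q` corresponds to llama.cpp's quantized matrix-multiplication kernel switch. A minimal usage sketch of the new accessor; the model path is a placeholder, and the surrounding Model/Context calls follow this gem's 0.3.x API:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.mul_mat_q = true   # setter coerces any Ruby value to a C bool
params.mul_mat_q          # => true

# Placeholder model path, shown only to situate the option.
model = LLaMACpp::Model.new(model_path: '/path/to/quantized-model.bin', params: params)
context = LLaMACpp::Context.new(model: model)
```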
data/README.md
CHANGED
@@ -12,11 +12,27 @@ This gem is still under development and may undergo many changes in the future.
 
 Install the gem and add to the application's Gemfile by executing:
 
-
+```sh
+$ bundle add llama_cpp
+```
 
 If bundler is not being used to manage dependencies, install the gem by executing:
 
-
+```sh
+$ gem install llama_cpp
+```
+
+There are several installation options for improving execution performance:
+
+```sh
+# use OpenBLAS
+$ gem install llama_cpp -- --with-openblas
+
+# use Metal on macOS
+$ gem install llama_cpp -- --with-metal
+```
+
+Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
 
 ## Usage
 
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
 
 abort 'libstdc++ is not found.' unless have_library('stdc++')
 
-$srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
+$srcs = %w[ggml.c ggml-alloc.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
 $srcs << 'ggml-mpi.c' if with_config('mpi')
 $CFLAGS << ' -w -DNDEBUG'
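The new `ggml-alloc.c` is simply appended to the compiled source list. For readers unfamiliar with mkmf, the `--with-*` install flags from the README reach this file through `with_config`; the sketch below is a condensed, hypothetical rendering of that pattern (the openblas branch is illustrative shorthand, not the verbatim extconf.rb):

```ruby
require 'mkmf'

# Base sources; 0.3.6 adds ggml-alloc.c here.
$srcs = %w[ggml.c ggml-alloc.c llama.cpp llama_cpp.cpp]

# `gem install llama_cpp -- --with-openblas` makes with_config('openblas') truthy,
# so optional backends can be toggled at install time (hypothetical condensed branch):
if with_config('openblas')
  abort 'libopenblas is not found.' unless have_library('openblas')
  $CFLAGS << ' -DGGML_USE_OPENBLAS'
end

create_makefile('llama_cpp/llama_cpp')
```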
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -414,6 +414,8 @@ public:
 rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
 rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
 rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
+rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
+rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
 rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
 rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
 rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
@@ -527,7 +529,7 @@ private:
 // low_vram
 static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.low_vram = low_vram
+ptr->params.low_vram = RTEST(low_vram) ? true : false;
 return ptr->params.low_vram ? Qtrue : Qfalse;
 }
 
@@ -536,6 +538,18 @@ private:
 return ptr->params.low_vram ? Qtrue : Qfalse;
 }
 
+// mul_mat_q
+static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
+LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
+return ptr->params.mul_mat_q ? Qtrue : Qfalse;
+}
+
+static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
+LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+return ptr->params.mul_mat_q ? Qtrue : Qfalse;
+}
+
 // seed
 static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -555,7 +569,7 @@ private:
 // f16_kv
 static VALUE _llama_context_params_set_f16_kv(VALUE self, VALUE f16_kv) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.f16_kv = f16_kv
+ptr->params.f16_kv = RTEST(f16_kv) ? true : false;
 return ptr->params.f16_kv ? Qtrue : Qfalse;
 }
 
@@ -567,7 +581,7 @@ private:
 // logits_all
 static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.logits_all = logits_all
+ptr->params.logits_all = RTEST(logits_all) ? true : false;
 return ptr->params.logits_all ? Qtrue : Qfalse;
 }
 
@@ -579,7 +593,7 @@ private:
 // vocab_only
 static VALUE _llama_context_params_set_vocab_only(VALUE self, VALUE vocab_only) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.vocab_only = vocab_only
+ptr->params.vocab_only = RTEST(vocab_only) ? true : false;
 return ptr->params.vocab_only ? Qtrue : Qfalse;
 }
 
@@ -591,7 +605,7 @@ private:
 // use_mmap
 static VALUE _llama_context_params_set_use_mmap(VALUE self, VALUE use_mmap) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.use_mmap = use_mmap
+ptr->params.use_mmap = RTEST(use_mmap) ? true : false;
 return ptr->params.use_mmap ? Qtrue : Qfalse;
 }
 
@@ -603,7 +617,7 @@ private:
 // use_mlock
 static VALUE _llama_context_params_set_use_mlock(VALUE self, VALUE use_mlock) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.use_mlock = use_mlock
+ptr->params.use_mlock = RTEST(use_mlock) ? true : false;
 return ptr->params.use_mlock ? Qtrue : Qfalse;
 }
 
@@ -615,7 +629,7 @@ private:
 // embedding
 static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
 LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
-ptr->params.embedding = embedding
+ptr->params.embedding = RTEST(embedding) ? true : false;
 return ptr->params.embedding ? Qtrue : Qfalse;
 }
 
@@ -2408,7 +2422,7 @@ static VALUE rb_llama_llama_backend_init(int argc, VALUE* argv, VALUE self) {
 rb_scan_args(argc, argv, ":", &kw_args);
 rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
 
-const bool numa = kw_values[0] == Qundef ? false : (RTEST ? true : false);
+const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
 llama_backend_init(numa);
 
 return Qnil;
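The recurring fix in these hunks replaces direct assignment of a Ruby VALUE to a C bool field with an `RTEST(...)` coercion, so ordinary Ruby truthiness (everything except nil and false counts as true) now carries over to the C side. A small sketch of the observable behavior, with illustrative values:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.use_mmap = nil    # nil is falsy -> stored as C false
params.use_mmap          # => false
params.f16_kv = 1        # any non-nil/false object is truthy -> stored as C true
params.f16_kv            # => true

# The :numa keyword of backend_init goes through the same RTEST coercion.
LLaMACpp.backend_init(numa: false)
```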