llama_cpp 0.3.5 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +18 -2
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +22 -8
- data/ext/llama_cpp/src/ggml-alloc.c +549 -0
- data/ext/llama_cpp/src/ggml-alloc.h +22 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +2526 -430
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +56 -34
- data/ext/llama_cpp/src/ggml-metal.metal +4 -1
- data/ext/llama_cpp/src/ggml.c +445 -176
- data/ext/llama_cpp/src/ggml.h +125 -33
- data/ext/llama_cpp/src/k_quants.c +32 -30
- data/ext/llama_cpp/src/llama-util.h +41 -1
- data/ext/llama_cpp/src/llama.cpp +409 -210
- data/ext/llama_cpp/src/llama.h +19 -1
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 349bc515c7f9f4f85ab75e092b568e042559a782e6943bc8906e66791b3ed2ce
|
4
|
+
data.tar.gz: ed4e310e20af8b2ebc54fa3bf9b4cc0321262577d31d9a955eba36aa4a8fd71e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee350ecf8bcb7fb9fb40e4be4a66c321c9248c0b9bc90a5988e4d08a98b012e26a5f0c814d96e871a7db4abda07839b782aed214f23b48ed7dbbfcfe6f245d69
|
7
|
+
data.tar.gz: 7a36940dd803468ae889c31771ed4f1ff72a450eb06f44b1118c4ae334cad6643c7335f45c974e8f269435c5265efdd347e17d1c71c78b1cf6c5f57734d4e9fb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## [[0.3.7](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.6...v0.3.7)] - 2023-08-12
|
2
|
+
|
3
|
+
- Bump bundled llama.cpp from master-468ea24 to master-9ca4abe.
|
4
|
+
|
5
|
+
## [[0.3.6](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.5...v0.3.6)] - 2023-08-04
|
6
|
+
|
7
|
+
- Bump bundled llama.cpp from master-1a94186 to master-468ea24.
|
8
|
+
- Add `mul_mat_q` option to ContextParams.
|
9
|
+
|
1
10
|
## [[0.3.5](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.4...v0.3.5)] - 2023-07-29
|
2
11
|
|
3
12
|
- Bump bundled llama.cpp from master-d924522 to master-1a94186.
|
data/README.md
CHANGED
@@ -12,11 +12,27 @@ This gem is still under development and may undergo many changes in the future.
|
|
12
12
|
|
13
13
|
Install the gem and add to the application's Gemfile by executing:
|
14
14
|
|
15
|
-
|
15
|
+
```sh
|
16
|
+
$ bundle add llama_cpp
|
17
|
+
```
|
16
18
|
|
17
19
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
18
20
|
|
19
|
-
|
21
|
+
```sh
|
22
|
+
$ gem install llama_cpp
|
23
|
+
```
|
24
|
+
|
25
|
+
There are several installation options for improving execution performance:
|
26
|
+
|
27
|
+
```sh
|
28
|
+
# use OpenBLAS
|
29
|
+
$ gem install llama_cpp -- --with-openblas
|
30
|
+
|
31
|
+
# use Metal on macOS
|
32
|
+
$ gem install llama_cpp -- --with-metal
|
33
|
+
```
|
34
|
+
|
35
|
+
Those options are defined in [extconf.rb](https://github.com/yoshoku/llama_cpp.rb/blob/main/ext/llama_cpp/extconf.rb) by with_config method.
|
20
36
|
|
21
37
|
## Usage
|
22
38
|
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
|
|
5
5
|
|
6
6
|
abort 'libstdc++ is not found.' unless have_library('stdc++')
|
7
7
|
|
8
|
-
$srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
|
8
|
+
$srcs = %w[ggml.c ggml-alloc.c llama.cpp llama_cpp.cpp]
|
9
9
|
$srcs << 'ggml-opencl.cpp' if with_config('clblast')
|
10
10
|
$srcs << 'ggml-mpi.c' if with_config('mpi')
|
11
11
|
$CFLAGS << ' -w -DNDEBUG'
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -414,6 +414,8 @@ public:
|
|
414
414
|
rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
|
415
415
|
rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
|
416
416
|
rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
|
417
|
+
rb_define_method(rb_cLLaMAContextParams, "mul_mat_q=", RUBY_METHOD_FUNC(_llama_context_params_set_mul_mat_q), 1);
|
418
|
+
rb_define_method(rb_cLLaMAContextParams, "mul_mat_q", RUBY_METHOD_FUNC(_llama_context_params_get_mul_mat_q), 0);
|
417
419
|
rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
|
418
420
|
rb_define_method(rb_cLLaMAContextParams, "seed", RUBY_METHOD_FUNC(_llama_context_params_get_seed), 0);
|
419
421
|
rb_define_method(rb_cLLaMAContextParams, "f16_kv=", RUBY_METHOD_FUNC(_llama_context_params_set_f16_kv), 1);
|
@@ -527,7 +529,7 @@ private:
|
|
527
529
|
// low_vram
|
528
530
|
static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
|
529
531
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
530
|
-
ptr->params.low_vram = low_vram
|
532
|
+
ptr->params.low_vram = RTEST(low_vram) ? true : false;
|
531
533
|
return ptr->params.low_vram ? Qtrue : Qfalse;
|
532
534
|
}
|
533
535
|
|
@@ -536,6 +538,18 @@ private:
|
|
536
538
|
return ptr->params.low_vram ? Qtrue : Qfalse;
|
537
539
|
}
|
538
540
|
|
541
|
+
// mul_mat_q
|
542
|
+
static VALUE _llama_context_params_set_mul_mat_q(VALUE self, VALUE mul_mat_q) {
|
543
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
544
|
+
ptr->params.mul_mat_q = RTEST(mul_mat_q) ? true : false;
|
545
|
+
return ptr->params.mul_mat_q ? Qtrue : Qfalse;
|
546
|
+
}
|
547
|
+
|
548
|
+
static VALUE _llama_context_params_get_mul_mat_q(VALUE self) {
|
549
|
+
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
550
|
+
return ptr->params.mul_mat_q ? Qtrue : Qfalse;
|
551
|
+
}
|
552
|
+
|
539
553
|
// seed
|
540
554
|
static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
|
541
555
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
@@ -555,7 +569,7 @@ private:
|
|
555
569
|
// f16_kv
|
556
570
|
static VALUE _llama_context_params_set_f16_kv(VALUE self, VALUE f16_kv) {
|
557
571
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
558
|
-
ptr->params.f16_kv = f16_kv
|
572
|
+
ptr->params.f16_kv = RTEST(f16_kv) ? true : false;
|
559
573
|
return ptr->params.f16_kv ? Qtrue : Qfalse;
|
560
574
|
}
|
561
575
|
|
@@ -567,7 +581,7 @@ private:
|
|
567
581
|
// logits_all
|
568
582
|
static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
|
569
583
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
570
|
-
ptr->params.logits_all = logits_all
|
584
|
+
ptr->params.logits_all = RTEST(logits_all) ? true : false;
|
571
585
|
return ptr->params.logits_all ? Qtrue : Qfalse;
|
572
586
|
}
|
573
587
|
|
@@ -579,7 +593,7 @@ private:
|
|
579
593
|
// vocab_only
|
580
594
|
static VALUE _llama_context_params_set_vocab_only(VALUE self, VALUE vocab_only) {
|
581
595
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
582
|
-
ptr->params.vocab_only = vocab_only
|
596
|
+
ptr->params.vocab_only = RTEST(vocab_only) ? true : false;
|
583
597
|
return ptr->params.vocab_only ? Qtrue : Qfalse;
|
584
598
|
}
|
585
599
|
|
@@ -591,7 +605,7 @@ private:
|
|
591
605
|
// use_mmap
|
592
606
|
static VALUE _llama_context_params_set_use_mmap(VALUE self, VALUE use_mmap) {
|
593
607
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
594
|
-
ptr->params.use_mmap = use_mmap
|
608
|
+
ptr->params.use_mmap = RTEST(use_mmap) ? true : false;
|
595
609
|
return ptr->params.use_mmap ? Qtrue : Qfalse;
|
596
610
|
}
|
597
611
|
|
@@ -603,7 +617,7 @@ private:
|
|
603
617
|
// use_mlock
|
604
618
|
static VALUE _llama_context_params_set_use_mlock(VALUE self, VALUE use_mlock) {
|
605
619
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
606
|
-
ptr->params.use_mlock = use_mlock
|
620
|
+
ptr->params.use_mlock = RTEST(use_mlock) ? true : false;
|
607
621
|
return ptr->params.use_mlock ? Qtrue : Qfalse;
|
608
622
|
}
|
609
623
|
|
@@ -615,7 +629,7 @@ private:
|
|
615
629
|
// embedding
|
616
630
|
static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
|
617
631
|
LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
|
618
|
-
ptr->params.embedding = embedding
|
632
|
+
ptr->params.embedding = RTEST(embedding) ? true : false;
|
619
633
|
return ptr->params.embedding ? Qtrue : Qfalse;
|
620
634
|
}
|
621
635
|
|
@@ -2408,7 +2422,7 @@ static VALUE rb_llama_llama_backend_init(int argc, VALUE* argv, VALUE self) {
|
|
2408
2422
|
rb_scan_args(argc, argv, ":", &kw_args);
|
2409
2423
|
rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
|
2410
2424
|
|
2411
|
-
const bool numa = kw_values[0] == Qundef ? false : (RTEST ? true : false);
|
2425
|
+
const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
|
2412
2426
|
llama_backend_init(numa);
|
2413
2427
|
|
2414
2428
|
return Qnil;
|