llama_cpp 0.3.1 → 0.3.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/README.md +9 -0
- data/examples/chat.rb +1 -1
- data/examples/embedding.rb +1 -1
- data/examples/prompt_jp.txt +8 -0
- data/ext/llama_cpp/extconf.rb +11 -2
- data/ext/llama_cpp/llama_cpp.cpp +284 -111
- data/ext/llama_cpp/src/ggml-cuda.cu +639 -148
- data/ext/llama_cpp/src/ggml-cuda.h +0 -4
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +19 -6
- data/ext/llama_cpp/src/ggml-metal.metal +56 -47
- data/ext/llama_cpp/src/ggml-mpi.c +216 -0
- data/ext/llama_cpp/src/ggml-mpi.h +39 -0
- data/ext/llama_cpp/src/ggml-opencl.cpp +11 -7
- data/ext/llama_cpp/src/ggml.c +1734 -2248
- data/ext/llama_cpp/src/ggml.h +152 -80
- data/ext/llama_cpp/src/llama.cpp +282 -90
- data/ext/llama_cpp/src/llama.h +30 -1
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +16 -13
- data/sig/llama_cpp.rbs +22 -2
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cf337091019bb773e47cf206ff2ff30ed0bef963094494e6493455cad7c59840
+  data.tar.gz: fdbae8e08a6b87d49c5658d5c1857f20bf8efdf5a5371906630dccf4eb0f1159
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f0fee68294960c5ab9f56ebfe7256a00f9330e55f4954f2b016e07cbc023570298fa8f8b578f3e187fe9183b869769085311931122f93a033c6c21158b4e9485
+  data.tar.gz: 7eec8c98ae9ec1a56fa4bdb4e83a2dc2bdea407fc037af8d1b8f09a30c0d1246333d410707f4d66f3f473bf73574757cf12e56a86a0cb47074501f63f65f0c02
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,44 @@
+## [[0.3.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.2...v0.3.3)] - 2023-07-15
+
+- Bump bundled llama.cpp from master-481f793 to master-32c5411.
+- Add MPI config options:
+```
+$ gem install llama_cpp -- --with-mpi
+```
+- Add `backend_free` module function to `LLaMACpp`. This method should be called once at the end of the program when the MPI option is enabled.
+- Add `sample_classifier_free_guidance` method to `Context`.
+
+**Breaking Changes**
+- Rename `init_backend` method to `backend_init`. This method is called internally at `require 'llama_cpp'`.
+
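For context on the MPI additions in the 0.3.3 entry, here is a minimal sketch of how `backend_free` might be wired up in an MPI-enabled build. The `at_exit` placement is an assumption; the changelog only requires that the call happen once at the end of the program:

```ruby
require 'llama_cpp' # LLaMACpp.backend_init is invoked here internally

# When the gem was built with `--with-mpi`, release the backend exactly
# once when the program terminates.
at_exit { LLaMACpp.backend_free }

# ... load a model and generate text as usual ...
```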
+## [[0.3.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.1...v0.3.2)] - 2023-07-08
+
+- Bump bundled llama.cpp from master-b8c8dda to master-481f793.
+- Add `Timings` class and `timings` method to `Context`:
+```ruby
+require 'llama_cpp'
+
+# ...
+
+context = LLaMACpp::Context.new(model: model)
+timings = context.timings
+
+puts timings.class
+# => LLaMACpp::Timings
+puts timings.t_load_ms
+# => 79.61
+```
+- Expose sampling options as the arguments of `generate` module function:
+```ruby
+require 'llama_cpp'
+
+# ...
+
+LLaMACpp.generate(context, 'Hello, world.', top_k: 30, top_p: 0.8, temperature: 0.9)
+```
+- Add `ModelQuantizeParams` class; this class had not been published because the author forgot to call `rb_define_class`.
+- Minor updates to example scripts, configuration files, and documentation.
+
 ## [[0.3.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.0...v0.3.1)] - 2023-07-02
 
 - Bump bundled llama.cpp from master-9d23589 to master-b8c8dda.
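The `# ...` elisions in the changelog snippets above skip model setup. For completeness, a self-contained sketch assembled only from APIs that appear elsewhere in this diff; the model filename is a placeholder:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model = LLaMACpp::Model.new(model_path: 'ggml-vicuna-7b-1.1-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

# Sampling options exposed by `generate` in 0.3.2.
puts LLaMACpp.generate(context, 'Hello, world.', top_k: 30, top_p: 0.8, temperature: 0.9)

# Timings introduced in 0.3.2.
puts context.timings.t_load_ms
```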
data/README.md
CHANGED
@@ -68,6 +68,15 @@ User:
 
 ![llama_cpp_chat_example](https://github.com/yoshoku/llama_cpp.rb/assets/5562409/374ae3d8-63a6-498f-ae6e-5552b464bdda)
 
+Japanese chat is also possible using the [Vicuna model on Hugging Face](https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized).
+
+```sh
+$ wget https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_0.bin
+$ ruby chat.rb --model ggml-vicuna-7b-1.1-q4_0.bin --file prompt_jp.txt
+```
+
+![llama_cpp rb-jpchat](https://github.com/yoshoku/llama_cpp.rb/assets/5562409/526ff18c-2bb2-4b06-8933-f72960024033)
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/llama_cpp.rb.
data/examples/chat.rb
CHANGED
@@ -33,7 +33,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
   def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
     params = LLaMACpp::ContextParams.new
-    params.seed = options[:seed]
+    params.seed = options[:seed] if options[:seed] != -1
     params.n_gpu_layers = options[:n_gpu_layers]
     model = LLaMACpp::Model.new(model_path: options[:model], params: params)
     context = LLaMACpp::Context.new(model: model)
CHANGED
@@ -18,7 +18,7 @@ class Embedding < Thor # rubocop:disable Style/Documentation
|
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
     params = LLaMACpp::ContextParams.new
-    params.seed = options[:seed]
+    params.seed = options[:seed] if options[:seed] != -1
     params.n_gpu_layers = options[:n_gpu_layers]
     params.embedding = true
     model = LLaMACpp::Model.new(model_path: options[:model], params: params)
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -7,8 +7,9 @@ abort 'libstdc++ is not found.' unless have_library('stdc++')
 
 $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
-$
-$
+$srcs << 'ggml-mpi.c' if with_config('mpi')
+$CFLAGS << ' -w -DNDEBUG'
+$CXXFLAGS << ' -std=c++11 -DNDEBUG'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'
 
@@ -76,6 +77,14 @@ if with_config('clblast')
   end
 end
 
+if with_config('mpi')
+  abort 'libmpi is not found.' unless have_library('mpi')
+  abort 'mpi.h is not found.' unless have_header('mpi.h')
+
+  $CFLAGS << ' -DGGML_USE_MPI -Wno-cast-qual'
+  $CXXFLAGS << ' -DGGML_USE_MPI -Wno-cast-qual'
+end
+
 UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
 
 # rubocop:disable Layout/LineLength
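To exercise the new MPI path end to end, a hedged sketch; the `mpirun` invocation is a generic MPI example rather than something this diff documents:

```sh
# Requires an MPI toolchain (e.g. Open MPI) providing mpi.h and libmpi.
$ gem install llama_cpp -- --with-mpi

# MPI programs are typically launched through mpirun; the process count and
# script here are hypothetical.
$ mpirun -np 2 ruby chat.rb --model ggml-vicuna-7b-1.1-q4_0.bin --file prompt_jp.txt
```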