llama_cpp 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/README.md +9 -0
- data/examples/chat.rb +1 -1
- data/examples/embedding.rb +1 -1
- data/examples/prompt_jp.txt +8 -0
- data/ext/llama_cpp/extconf.rb +11 -2
- data/ext/llama_cpp/llama_cpp.cpp +284 -111
- data/ext/llama_cpp/src/ggml-cuda.cu +639 -148
- data/ext/llama_cpp/src/ggml-cuda.h +0 -4
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +19 -6
- data/ext/llama_cpp/src/ggml-metal.metal +56 -47
- data/ext/llama_cpp/src/ggml-mpi.c +216 -0
- data/ext/llama_cpp/src/ggml-mpi.h +39 -0
- data/ext/llama_cpp/src/ggml-opencl.cpp +11 -7
- data/ext/llama_cpp/src/ggml.c +1734 -2248
- data/ext/llama_cpp/src/ggml.h +152 -80
- data/ext/llama_cpp/src/llama.cpp +282 -90
- data/ext/llama_cpp/src/llama.h +30 -1
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +16 -13
- data/sig/llama_cpp.rbs +22 -2
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cf337091019bb773e47cf206ff2ff30ed0bef963094494e6493455cad7c59840
+  data.tar.gz: fdbae8e08a6b87d49c5658d5c1857f20bf8efdf5a5371906630dccf4eb0f1159
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f0fee68294960c5ab9f56ebfe7256a00f9330e55f4954f2b016e07cbc023570298fa8f8b578f3e187fe9183b869769085311931122f93a033c6c21158b4e9485
+  data.tar.gz: 7eec8c98ae9ec1a56fa4bdb4e83a2dc2bdea407fc037af8d1b8f09a30c0d1246333d410707f4d66f3f473bf73574757cf12e56a86a0cb47074501f63f65f0c02
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,44 @@
+## [[0.3.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.2...v0.3.3)] - 2023-07-15
+
+- Bump bundled llama.cpp from master-481f793 to master-32c5411.
+- Add MPI config options:
+```
+$ gem install llama_cpp -- --with-mpi
+```
+- Add `backend_free` module function to `LLaMACpp`. This method should be called once at the end of the program when the MPI option is enabled.
+- Add `sample_classifier_free_guidance` method to `Context`.
+
+**Breaking Changes**
+- Rename `init_backend` method to `backend_init`. This method is called internally by `require 'llama_cpp'`.
+
+## [[0.3.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.1...v0.3.2)] - 2023-07-08
+
+- Bump bundled llama.cpp from master-b8c8dda to master-481f793.
+- Add `Timings` class and `timings` method to `Context`:
+```ruby
+require 'llama_cpp'
+
+# ...
+
+context = LLaMACpp::Context.new(model: model)
+timings = context.timings
+
+puts timings.class
+# => LLaMACpp::Timings
+puts timings.t_load_ms
+# => 79.61
+```
+- Expose sampling options as the arguments of the `generate` module function:
+```ruby
+require 'llama_cpp'
+
+# ...
+
+LLaMACpp.generate(context, 'Hello, world.', top_k: 30, top_p: 0.8, temperature: 0.9)
+```
+- Add `ModelQuantizeParams` class; this class had not been published because the author forgot to call `rb_define_class`.
+- Minor update to example scripts, configuration files, and documentation.
+
 ## [[0.3.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.0...v0.3.1)] - 2023-07-02
 
 - Bump bundled llama.cpp from master-9d23589 to master-b8c8dda.
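For orientation, here is a minimal sketch of the 0.3.3 lifecycle described in the changelog above: `backend_init` is now called internally by `require 'llama_cpp'`, and `backend_free` should be called once at program end when the MPI option is enabled. The model path is a placeholder, and the generation options simply mirror the 0.3.2 example.

```ruby
require 'llama_cpp' # calls LLaMACpp.backend_init internally as of 0.3.3

params = LLaMACpp::ContextParams.new
# 'vicuna-7b-q4_0.bin' is a placeholder path, not part of the diff
model = LLaMACpp::Model.new(model_path: 'vicuna-7b-q4_0.bin', params: params)
context = LLaMACpp::Context.new(model: model)

puts LLaMACpp.generate(context, 'Hello, world.', top_k: 30, top_p: 0.8, temperature: 0.9)

# only needed when the gem was built with `--with-mpi`
LLaMACpp.backend_free
```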
data/README.md
CHANGED
@@ -68,6 +68,15 @@ User:
 
 
 
+Japanese chat is also possible using the [Vicuna model on Hugging Face](https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized).
+
+```sh
+$ wget https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_0.bin
+$ ruby chat.rb --model ggml-vicuna-7b-1.1-q4_0.bin --file prompt_jp.txt
+```
+
+
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/llama_cpp.rb.
data/examples/chat.rb
CHANGED
@@ -33,7 +33,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
   def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
     params = LLaMACpp::ContextParams.new
-    params.seed = options[:seed]
+    params.seed = options[:seed] if options[:seed] != -1
     params.n_gpu_layers = options[:n_gpu_layers]
     model = LLaMACpp::Model.new(model_path: options[:model], params: params)
     context = LLaMACpp::Context.new(model: model)
data/examples/embedding.rb
CHANGED
@@ -18,7 +18,7 @@ class Embedding < Thor # rubocop:disable Style/Documentation
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
     params = LLaMACpp::ContextParams.new
-    params.seed = options[:seed]
+    params.seed = options[:seed] if options[:seed] != -1
     params.n_gpu_layers = options[:n_gpu_layers]
     params.embedding = true
     model = LLaMACpp::Model.new(model_path: options[:model], params: params)
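Both example scripts now guard the seed assignment in the same way; the sketch below restates the pattern, assuming that `-1` is the examples' "no explicit seed given" sentinel (the environment-variable source below is illustrative, not from the diff).

```ruby
require 'llama_cpp'

# -1 stands in for "no explicit seed given"; this mirrors the guarded
# assignment now used in chat.rb and embedding.rb
seed = Integer(ENV.fetch('SEED', '-1'))

params = LLaMACpp::ContextParams.new
params.seed = seed if seed != -1 # otherwise keep the context's default (random) seed
```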
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -7,8 +7,9 @@ abort 'libstdc++ is not found.' unless have_library('stdc++')
 
 $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
-$
-$
+$srcs << 'ggml-mpi.c' if with_config('mpi')
+$CFLAGS << ' -w -DNDEBUG'
+$CXXFLAGS << ' -std=c++11 -DNDEBUG'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'
 
@@ -76,6 +77,14 @@ if with_config('clblast')
   end
 end
 
+if with_config('mpi')
+  abort 'libmpi is not found.' unless have_library('mpi')
+  abort 'mpi.h is not found.' unless have_header('mpi.h')
+
+  $CFLAGS << ' -DGGML_USE_MPI -Wno-cast-qual'
+  $CXXFLAGS << ' -DGGML_USE_MPI -Wno-cast-qual'
+end
+
 UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
 
 # rubocop:disable Layout/LineLength
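The extconf.rb change above follows the standard mkmf `with_config` pattern, with the flag supplied at install time via `gem install llama_cpp -- --with-mpi`. As a rough sketch (the file and extension names below are illustrative, not from the gem), an extension that optionally links MPI the same way would look like this:

```ruby
# extconf.rb sketch, illustrative only; mirrors the with_config('mpi') branch above
require 'mkmf'

if with_config('mpi')
  abort 'libmpi is not found.' unless have_library('mpi')
  abort 'mpi.h is not found.' unless have_header('mpi.h')

  # enable the MPI code paths in the bundled sources
  $CFLAGS << ' -DGGML_USE_MPI'
  $CXXFLAGS << ' -DGGML_USE_MPI'
end

create_makefile('my_ext/my_ext') # placeholder extension name
```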