llama_cpp 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -77,6 +77,7 @@ extern "C" {
         int n_gpu_layers;                      // number of layers to store in VRAM
         int main_gpu;                          // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+        bool low_vram;                         // if true, reduce VRAM usage at the cost of performance
         int seed;                              // RNG seed, -1 for random
 
         bool f16_kv;                           // use fp16 for KV cache
@@ -220,6 +221,14 @@ extern "C" {
     LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
     LLAMA_API int llama_n_embd (const struct llama_context * ctx);
 
+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+            const char * * strings,
+            float * scores,
+            int capacity);
+
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
     // Can be mutated in order to change the probabilities of the next token
@@ -235,9 +244,9 @@ extern "C" {
     LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
 
     // Special tokens
-    LLAMA_API llama_token llama_token_bos();
-    LLAMA_API llama_token llama_token_eos();
-    LLAMA_API llama_token llama_token_nl();
+    LLAMA_API llama_token llama_token_bos();  // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos();  // end-of-sentence
+    LLAMA_API llama_token llama_token_nl();   // next-line
 
     // Sampling functions
 
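At the Ruby level, the new low_vram field surfaces as the low_vram/low_vram= accessors added to ContextParams (see the sig changes below). A minimal sketch of enabling it, using only accessors shown in this diff; the context construction step is deliberately left out rather than assuming its signature:

    require 'llama_cpp'

    # Opt into the new low-VRAM mode via ContextParams.
    params = LLaMACpp::ContextParams.new
    params.low_vram = true   # reduce VRAM usage at the cost of performance
    params.seed = 123        # other existing fields are set as before

    # The params object would then be passed when constructing a LLaMACpp::Context;
    # that call is omitted here to avoid assuming its exact signature.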
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.2.0'
+  VERSION = '0.2.1'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-
+  LLAMA_CPP_VERSION = 'master-a09f919'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
     def n_ctx: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
+    def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
     def print_timings: () -> void
     def reset_timings: () -> void
     def token_to_str: (Integer) -> String
@@ -111,6 +112,8 @@ module LLaMACpp
     def main_gpu: () -> Integer
     def main_gpu=: (Integer) -> Integer
     def tensor_split: () -> Array[Float]
+    def low_vram: () -> bool
+    def low_vram=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
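The vocab method added to the Context signature wraps the new llama_get_vocab function from llama.h above, returning token strings and their scores. A minimal sketch of dumping a few vocabulary entries, assuming context is an already-initialized LLaMACpp::Context:

    # Fetch up to n_vocab token strings and their scores from the loaded model.
    strings, scores = context.vocab(capacity: context.n_vocab)

    # Print a small sample of the vocabulary.
    strings.take(5).each_with_index do |piece, i|
      puts format('%5d  %-12s  score: %.4f', i, piece.inspect, scores[i])
    end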
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-
+date: 2023-06-17 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -22,6 +22,8 @@ files:
 - CODE_OF_CONDUCT.md
 - LICENSE.txt
 - README.md
+- examples/README.md
+- examples/chat.rb
 - ext/llama_cpp/extconf.rb
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h