llama_cpp 0.2.0 → 0.2.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -77,6 +77,7 @@ extern "C" {
     int n_gpu_layers; // number of layers to store in VRAM
     int main_gpu;     // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+    bool low_vram;    // if true, reduce VRAM usage at the cost of performance
     int seed;         // RNG seed, -1 for random

     bool f16_kv;      // use fp16 for KV cache
@@ -220,6 +221,14 @@ extern "C" {
     LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
     LLAMA_API int llama_n_embd (const struct llama_context * ctx);

+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);
+
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
     // Can be mutated in order to change the probabilities of the next token
@@ -235,9 +244,9 @@ extern "C" {
     LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);

     // Special tokens
-    LLAMA_API llama_token llama_token_bos();
-    LLAMA_API llama_token llama_token_eos();
-    LLAMA_API llama_token llama_token_nl();
+    LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(); // end-of-sentence
+    LLAMA_API llama_token llama_token_nl();  // next-line

     // Sampling functions
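The header changes above add a `low_vram` flag to `llama_context_params` and a `llama_get_vocab` accessor. A minimal Ruby-side sketch of the `low_vram` flag, assuming the gem exposes it on `ContextParams` as the RBS signatures further down indicate; the model path and the `Context.new` keywords are illustrative, not taken from this diff:

```ruby
require 'llama_cpp'

# Sketch only: low_vram trades some performance for reduced VRAM usage.
# The model path below is a placeholder.
params = LLaMACpp::ContextParams.new
params.low_vram = true
params.seed = 42

context = LLaMACpp::Context.new(model_path: 'path/to/model.bin', params: params)
```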
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.2.0'
+  VERSION = '0.2.1'

   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-
+  LLAMA_CPP_VERSION = 'master-a09f919'
 end
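After upgrading, the two constants changed above can be checked at runtime to confirm the installed binding version and the bundled llama.cpp revision:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.2.1"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "master-a09f919"
```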
data/sig/llama_cpp.rbs
CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
     def n_ctx: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
+    def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
     def print_timings: () -> void
     def reset_timings: () -> void
     def token_to_str: (Integer) -> String
@@ -111,6 +112,8 @@ module LLaMACpp
     def main_gpu: () -> Integer
     def main_gpu=: (Integer) -> Integer
     def tensor_split: () -> Array[Float]
+    def low_vram: () -> bool
+    def low_vram=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
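The new `vocab` signature above returns token strings and their scores as a pair of parallel arrays. A hedged usage sketch, assuming `context` is an already-initialized `LLaMACpp::Context`; the printing format is illustrative:

```ruby
# vocab(capacity:) -> [Array[String], Array[Float]], per the RBS above.
# n_vocab is an existing method on the context.
strings, scores = context.vocab(capacity: context.n_vocab)

strings.zip(scores).first(5).each do |token, score|
  puts format('%-12s %.4f', token, score)
end
```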
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-
+date: 2023-06-17 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -22,6 +22,8 @@ files:
 - CODE_OF_CONDUCT.md
 - LICENSE.txt
 - README.md
+- examples/README.md
+- examples/chat.rb
 - ext/llama_cpp/extconf.rb
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h