llama_cpp 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/README.md +92 -0
- data/examples/chat.rb +195 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1218 -411
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +703 -514
- data/ext/llama_cpp/src/ggml-metal.metal +574 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +496 -36
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +2715 -476
- data/ext/llama_cpp/src/ggml.h +266 -11
- data/ext/llama_cpp/src/llama.cpp +266 -135
- data/ext/llama_cpp/src/llama.h +19 -11
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +5 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -71,27 +71,27 @@ extern "C" {
|
|
71
71
|
|
72
72
|
typedef void (*llama_progress_callback)(float progress, void *ctx);
|
73
73
|
|
74
|
-
|
74
|
+
struct llama_context_params {
|
75
|
+
int seed; // RNG seed, -1 for random
|
75
76
|
int n_ctx; // text context
|
76
77
|
int n_batch; // prompt processing batch size
|
77
78
|
int n_gpu_layers; // number of layers to store in VRAM
|
78
79
|
int main_gpu; // the GPU that is used for scratch and small tensors
|
79
80
|
float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
|
80
|
-
|
81
|
+
// called with a progress value between 0 and 1, pass NULL to disable
|
82
|
+
llama_progress_callback progress_callback;
|
83
|
+
// context pointer passed to the progress callback
|
84
|
+
void * progress_callback_user_data;
|
81
85
|
|
86
|
+
// Keep the booleans together to avoid misalignment during copy-by-value.
|
87
|
+
bool low_vram; // if true, reduce VRAM usage at the cost of performance
|
82
88
|
bool f16_kv; // use fp16 for KV cache
|
83
89
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
84
90
|
bool vocab_only; // only load the vocabulary, no weights
|
85
91
|
bool use_mmap; // use mmap if possible
|
86
92
|
bool use_mlock; // force system to keep model in RAM
|
87
93
|
bool embedding; // embedding mode only
|
88
|
-
|
89
|
-
// called with a progress value between 0 and 1, pass NULL to disable
|
90
|
-
llama_progress_callback progress_callback;
|
91
|
-
// context pointer passed to the progress callback
|
92
|
-
void * progress_callback_user_data;
|
93
94
|
};
|
94
|
-
|
95
95
|
// model file types
|
96
96
|
enum llama_ftype {
|
97
97
|
LLAMA_FTYPE_ALL_F32 = 0,
|
@@ -220,6 +220,14 @@ extern "C" {
|
|
220
220
|
LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
|
221
221
|
LLAMA_API int llama_n_embd (const struct llama_context * ctx);
|
222
222
|
|
223
|
+
// Get the vocabulary as output parameters.
|
224
|
+
// Returns number of results.
|
225
|
+
LLAMA_API int llama_get_vocab(
|
226
|
+
const struct llama_context * ctx,
|
227
|
+
const char * * strings,
|
228
|
+
float * scores,
|
229
|
+
int capacity);
|
230
|
+
|
223
231
|
// Token logits obtained from the last call to llama_eval()
|
224
232
|
// The logits for the last token are stored in the last row
|
225
233
|
// Can be mutated in order to change the probabilities of the next token
|
@@ -235,9 +243,9 @@ extern "C" {
|
|
235
243
|
LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
|
236
244
|
|
237
245
|
// Special tokens
|
238
|
-
LLAMA_API llama_token llama_token_bos();
|
239
|
-
LLAMA_API llama_token llama_token_eos();
|
240
|
-
LLAMA_API llama_token llama_token_nl();
|
246
|
+
LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
|
247
|
+
LLAMA_API llama_token llama_token_eos(); // end-of-sentence
|
248
|
+
LLAMA_API llama_token llama_token_nl(); // next-line
|
241
249
|
|
242
250
|
// Sampling functions
|
243
251
|
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.2.0'
|
6
|
+
VERSION = '0.2.2'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = 'master-
|
9
|
+
LLAMA_CPP_VERSION = 'master-7487137'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
|
|
70
70
|
def n_ctx: () -> Integer
|
71
71
|
def n_embd: () -> Integer
|
72
72
|
def n_vocab: () -> Integer
|
73
|
+
def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
|
73
74
|
def print_timings: () -> void
|
74
75
|
def reset_timings: () -> void
|
75
76
|
def token_to_str: (Integer) -> String
|
@@ -111,6 +112,8 @@ module LLaMACpp
|
|
111
112
|
def main_gpu: () -> Integer
|
112
113
|
def main_gpu=: (Integer) -> Integer
|
113
114
|
def tensor_split: () -> Array[Float]
|
115
|
+
def low_vram: () -> bool
|
116
|
+
def low_vram=: (bool) -> bool
|
114
117
|
def seed: () -> Integer
|
115
118
|
def seed=: (Integer) -> Integer
|
116
119
|
def use_mlock: () -> bool
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -22,6 +22,9 @@ files:
|
|
22
22
|
- CODE_OF_CONDUCT.md
|
23
23
|
- LICENSE.txt
|
24
24
|
- README.md
|
25
|
+
- examples/README.md
|
26
|
+
- examples/chat.rb
|
27
|
+
- examples/embedding.rb
|
25
28
|
- ext/llama_cpp/extconf.rb
|
26
29
|
- ext/llama_cpp/llama_cpp.cpp
|
27
30
|
- ext/llama_cpp/llama_cpp.h
|