llama_cpp 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/README.md +92 -0
- data/examples/chat.rb +195 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1218 -411
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +703 -514
- data/ext/llama_cpp/src/ggml-metal.metal +574 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +496 -36
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +2715 -476
- data/ext/llama_cpp/src/ggml.h +266 -11
- data/ext/llama_cpp/src/llama.cpp +266 -135
- data/ext/llama_cpp/src/llama.h +19 -11
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +5 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -71,27 +71,27 @@ extern "C" {
 
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
+        int seed; // RNG seed, -1 for random
         int n_ctx; // text context
         int n_batch; // prompt processing batch size
         int n_gpu_layers; // number of layers to store in VRAM
         int main_gpu; // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-        int seed; // RNG seed, -1 for random
+        // called with a progress value between 0 and 1, pass NULL to disable
+        llama_progress_callback progress_callback;
+        // context pointer passed to the progress callback
+        void * progress_callback_user_data;
 
+        // Keep the booleans together to avoid misalignment during copy-by-value.
+        bool low_vram; // if true, reduce VRAM usage at the cost of performance
         bool f16_kv; // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap; // use mmap if possible
         bool use_mlock; // force system to keep model in RAM
         bool embedding; // embedding mode only
-
-        // called with a progress value between 0 and 1, pass NULL to disable
-        llama_progress_callback progress_callback;
-        // context pointer passed to the progress callback
-        void * progress_callback_user_data;
     };
-
     // model file types
     enum llama_ftype {
         LLAMA_FTYPE_ALL_F32 = 0,
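For orientation, here is a minimal C sketch of filling in the reorganized llama_context_params. The struct fields and the progress-callback typedef are taken from the hunk above; llama_context_default_params(), llama_init_from_file(), and llama_free() are assumed to come from elsewhere in the same llama.h, and the model path is only a placeholder.

```c
#include <stdbool.h>
#include <stdio.h>
#include "llama.h"

// Progress callback matching the llama_progress_callback typedef shown above.
static void on_progress(float progress, void * user_data) {
    (void) user_data;
    fprintf(stderr, "\rloading: %3.0f%%", progress * 100.0f);
}

int main(void) {
    // Assumed helper from the same header: returns a params struct with default values.
    struct llama_context_params params = llama_context_default_params();

    params.seed     = 1234;  // now the first field of the struct
    params.n_ctx    = 2048;
    params.low_vram = true;  // new field: trade some speed for lower VRAM usage

    params.progress_callback           = on_progress;
    params.progress_callback_user_data = NULL;

    // Assumed loader from the same header; the path is a placeholder.
    struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        fprintf(stderr, "\nfailed to load model\n");
        return 1;
    }

    llama_free(ctx);
    return 0;
}
```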
@@ -220,6 +220,14 @@ extern "C" {
     LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
     LLAMA_API int llama_n_embd (const struct llama_context * ctx);
 
+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+            const char * * strings,
+            float * scores,
+            int capacity);
+
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
     // Can be mutated in order to change the probabilities of the next token
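A short C sketch of how the new llama_get_vocab() might be called. The declaration is the one added above; llama_n_vocab() is assumed from elsewhere in the same header, and allocation failures are not handled.

```c
#include <stdio.h>
#include <stdlib.h>
#include "llama.h"

// Print the first few vocabulary entries of an already loaded context.
static void dump_vocab(const struct llama_context * ctx) {
    const int n_vocab = llama_n_vocab(ctx);  // assumed helper from the same header

    const char ** strings = malloc(n_vocab * sizeof(*strings));
    float       * scores  = malloc(n_vocab * sizeof(*scores));

    // Fills both arrays with up to `capacity` entries and returns how many were written.
    const int n = llama_get_vocab(ctx, strings, scores, n_vocab);

    for (int i = 0; i < n && i < 10; i++) {
        printf("%5d  %s  (score %f)\n", i, strings[i], scores[i]);
    }

    free(strings);
    free(scores);
}
```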
@@ -235,9 +243,9 @@ extern "C" {
     LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
 
     // Special tokens
-    LLAMA_API llama_token llama_token_bos();
-    LLAMA_API llama_token llama_token_eos();
-    LLAMA_API llama_token llama_token_nl();
+    LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(); // end-of-sentence
+    LLAMA_API llama_token llama_token_nl();  // next-line
 
     // Sampling functions
 
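The added comments only document existing behaviour, but for completeness, a small C sketch that uses the three special-token getters together with llama_token_to_str() from the context lines above (the context is assumed to be already loaded).

```c
#include <stdio.h>
#include "llama.h"

// Show the ids and text of the special tokens for a loaded context.
static void show_special_tokens(struct llama_context * ctx) {
    const llama_token bos = llama_token_bos(); // beginning-of-sentence
    const llama_token eos = llama_token_eos(); // end-of-sentence
    const llama_token nl  = llama_token_nl();  // next-line

    printf("bos=%d eos=%d nl=%d\n", bos, eos, nl);

    // llama_token_to_str() maps a token id back to its text.
    printf("nl renders as: \"%s\"\n", llama_token_to_str(ctx, nl));
}
```

In a generation loop, comparing each sampled token against llama_token_eos() is the usual stop condition.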
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.2.0'
+  VERSION = '0.2.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-
+  LLAMA_CPP_VERSION = 'master-7487137'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
     def n_ctx: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
+    def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
     def print_timings: () -> void
     def reset_timings: () -> void
     def token_to_str: (Integer) -> String
@@ -111,6 +112,8 @@
     def main_gpu: () -> Integer
     def main_gpu=: (Integer) -> Integer
     def tensor_split: () -> Array[Float]
+    def low_vram: () -> bool
+    def low_vram=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-
+date: 2023-06-23 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -22,6 +22,9 @@ files:
 - CODE_OF_CONDUCT.md
 - LICENSE.txt
 - README.md
+- examples/README.md
+- examples/chat.rb
+- examples/embedding.rb
 - ext/llama_cpp/extconf.rb
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h