llama_cpp 0.3.5 → 0.3.7
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +18 -2
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +22 -8
- data/ext/llama_cpp/src/ggml-alloc.c +549 -0
- data/ext/llama_cpp/src/ggml-alloc.h +22 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +2526 -430
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +56 -34
- data/ext/llama_cpp/src/ggml-metal.metal +4 -1
- data/ext/llama_cpp/src/ggml.c +445 -176
- data/ext/llama_cpp/src/ggml.h +125 -33
- data/ext/llama_cpp/src/k_quants.c +32 -30
- data/ext/llama_cpp/src/llama-util.h +41 -1
- data/ext/llama_cpp/src/llama.cpp +409 -210
- data/ext/llama_cpp/src/llama.h +19 -1
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +4 -2
data/ext/llama_cpp/src/llama.h
CHANGED

@@ -86,7 +86,20 @@ extern "C" {

     typedef void (*llama_progress_callback)(float progress, void *ctx);

-    struct llama_context_params {
+    enum llama_log_level {
+        LLAMA_LOG_LEVEL_ERROR = 2,
+        LLAMA_LOG_LEVEL_WARN  = 3,
+        LLAMA_LOG_LEVEL_INFO  = 4
+    };
+
+    // Signature for logging events
+    // Note that text includes the new line character at the end for most events.
+    // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
+    // if it exists.
+    // It might not exist for progress report where '.' is output repeatedly.
+    typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
+
+    struct llama_context_params {
         uint32_t seed; // RNG seed, -1 for random
         int32_t n_ctx; // text context
         int32_t n_batch; // prompt processing batch size

@@ -108,6 +121,7 @@ extern "C" {

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool low_vram;   // if true, reduce VRAM usage at the cost of performance
+        bool mul_mat_q;  // if true, use experimental mul_mat_q kernels
         bool f16_kv;     // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights

@@ -194,6 +208,10 @@ extern "C" {
         int32_t n_eval;
     };

+    // Set callback for all future logging events.
+    // If this is not called, or NULL is supplied, everything is output on stderr.
+    LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
+
     LLAMA_API int llama_max_devices();

     LLAMA_API struct llama_context_params llama_context_default_params();
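The substantive additions here are the logging hooks (llama_log_level, llama_log_callback, llama_log_set) and the mul_mat_q context flag. A minimal C sketch of how a host application might wire these up — the filtering policy and callback name are illustrative; only the types, values, and functions shown in the hunks above come from the header:

```c
#include <stdio.h>
#include "llama.h"

// Forward warnings and errors to stderr and drop INFO chatter.
// Lower values are more severe: ERROR = 2, WARN = 3, INFO = 4.
static void quiet_log(enum llama_log_level level, const char * text, void * user_data) {
    (void) user_data; // unused here; useful for threading a logger object through
    if (level <= LLAMA_LOG_LEVEL_WARN) {
        fputs(text, stderr); // per the header comment, text usually already ends in '\n'
    }
}

int main(void) {
    // Install the callback first so output from subsequent llama.cpp calls is captured.
    llama_log_set(quiet_log, NULL);

    struct llama_context_params params = llama_context_default_params();
    params.mul_mat_q = true; // opt in to the experimental mul_mat_q kernels

    // ... load a model and create a context with params as usual ...
    return 0;
}
```

As the header comment notes, never calling llama_log_set (or passing NULL) keeps the default behaviour of writing everything to stderr.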
data/lib/llama_cpp/version.rb
CHANGED

@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.3.5'
+  VERSION = '0.3.7'

   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-
+  LLAMA_CPP_VERSION = 'master-9ca4abe'
 end
data/sig/llama_cpp.rbs
CHANGED

@@ -163,6 +163,8 @@ module LLaMACpp
     def rope_freq_scale: () -> Float
     def low_vram: () -> bool
     def low_vram=: (bool) -> bool
+    def mul_mat_q: () -> bool
+    def mul_mat_q=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
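The rbs hunk above only declares the Ruby-side signatures; the code that bridges them to the new C struct field lives in ext/llama_cpp/llama_cpp.cpp (changed +22 -8 in this release, not shown here). As a rough, hypothetical sketch of how such an accessor pair can be bound with Ruby's TypedData C API — the file name, init function, and class registration below are illustrative, not the gem's real implementation:

```c
/* mul_mat_q_ext.c -- hypothetical sketch, not the gem's actual source. */
#include <ruby.h>
#include "llama.h"

static void params_free(void * ptr) { xfree(ptr); }

static const rb_data_type_t params_data_type = {
    "ContextParamsSketch",
    { NULL, params_free, NULL },
    NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
};

/* Allocate a wrapped llama_context_params initialized to the library defaults. */
static VALUE params_alloc(VALUE klass) {
    struct llama_context_params * p;
    VALUE obj = TypedData_Make_Struct(klass, struct llama_context_params, &params_data_type, p);
    *p = llama_context_default_params();
    return obj;
}

/* def mul_mat_q: () -> bool */
static VALUE params_get_mul_mat_q(VALUE self) {
    struct llama_context_params * p;
    TypedData_Get_Struct(self, struct llama_context_params, &params_data_type, p);
    return p->mul_mat_q ? Qtrue : Qfalse;
}

/* def mul_mat_q=: (bool) -> bool */
static VALUE params_set_mul_mat_q(VALUE self, VALUE flag) {
    struct llama_context_params * p;
    TypedData_Get_Struct(self, struct llama_context_params, &params_data_type, p);
    p->mul_mat_q = RTEST(flag);
    return flag;
}

void Init_mul_mat_q_ext(void) {
    VALUE mLLaMACpp = rb_define_module("LLaMACpp");
    VALUE cParams = rb_define_class_under(mLLaMACpp, "ContextParams", rb_cObject);
    rb_define_alloc_func(cParams, params_alloc);
    rb_define_method(cParams, "mul_mat_q", params_get_mul_mat_q, 0);
    rb_define_method(cParams, "mul_mat_q=", params_set_mul_mat_q, 1);
}
```

Whatever the gem's real binding looks like, this layer is what makes params.mul_mat_q = true in Ruby line up with the bool field the llama.h hunk added.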
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.3.5
+  version: 0.3.7
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-
+date: 2023-08-11 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:

@@ -30,6 +30,8 @@ files:
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h
 - ext/llama_cpp/src/LICENSE
+- ext/llama_cpp/src/ggml-alloc.c
+- ext/llama_cpp/src/ggml-alloc.h
 - ext/llama_cpp/src/ggml-cuda.cu
 - ext/llama_cpp/src/ggml-cuda.h
 - ext/llama_cpp/src/ggml-metal.h