llama_cpp 0.9.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +121 -15
- data/ext/llama_cpp/src/ggml-alloc.c +42 -7
- data/ext/llama_cpp/src/ggml-alloc.h +7 -0
- data/ext/llama_cpp/src/ggml-backend-impl.h +46 -21
- data/ext/llama_cpp/src/ggml-backend.c +563 -156
- data/ext/llama_cpp/src/ggml-backend.h +62 -17
- data/ext/llama_cpp/src/ggml-cuda.cu +1140 -355
- data/ext/llama_cpp/src/ggml-cuda.h +9 -1
- data/ext/llama_cpp/src/ggml-impl.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.h +6 -0
- data/ext/llama_cpp/src/ggml-metal.m +506 -158
- data/ext/llama_cpp/src/ggml-metal.metal +795 -144
- data/ext/llama_cpp/src/ggml.c +331 -111
- data/ext/llama_cpp/src/ggml.h +49 -4
- data/ext/llama_cpp/src/llama.cpp +749 -329
- data/ext/llama_cpp/src/llama.h +28 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +20 -2
- metadata +2 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -42,7 +42,7 @@
|
|
42
42
|
#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
|
43
43
|
|
44
44
|
#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
|
45
|
-
#define LLAMA_SESSION_VERSION 2
|
45
|
+
#define LLAMA_SESSION_VERSION 3
|
46
46
|
|
47
47
|
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
|
48
48
|
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
|
@@ -158,6 +158,22 @@ extern "C" {
|
|
158
158
|
llama_seq_id all_seq_id; // used if seq_id == NULL
|
159
159
|
} llama_batch;
|
160
160
|
|
161
|
+
enum llama_model_kv_override_type {
|
162
|
+
LLAMA_KV_OVERRIDE_INT,
|
163
|
+
LLAMA_KV_OVERRIDE_FLOAT,
|
164
|
+
LLAMA_KV_OVERRIDE_BOOL,
|
165
|
+
};
|
166
|
+
|
167
|
+
struct llama_model_kv_override {
|
168
|
+
char key[128];
|
169
|
+
enum llama_model_kv_override_type tag;
|
170
|
+
union {
|
171
|
+
int64_t int_value;
|
172
|
+
double float_value;
|
173
|
+
bool bool_value;
|
174
|
+
};
|
175
|
+
};
|
176
|
+
|
161
177
|
struct llama_model_params {
|
162
178
|
int32_t n_gpu_layers; // number of layers to store in VRAM
|
163
179
|
int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
@@ -165,9 +181,13 @@ extern "C" {
|
|
165
181
|
|
166
182
|
// called with a progress value between 0 and 1, pass NULL to disable
|
167
183
|
llama_progress_callback progress_callback;
|
184
|
+
|
168
185
|
// context pointer passed to the progress callback
|
169
186
|
void * progress_callback_user_data;
|
170
187
|
|
188
|
+
// override key-value pairs of the model meta data
|
189
|
+
const struct llama_model_kv_override * kv_overrides;
|
190
|
+
|
171
191
|
// Keep the booleans together to avoid misalignment during copy-by-value.
|
172
192
|
bool vocab_only; // only load the vocabulary, no weights
|
173
193
|
bool use_mmap; // use mmap if possible
|
@@ -191,11 +211,14 @@ extern "C" {
|
|
191
211
|
float yarn_beta_slow; // YaRN high correction dim
|
192
212
|
uint32_t yarn_orig_ctx; // YaRN original context size
|
193
213
|
|
214
|
+
enum ggml_type type_k; // data type for K cache
|
215
|
+
enum ggml_type type_v; // data type for V cache
|
216
|
+
|
194
217
|
// Keep the booleans together to avoid misalignment during copy-by-value.
|
195
|
-
bool mul_mat_q;
|
196
|
-
bool f16_kv; // use fp16 for KV cache, fp32 otherwise
|
197
|
-
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
198
|
-
bool embedding; // embedding mode only
|
218
|
+
bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
|
219
|
+
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
220
|
+
bool embedding; // embedding mode only
|
221
|
+
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
199
222
|
};
|
200
223
|
|
201
224
|
// model quantization parameters
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.9.5'
|
6
|
+
VERSION = '0.10.0'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = 'b1593'
|
9
|
+
LLAMA_CPP_VERSION = 'b1620'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -23,6 +23,10 @@ module LLaMACpp
|
|
23
23
|
LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
|
24
24
|
LLAMA_FTYPE_MOSTLY_Q6_K: Integer
|
25
25
|
|
26
|
+
LLAMA_KV_OVERRIDE_INT: Integer
|
27
|
+
LLAMA_KV_OVERRIDE_FLOAT: Integer
|
28
|
+
LLAMA_KV_OVERRIDE_BOOL: Integer
|
29
|
+
|
26
30
|
LLAMA_GRETYPE_END: Integer
|
27
31
|
LLAMA_GRETYPE_ALT: Integer
|
28
32
|
LLAMA_GRETYPE_RULE_REF: Integer
|
@@ -116,6 +120,16 @@ module LLaMACpp
|
|
116
120
|
def n_eval: () -> Integer
|
117
121
|
end
|
118
122
|
|
123
|
+
class ModelKVOverride
|
124
|
+
public
|
125
|
+
|
126
|
+
def key: () -> String
|
127
|
+
def tag: () -> Integer
|
128
|
+
def int_value: () -> Integer
|
129
|
+
def float_value: () -> Float
|
130
|
+
def bool_value: () -> bool
|
131
|
+
end
|
132
|
+
|
119
133
|
class ModelParams
|
120
134
|
public
|
121
135
|
|
@@ -225,14 +239,18 @@ module LLaMACpp
|
|
225
239
|
def yarn_beta_slow: () -> Float
|
226
240
|
def yarn_orig_ctx=: (Integer) -> Integer
|
227
241
|
def yarn_orig_ctx: () -> Integer
|
242
|
+
def type_k=: (Integer) -> Integer
|
243
|
+
def type_k: () -> Integer
|
244
|
+
def type_v=: (Integer) -> Integer
|
245
|
+
def type_v: () -> Integer
|
228
246
|
def mul_mat_q: () -> bool
|
229
247
|
def mul_mat_q=: (bool) -> bool
|
230
|
-
def f16_kv: () -> bool
|
231
|
-
def f16_kv=: (bool) -> bool
|
232
248
|
def logits_all: () -> bool
|
233
249
|
def logits_all=: (bool) -> bool
|
234
250
|
def embedding: () -> bool
|
235
251
|
def embedding=: (bool) -> bool
|
252
|
+
def offload_kqv: () -> bool
|
253
|
+
def offload_kqv=: (bool) -> bool
|
236
254
|
end
|
237
255
|
|
238
256
|
class ModelQuantizeParams
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.5
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-12-02 00:00:00.000000000 Z
|
11
|
+
date: 2023-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|