llama_cpp 0.10.0 → 0.10.2
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/llama_cpp/llama_cpp.cpp +18 -1
- data/ext/llama_cpp/src/ggml-alloc.c +12 -4
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-backend-impl.h +12 -8
- data/ext/llama_cpp/src/ggml-backend.c +75 -5
- data/ext/llama_cpp/src/ggml-backend.h +7 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +952 -232
- data/ext/llama_cpp/src/ggml-metal.h +3 -0
- data/ext/llama_cpp/src/ggml-metal.m +725 -98
- data/ext/llama_cpp/src/ggml-metal.metal +1508 -171
- data/ext/llama_cpp/src/ggml-quants.c +2 -2
- data/ext/llama_cpp/src/ggml.c +554 -215
- data/ext/llama_cpp/src/ggml.h +58 -23
- data/ext/llama_cpp/src/llama.cpp +1157 -851
- data/ext/llama_cpp/src/llama.h +9 -4
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +2 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -39,6 +39,7 @@
 
 #define LLAMA_MAX_RNG_STATE (64*1024)
 
+#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 #define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
 
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
@@ -126,7 +127,7 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;
 
-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);
 
     // Input data for llama_decode
     // A llama_batch object can contain input about one or many sequences
@@ -179,7 +180,9 @@ extern "C" {
         int32_t main_gpu;           // the GPU that is used for scratch and small tensors
         const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+        // If the provided progress_callback returns true, model loading continues.
+        // If it returns false, model loading is immediately aborted.
         llama_progress_callback progress_callback;
 
         // context pointer passed to the progress callback
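The callback now returns bool, which turns it into a cancellation hook during model load. A minimal sketch of how a caller might use it against the bundled b1686 C API; the model path, the 50% cutoff, and the stderr logging are illustrative, not part of the gem:

#include <stdbool.h>
#include <stdio.h>
#include "llama.h"

// Illustrative progress callback: log progress, abort once loading passes 50%.
// Returning false makes llama_load_model_from_file() stop and return NULL.
static bool on_progress(float progress, void * ctx) {
    (void) ctx; // progress_callback_user_data, unused in this sketch
    fprintf(stderr, "loading: %3.0f%%\n", progress * 100.0f);
    return progress <= 0.5f;
}

int main(void) {
    llama_backend_init(false); // numa = false

    struct llama_model_params mparams = llama_model_default_params();
    mparams.progress_callback           = on_progress;
    mparams.progress_callback_user_data = NULL;

    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "model load aborted or failed\n");
    } else {
        llama_free_model(model);
    }

    llama_backend_free();
    return 0;
}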
@@ -216,7 +219,7 @@ extern "C" {
 
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-        bool logits_all;  // the llama_eval() call computes all logits, not just the last one
+        bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embedding;   // embedding mode only
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
     };
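With logits_all deprecated, callers select logit positions per token through llama_batch instead. A sketch under the b1686 batch layout as I read it; the fill_batch helper is made up for illustration, and the batch is assumed to come from llama_batch_init(n_tokens, 0, 1):

#include "llama.h"

// Hypothetical helper: request logits only for the final prompt position,
// instead of computing them for every token via logits_all.
static void fill_batch(struct llama_batch * batch, const llama_token * prompt, int32_t n_tokens) {
    for (int32_t i = 0; i < n_tokens; i++) {
        batch->token   [i]    = prompt[i];
        batch->pos     [i]    = i;  // position within the sequence
        batch->n_seq_id[i]    = 1;  // the token belongs to one sequence
        batch->seq_id  [i][0] = 0;  // ... namely sequence 0
        batch->logits  [i]    = i == n_tokens - 1; // logits for the last token only
    }
    batch->n_tokens = n_tokens;
}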
@@ -313,7 +316,9 @@ extern "C" {
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
-    LLAMA_API int llama_n_ctx(const struct llama_context * ctx);
+    // TODO: become more consistent with returned int types across the API
+    LLAMA_API uint32_t llama_n_ctx  (const struct llama_context * ctx);
+    LLAMA_API uint32_t llama_n_batch(const struct llama_context * ctx);
 
     LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
 
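The getters now return uint32_t rather than int. A small sketch of how a caller might use them to bound decode chunk sizes, assuming an already-initialized context:

#include <stdint.h>
#include "llama.h"

// Illustrative: the largest token chunk a single llama_decode() call may
// receive is bounded by n_batch, which in turn should not exceed n_ctx.
static uint32_t decode_chunk_limit(const struct llama_context * ctx) {
    const uint32_t n_ctx   = llama_n_ctx(ctx);   // size of the KV cache in tokens
    const uint32_t n_batch = llama_n_batch(ctx); // max tokens per llama_decode() call
    return n_batch < n_ctx ? n_batch : n_ctx;
}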
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.10.0'
+  VERSION = '0.10.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b1686'
 end
data/sig/llama_cpp.rbs
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.10.0
+  version: 0.10.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-12-
+date: 2023-12-23 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: