llama_cpp 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +16 -1
- data/ext/llama_cpp/src/ggml-alloc.c +12 -4
- data/ext/llama_cpp/src/ggml-backend-impl.h +12 -8
- data/ext/llama_cpp/src/ggml-backend.c +75 -5
- data/ext/llama_cpp/src/ggml-backend.h +7 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +284 -162
- data/ext/llama_cpp/src/ggml-metal.h +3 -0
- data/ext/llama_cpp/src/ggml-metal.m +190 -44
- data/ext/llama_cpp/src/ggml-metal.metal +11 -2
- data/ext/llama_cpp/src/ggml.c +262 -89
- data/ext/llama_cpp/src/ggml.h +24 -10
- data/ext/llama_cpp/src/llama.cpp +926 -780
- data/ext/llama_cpp/src/llama.h +8 -3
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -39,6 +39,7 @@
 
 #define LLAMA_MAX_RNG_STATE (64*1024)
 
+#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 #define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
 
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
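The added LLAMA_FILE_MAGIC_GGLA constant is the 'ggla' magic that llama.cpp writes at the start of LoRA adapter files. Purely as an illustration (the helper function and file path below are hypothetical, not part of this diff), a caller could use the constant to sanity-check an adapter file's header:

    #include <stdint.h>
    #include <stdio.h>

    #define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla', as added in this diff

    // Hypothetical helper: reads the first four bytes and compares them
    // (as a native-endian uint32_t, matching how llama.cpp reads file magics).
    static int file_has_ggla_magic(const char * path) {
        FILE * fp = fopen(path, "rb");
        if (fp == NULL) return 0;
        uint32_t magic = 0;
        size_t n = fread(&magic, sizeof(magic), 1, fp);
        fclose(fp);
        return n == 1 && magic == LLAMA_FILE_MAGIC_GGLA;
    }

    int main(void) {
        // "adapter.bin" is a placeholder path for illustration.
        printf("LoRA adapter magic present: %s\n",
               file_has_ggla_magic("adapter.bin") ? "yes" : "no");
        return 0;
    }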
@@ -126,7 +127,7 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;
 
-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);
 
     // Input data for llama_decode
     // A llama_batch object can contain input about one or many sequences
@@ -179,7 +180,9 @@ extern "C" {
         int32_t main_gpu; // the GPU that is used for scratch and small tensors
         const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+        // If the provided progress_callback returns true, model loading continues.
+        // If it returns false, model loading is immediately aborted.
         llama_progress_callback progress_callback;
 
         // context pointer passed to the progress callback
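Together with the typedef change above, these comments describe the new contract: llama_progress_callback now returns bool, and returning false aborts model loading. Below is a minimal sketch of wiring an abortable callback into llama_model_params; it assumes the loader entry points llama_model_default_params(), llama_load_model_from_file(), and llama_backend_init(bool) that exist elsewhere in this header but are not shown in the diff, and the cancellation flag and model path are illustrative.

    #include <stdbool.h>
    #include <stdio.h>
    #include "llama.h"

    // Returning true continues loading; returning false aborts it immediately.
    static bool my_progress(float progress, void * ctx) {
        const bool * cancelled = (const bool *) ctx;
        fprintf(stderr, "\rloading: %3.0f%%", progress * 100.0f);
        return !(*cancelled);
    }

    int main(void) {
        bool cancelled = false; // illustrative flag, e.g. set from another thread

        llama_backend_init(false); // signature as of llama.cpp b1686

        struct llama_model_params mparams = llama_model_default_params();
        mparams.progress_callback           = my_progress;
        mparams.progress_callback_user_data = &cancelled;

        // "model.gguf" is a placeholder path.
        struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            fprintf(stderr, "\nload failed or was aborted\n");
            llama_backend_free();
            return 1;
        }

        llama_free_model(model);
        llama_backend_free();
        return 0;
    }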
@@ -313,7 +316,9 @@ extern "C" {
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
-    LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
+    // TODO: become more consistent with returned int types across the API
+    LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
+    LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
 
     LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
 
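The new getters return uint32_t and read their values from a llama_context rather than from its creation parameters. A short sketch of querying them after context creation, assuming llama_context_default_params() and llama_new_context_with_model() from the same header (not shown in this diff) and a placeholder model path:

    #include <stdio.h>
    #include "llama.h"

    int main(void) {
        llama_backend_init(false);

        // "model.gguf" is a placeholder path.
        struct llama_model * model = llama_load_model_from_file("model.gguf", llama_model_default_params());
        if (model == NULL) {
            llama_backend_free();
            return 1;
        }

        struct llama_context_params cparams = llama_context_default_params();
        cparams.n_ctx   = 4096; // requested context size
        cparams.n_batch = 512;  // requested logical batch size

        struct llama_context * ctx = llama_new_context_with_model(model, cparams);
        if (ctx != NULL) {
            // Both getters return uint32_t as of this version.
            printf("n_ctx = %u, n_batch = %u\n", llama_n_ctx(ctx), llama_n_batch(ctx));
            llama_free(ctx);
        }

        llama_free_model(model);
        llama_backend_free();
        return 0;
    }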
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.10.1'
+  VERSION = '0.10.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b1686'
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.10.1
+  version: 0.10.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-12-
+date: 2023-12-23 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: