llama_cpp 0.10.1 → 0.10.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +16 -1
- data/ext/llama_cpp/src/ggml-alloc.c +12 -4
- data/ext/llama_cpp/src/ggml-backend-impl.h +12 -8
- data/ext/llama_cpp/src/ggml-backend.c +75 -5
- data/ext/llama_cpp/src/ggml-backend.h +7 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +284 -162
- data/ext/llama_cpp/src/ggml-metal.h +3 -0
- data/ext/llama_cpp/src/ggml-metal.m +190 -44
- data/ext/llama_cpp/src/ggml-metal.metal +11 -2
- data/ext/llama_cpp/src/ggml.c +262 -89
- data/ext/llama_cpp/src/ggml.h +24 -10
- data/ext/llama_cpp/src/llama.cpp +926 -780
- data/ext/llama_cpp/src/llama.h +8 -3
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -39,6 +39,7 @@
 
 #define LLAMA_MAX_RNG_STATE (64*1024)
 
+#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 #define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
 
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
@@ -126,7 +127,7 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;
 
-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);
 
     // Input data for llama_decode
     // A llama_batch object can contain input about one or many sequences
@@ -179,7 +180,9 @@ extern "C" {
         int32_t main_gpu;           // the GPU that is used for scratch and small tensors
         const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+        // If the provided progress_callback returns true, model loading continues.
+        // If it returns false, model loading is immediately aborted.
         llama_progress_callback progress_callback;
 
         // context pointer passed to the progress callback
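
With the bool-returning callback, user code can now cancel a model load partway through. Below is a minimal C sketch against the bundled llama.cpp b1686 API (not part of this gem's diff); the model path and the 0.5 abort threshold are placeholder values:

```c
#include <stdbool.h>
#include <stdio.h>
#include "llama.h"

// Abort loading once progress exceeds the limit passed via user_data.
static bool my_progress(float progress, void * user_data) {
    const float limit = *(const float *) user_data;
    fprintf(stderr, "loading: %3.0f%%\n", progress * 100.0f);
    return progress <= limit; // returning false aborts llama_load_model_from_file
}

int main(void) {
    llama_backend_init(false);

    float limit = 0.5f; // placeholder: cancel the load at roughly 50%
    struct llama_model_params mparams   = llama_model_default_params();
    mparams.progress_callback           = my_progress;
    mparams.progress_callback_user_data = &limit;

    // "model.gguf" is a placeholder path.
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "model loading failed or was aborted\n");
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```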
@@ -313,7 +316,9 @@ extern "C" {
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
-    LLAMA_API int llama_n_ctx      (const struct llama_context * ctx);
+    // TODO: become more consistent with returned int types across the API
+    LLAMA_API uint32_t llama_n_ctx    (const struct llama_context * ctx);
+    LLAMA_API uint32_t llama_n_batch  (const struct llama_context * ctx);
 
     LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
 
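
The new getters read the context and batch sizes back from an existing llama_context. A minimal C sketch under the same assumptions (placeholder model path; the sizes are arbitrary examples):

```c
#include <stdio.h>
#include "llama.h"

int main(void) {
    llama_backend_init(false);

    struct llama_model * model = llama_load_model_from_file("model.gguf", llama_model_default_params());
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx   = 2048; // example values
    cparams.n_batch = 512;

    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == NULL) {
        llama_free_model(model);
        llama_backend_free();
        return 1;
    }

    // Both getters now return uint32_t (previously llama_n_ctx returned int).
    printf("n_ctx = %u, n_batch = %u\n", llama_n_ctx(ctx), llama_n_batch(ctx));

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```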
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.10.1'
+  VERSION = '0.10.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b1686'
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.10.1
+  version: 0.10.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-12-
+date: 2023-12-23 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: