llama_cpp 0.15.3 → 0.15.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +4 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +27 -10
- data/vendor/tmp/llama.cpp/ggml-impl.h +4 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +65 -11
- data/vendor/tmp/llama.cpp/ggml-metal.metal +69 -27
- data/vendor/tmp/llama.cpp/ggml-quants.c +101 -11
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +75 -58
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +338 -160
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +2 -0
- data/vendor/tmp/llama.cpp/ggml.c +145 -101
- data/vendor/tmp/llama.cpp/ggml.h +18 -3
- data/vendor/tmp/llama.cpp/llama.cpp +637 -249
- data/vendor/tmp/llama.cpp/llama.h +11 -5
- metadata +2 -2
data/vendor/tmp/llama.cpp/llama.h CHANGED
@@ -85,6 +85,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
         LLAMA_VOCAB_PRE_TYPE_OLMO  = 12,
         LLAMA_VOCAB_PRE_TYPE_DBRX  = 13,
+        LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
     };

     // note: these values should be synchronized with ggml_rope
@@ -264,6 +265,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    // https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;  // RNG seed, -1 for random
         uint32_t n_ctx; // text context, 0 = from model
@@ -290,14 +293,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
@@ -421,8 +424,8 @@ extern "C" {

     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
-    LLAMA_API enum llama_rope_type  llama_rope_type (const struct llama_model * model);
+    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
@@ -823,6 +826,9 @@ extern "C" {
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
     LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

+    // Identify if Token Id is a control token or a render-able token
+    LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.3
+  version: 0.15.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-06-01 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: