llama_cpp 0.15.3 → 0.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- data/vendor/tmp/llama.cpp/Makefile +4 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +27 -10
- data/vendor/tmp/llama.cpp/ggml-impl.h +4 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +0 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +65 -11
- data/vendor/tmp/llama.cpp/ggml-metal.metal +69 -27
- data/vendor/tmp/llama.cpp/ggml-quants.c +101 -11
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +75 -58
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +338 -160
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +2 -0
- data/vendor/tmp/llama.cpp/ggml.c +145 -101
- data/vendor/tmp/llama.cpp/ggml.h +18 -3
- data/vendor/tmp/llama.cpp/llama.cpp +637 -249
- data/vendor/tmp/llama.cpp/llama.h +11 -5
- metadata +2 -2
data/vendor/tmp/llama.cpp/llama.h
CHANGED
@@ -85,6 +85,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_QWEN2       = 11,
         LLAMA_VOCAB_PRE_TYPE_OLMO        = 12,
         LLAMA_VOCAB_PRE_TYPE_DBRX        = 13,
+        LLAMA_VOCAB_PRE_TYPE_SMAUG       = 14,
     };

     // note: these values should be synchronized with ggml_rope
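The new `LLAMA_VOCAB_PRE_TYPE_SMAUG` entry extends the pre-tokenizer enum. llama.cpp selects the pre-type at model-load time from the GGUF `tokenizer.ggml.pre` metadata string; below is a minimal C sketch of that mapping, assuming the upstream identifier `smaug-bpe` — the helper name `pre_type_from_string` is hypothetical, and the real loader performs an equivalent chain of comparisons internally:

```c
#include <string.h>
#include "llama.h"

/* Hypothetical helper: map the GGUF "tokenizer.ggml.pre" string to a
 * pre-tokenizer type. llama.cpp's model loader does the equivalent
 * with a chain of string comparisons. */
static enum llama_vocab_pre_type pre_type_from_string(const char * name) {
    if (strcmp(name, "smaug-bpe") == 0) {
        return LLAMA_VOCAB_PRE_TYPE_SMAUG; /* new in this release */
    }
    if (strcmp(name, "dbrx") == 0) {
        return LLAMA_VOCAB_PRE_TYPE_DBRX;
    }
    /* ... remaining known pre-tokenizer identifiers ... */
    return LLAMA_VOCAB_PRE_TYPE_DEFAULT;   /* fall back to default BPE handling */
}
```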
@@ -264,6 +265,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    // https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;              // RNG seed, -1 for random
         uint32_t n_ctx;             // text context, 0 = from model
@@ -290,14 +293,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
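For context, a minimal sketch of enabling the [EXPERIMENTAL] options flagged above. The model path and the Q8_0 cache types are placeholder choices, and per the linked PR #7544 a quantized V cache generally requires `flash_attn` to be enabled:

```c
#include "llama.h"

int main(void) {
    llama_backend_init();

    struct llama_model_params mparams = llama_model_default_params();
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    cparams.flash_attn = true;            // [EXPERIMENTAL] flash attention
    cparams.type_k     = GGML_TYPE_Q8_0;  // [EXPERIMENTAL] quantized K cache
    cparams.type_v     = GGML_TYPE_Q8_0;  // [EXPERIMENTAL] quantized V cache (needs flash_attn)

    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx != NULL) {
        // ... run inference ...
        llama_free(ctx);
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```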
@@ -421,8 +424,8 @@ extern "C" {

     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
-    LLAMA_API enum llama_rope_type  llama_rope_type (const struct llama_model * model);
+    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
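A brief usage sketch of the getters declared above, assuming `model` is an already-loaded `struct llama_model *` (the function name `print_model_types` is illustrative only):

```c
#include <stdio.h>
#include "llama.h"

/* Inspect a loaded model via the public getters shown in the hunk above. */
static void print_model_types(const struct llama_model * model) {
    if (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE) {
        printf("BPE (GPT-2-style) vocabulary\n");
    }
    if (llama_rope_type(model) == LLAMA_ROPE_TYPE_NONE) {
        printf("model does not use rotary position embeddings\n");
    }
    printf("n_vocab = %d, n_ctx_train = %d\n",
           llama_n_vocab(model), llama_n_ctx_train(model));
}
```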
@@ -823,6 +826,9 @@ extern "C" {
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
    LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

+    // Identify if Token Id is a control token or a render-able token
+    LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
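The new `llama_token_is_control` makes it possible to distinguish control tokens (BOS, EOS, PAD, and similar) from renderable text when post-processing output. A sketch that scans the vocabulary, again assuming a loaded `model`:

```c
#include <stdio.h>
#include "llama.h"

/* Count control tokens versus renderable tokens in the model's vocabulary.
 * `model` is assumed to be a loaded struct llama_model *. */
static void report_control_tokens(const struct llama_model * model) {
    const int32_t n_vocab = llama_n_vocab(model);
    int32_t n_control = 0;
    for (llama_token id = 0; id < n_vocab; id++) {
        if (llama_token_is_control(model, id)) {
            n_control++;
        }
    }
    printf("%d of %d tokens are control tokens\n", n_control, n_vocab);
}
```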
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.3
+  version: 0.15.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-06-01 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: