llama_cpp 0.15.2 → 0.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/ext/llama_cpp/llama_cpp.cpp +61 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +6 -0
- data/vendor/tmp/llama.cpp/Makefile +8 -16
- data/vendor/tmp/llama.cpp/ggml-common.h +0 -54
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +99 -40
- data/vendor/tmp/llama.cpp/ggml-cuda.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +44 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +4 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +133 -81
- data/vendor/tmp/llama.cpp/ggml-metal.metal +91 -434
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +4 -1
- data/vendor/tmp/llama.cpp/ggml-quants.c +1962 -2443
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +248 -108
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +375 -657
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +204 -225
- data/vendor/tmp/llama.cpp/ggml.c +498 -836
- data/vendor/tmp/llama.cpp/ggml.h +57 -30
- data/vendor/tmp/llama.cpp/llama.cpp +1477 -859
- data/vendor/tmp/llama.cpp/llama.h +21 -8
- metadata +3 -3
data/vendor/tmp/llama.cpp/llama.h
CHANGED
@@ -81,9 +81,11 @@ extern "C" {
     LLAMA_VOCAB_PRE_TYPE_GPT2      = 7,
     LLAMA_VOCAB_PRE_TYPE_REFACT    = 8,
     LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
-
-
-
+    LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
+    LLAMA_VOCAB_PRE_TYPE_QWEN2     = 11,
+    LLAMA_VOCAB_PRE_TYPE_OLMO      = 12,
+    LLAMA_VOCAB_PRE_TYPE_DBRX      = 13,
+    LLAMA_VOCAB_PRE_TYPE_SMAUG     = 14,
 };

 // note: these values should be synchronized with ggml_rope
@@ -263,6 +265,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    // https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;  // RNG seed, -1 for random
         uint32_t n_ctx; // text context, 0 = from model
@@ -289,14 +293,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
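The [EXPERIMENTAL] markers above correspond to fields that can be toggled through llama_context_params before a context is created. A minimal C sketch, not taken from this gem's code: the helper name make_ctx, the 4096 context size, and the Q8_0 cache types are illustrative choices, and the model is assumed to have been loaded elsewhere.

    #include "llama.h"

    // Sketch: build a context with the [EXPERIMENTAL] options enabled.
    // `model` is assumed to be a llama_model * loaded via llama_load_model_from_file().
    static struct llama_context * make_ctx(struct llama_model * model) {
        struct llama_context_params params = llama_context_default_params();

        params.n_ctx      = 4096;           // text context size
        params.flash_attn = true;           // [EXPERIMENTAL] flash attention
        params.type_k     = GGML_TYPE_Q8_0; // [EXPERIMENTAL] quantized K cache
        params.type_v     = GGML_TYPE_Q8_0; // [EXPERIMENTAL] quantized V cache (needs flash_attn enabled)

        return llama_new_context_with_model(model, params);
    }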
@@ -420,8 +424,8 @@ extern "C" {

     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model
-    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model
+    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
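These declarations are plain read-only accessors. A rough usage sketch, with print_model_info as a hypothetical helper and the model assumed to be loaded elsewhere:

    #include <stdio.h>
    #include "llama.h"

    // Sketch: query basic model properties through the accessors shown above.
    static void print_model_info(const struct llama_model * model) {
        printf("vocab type : %d\n", (int) llama_vocab_type(model));
        printf("rope type  : %d\n", (int) llama_rope_type(model));
        printf("n_vocab    : %d\n", llama_n_vocab(model));
        printf("n_ctx_train: %d\n", llama_n_ctx_train(model));
    }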
@@ -758,6 +762,12 @@ extern "C" {
     // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
     LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);

+    // Get the number of threads used for generation of a single token.
+    LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+
+    // Get the number of threads used for prompt and batch processing (multiple token).
+    LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+
     // Set whether to use causal attention or not
     // If set to true, the model will only attend to the past tokens
     LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
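The two new getters pair with the existing llama_set_n_threads() setter. A hedged sketch (configure_threads is a hypothetical helper; the thread counts are arbitrary):

    #include <stdio.h>
    #include "llama.h"

    // Sketch: set the thread counts, then read them back with the new getters.
    static void configure_threads(struct llama_context * ctx) {
        llama_set_n_threads(ctx, /*n_threads*/ 8, /*n_threads_batch*/ 16);

        printf("generation threads: %u\n", llama_n_threads(ctx));
        printf("batch threads     : %u\n", llama_n_threads_batch(ctx));
    }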
@@ -816,6 +826,9 @@ extern "C" {
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
     LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

+    // Identify if Token Id is a control token or a render-able token
+    LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
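llama_token_is_control() distinguishes control tokens from renderable text tokens. A minimal sketch that scans the vocabulary; count_control_tokens is a hypothetical helper and the model is assumed to be loaded elsewhere:

    #include <stdio.h>
    #include "llama.h"

    // Sketch: count how many vocabulary entries are control tokens.
    static void count_control_tokens(const struct llama_model * model) {
        const int32_t n_vocab = llama_n_vocab(model);
        int32_t n_control = 0;

        for (llama_token id = 0; id < n_vocab; ++id) {
            if (llama_token_is_control(model, id)) {
                n_control++; // control tokens are not meant to be rendered as text
            }
        }
        printf("%d of %d tokens are control tokens\n", n_control, n_vocab);
    }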
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.2
+  version: 0.15.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-06-01 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.10
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.