llama_cpp 0.15.2 → 0.15.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/ext/llama_cpp/llama_cpp.cpp +61 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +6 -0
- data/vendor/tmp/llama.cpp/Makefile +8 -16
- data/vendor/tmp/llama.cpp/ggml-common.h +0 -54
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +99 -40
- data/vendor/tmp/llama.cpp/ggml-cuda.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +44 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +4 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +133 -81
- data/vendor/tmp/llama.cpp/ggml-metal.metal +91 -434
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +4 -1
- data/vendor/tmp/llama.cpp/ggml-quants.c +1962 -2443
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +248 -108
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +375 -657
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +204 -225
- data/vendor/tmp/llama.cpp/ggml.c +498 -836
- data/vendor/tmp/llama.cpp/ggml.h +57 -30
- data/vendor/tmp/llama.cpp/llama.cpp +1477 -859
- data/vendor/tmp/llama.cpp/llama.h +21 -8
- metadata +3 -3
data/vendor/tmp/llama.cpp/llama.h
CHANGED
```diff
@@ -81,9 +81,11 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_GPT2       = 7,
         LLAMA_VOCAB_PRE_TYPE_REFACT     = 8,
         LLAMA_VOCAB_PRE_TYPE_COMMAND_R  = 9,
-        …
-        …
-        …
+        LLAMA_VOCAB_PRE_TYPE_STABLELM2  = 10,
+        LLAMA_VOCAB_PRE_TYPE_QWEN2      = 11,
+        LLAMA_VOCAB_PRE_TYPE_OLMO       = 12,
+        LLAMA_VOCAB_PRE_TYPE_DBRX       = 13,
+        LLAMA_VOCAB_PRE_TYPE_SMAUG      = 14,
     };

     // note: these values should be synchronized with ggml_rope
```
```diff
@@ -263,6 +265,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    // https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;  // RNG seed, -1 for random
         uint32_t n_ctx; // text context, 0 = from model
```
```diff
@@ -289,14 +293,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
```
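The `[EXPERIMENTAL]` tags mark the flash-attention and KV-cache-type knobs covered by the PR note added above `llama_context_params`. Below is a minimal sketch of turning them on, using only declarations from this header; the model path `"model.gguf"` is a hypothetical placeholder and error handling is kept to a minimum:

```c
#include "llama.h"

// Sketch: enabling the [EXPERIMENTAL] options from llama_context_params.
// "model.gguf" is a hypothetical path. Per the PR note, non-default values
// here may crash or misbehave in some configurations.
int main(void) {
    struct llama_model_params mparams = llama_model_default_params();
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) return 1;

    struct llama_context_params cparams = llama_context_default_params();
    cparams.flash_attn = true;            // [EXPERIMENTAL] flash attention
    cparams.type_k     = GGML_TYPE_Q8_0;  // [EXPERIMENTAL] quantized K cache
    cparams.type_v     = GGML_TYPE_Q8_0;  // [EXPERIMENTAL] quantized V cache

    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx != NULL) llama_free(ctx);
    llama_free_model(model);
    return 0;
}
```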
```diff
@@ -420,8 +424,8 @@ extern "C" {
     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model …
-    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model …
+    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
```
```diff
@@ -758,6 +762,12 @@ extern "C" {
     // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
     LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);

+    // Get the number of threads used for generation of a single token.
+    LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+
+    // Get the number of threads used for prompt and batch processing (multiple tokens).
+    LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+
     // Set whether to use causal attention or not
     // If set to true, the model will only attend to the past tokens
     LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
```
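The new getters read back the counts stored by the existing setter. A quick sketch, assuming `ctx` is a live `llama_context` created elsewhere:

```c
#include <stdio.h>
#include "llama.h"

// Sketch: round-tripping thread counts through the new getters.
// `ctx` is assumed to be a valid llama_context.
void report_threads(struct llama_context * ctx) {
    llama_set_n_threads(ctx, 8, 16); // 8 threads for generation, 16 for batches

    printf("generation threads: %u\n", llama_n_threads(ctx));       // -> 8
    printf("batch threads:      %u\n", llama_n_threads_batch(ctx)); // -> 16
}
```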
```diff
@@ -816,6 +826,9 @@ extern "C" {
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
     LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

+    // Identify if Token Id is a control token or a render-able token
+    LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
```
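One way the new predicate could be used is to scan the vocabulary and list the control (non-renderable) token ids. A sketch, assuming `model` is already loaded; `llama_token_get_text` and `llama_n_vocab` come from elsewhere in this header:

```c
#include <stdio.h>
#include "llama.h"

// Sketch: enumerate control tokens (e.g. BOS/EOS and other special ids).
// `model` is assumed to be a loaded llama_model.
void list_control_tokens(const struct llama_model * model) {
    const int32_t n_vocab = llama_n_vocab(model);
    for (llama_token id = 0; id < n_vocab; ++id) {
        if (llama_token_is_control(model, id)) {
            printf("control token %d: %s%s\n", id,
                   llama_token_get_text(model, id),
                   llama_token_is_eog(model, id) ? " (end-of-generation)" : "");
        }
    }
}
```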
metadata
CHANGED
```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.2
+  version: 0.15.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-…
+date: 2024-06-01 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
```
```diff
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.5.…
+rubygems_version: 3.5.10
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.
```