llama_cpp 0.15.2 → 0.15.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +49 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -0
- data/vendor/tmp/llama.cpp/Makefile +6 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +0 -54
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +72 -30
- data/vendor/tmp/llama.cpp/ggml-cuda.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +40 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +4 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +68 -70
- data/vendor/tmp/llama.cpp/ggml-metal.metal +24 -409
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +4 -1
- data/vendor/tmp/llama.cpp/ggml-quants.c +1879 -2450
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +176 -53
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +40 -500
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +202 -225
- data/vendor/tmp/llama.cpp/ggml.c +376 -758
- data/vendor/tmp/llama.cpp/ggml.h +39 -27
- data/vendor/tmp/llama.cpp/llama.cpp +823 -593
- data/vendor/tmp/llama.cpp/llama.h +10 -3
- metadata +3 -3
data/vendor/tmp/llama.cpp/llama.h
CHANGED
@@ -81,9 +81,10 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
         LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
         LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
-
-
-
+        LLAMA_VOCAB_PRE_TYPE_STABLELM2      = 10,
+        LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,
+        LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,
+        LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,
     };
 
     // note: these values should be synchronized with ggml_rope
@@ -758,6 +759,12 @@ extern "C" {
     // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
     LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);
 
+    // Get the number of threads used for generation of a single token.
+    LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+
+    // Get the number of threads used for prompt and batch processing (multiple tokens).
+    LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+
     // Set whether to use causal attention or not
     // If set to true, the model will only attend to the past tokens
     LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
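The release adds read-back counterparts to the existing llama_set_n_threads() setter. Below is a minimal usage sketch of how the pair fit together; it relies only on the signatures shown in the hunk above, and the context pointer and the thread counts (8 and 16) are illustrative placeholders, not values taken from this release.

    // Minimal sketch (not from the diff): set thread counts on a context,
    // then read them back with the new getters. `ctx` is assumed to be an
    // already-initialized struct llama_context *.
    #include <stdio.h>
    #include "llama.h"

    static void report_threads(struct llama_context * ctx) {
        llama_set_n_threads(ctx, /* n_threads */ 8, /* n_threads_batch */ 16);
        printf("generation threads: %u\n", llama_n_threads(ctx));        // prints 8
        printf("batch threads:      %u\n", llama_n_threads_batch(ctx));  // prints 16
    }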
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.2
+  version: 0.15.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-05-
+date: 2024-05-25 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.10
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.