llama_cpp 0.15.2 → 0.15.3
This diff shows the changes between package versions as they were publicly released to their respective registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +49 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -0
- data/vendor/tmp/llama.cpp/Makefile +6 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +0 -54
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +72 -30
- data/vendor/tmp/llama.cpp/ggml-cuda.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-impl.h +40 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +4 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +68 -70
- data/vendor/tmp/llama.cpp/ggml-metal.metal +24 -409
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +4 -1
- data/vendor/tmp/llama.cpp/ggml-quants.c +1879 -2450
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +176 -53
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +40 -500
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +202 -225
- data/vendor/tmp/llama.cpp/ggml.c +376 -758
- data/vendor/tmp/llama.cpp/ggml.h +39 -27
- data/vendor/tmp/llama.cpp/llama.cpp +823 -593
- data/vendor/tmp/llama.cpp/llama.h +10 -3
- metadata +3 -3
data/vendor/tmp/llama.cpp/llama.h
CHANGED
@@ -81,9 +81,10 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
         LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
         LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
-        LLAMA_VOCAB_PRE_TYPE_QWEN2          = 10,
-        LLAMA_VOCAB_PRE_TYPE_OLMO           = 11,
-        LLAMA_VOCAB_PRE_TYPE_DBRX           = 12,
+        LLAMA_VOCAB_PRE_TYPE_STABLELM2      = 10,
+        LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,
+        LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,
+        LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,
     };

     // note: these values should be synchronized with ggml_rope
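Inserting LLAMA_VOCAB_PRE_TYPE_STABLELM2 at value 10 shifts the numeric values of QWEN2, OLMO, and DBRX each up by one. A small self-contained sketch of why raw integers persisted under 0.15.2 do not survive such a renumbering; the enum below is a hypothetical mirror of the values in this diff, not the gem's own header:

```c
#include <stdio.h>

// Hypothetical mirror of the relevant enum values after this change;
// the real definitions live in llama.h.
enum vocab_pre_type {
    VOCAB_PRE_TYPE_COMMAND_R = 9,
    VOCAB_PRE_TYPE_STABLELM2 = 10, // new in this release
    VOCAB_PRE_TYPE_QWEN2     = 11, // was 10 before this change
    VOCAB_PRE_TYPE_OLMO      = 12, // was 11
    VOCAB_PRE_TYPE_DBRX      = 13, // was 12
};

int main(void) {
    // A raw value of 10 written out under 0.15.2 meant QWEN2;
    // read back under 0.15.3 it decodes as STABLELM2 instead.
    int persisted = 10;
    printf("10 == STABLELM2: %s\n",
           persisted == VOCAB_PRE_TYPE_STABLELM2 ? "true" : "false");
    return 0;
}
```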
@@ -758,6 +759,12 @@ extern "C" {
     // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
     LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);

+    // Get the number of threads used for generation of a single token.
+    LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+
+    // Get the number of threads used for prompt and batch processing (multiple token).
+    LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+
     // Set whether to use causal attention or not
     // If set to true, the model will only attend to the past tokens
     LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
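The two getters added here pair with the existing llama_set_n_threads setter, letting callers read back the current thread configuration. A minimal usage sketch of the round trip, assuming `ctx` is an already-initialized llama_context (model loading and context creation omitted):

```c
#include <stdint.h>
#include <stdio.h>
#include "llama.h"

// Minimal sketch, assuming `ctx` was created elsewhere
// (e.g. via llama_new_context_with_model).
static void report_thread_config(struct llama_context * ctx) {
    // Existing setter: 4 threads for single-token generation,
    // 8 threads for prompt/batch processing.
    llama_set_n_threads(ctx, 4, 8);

    // New in the llama.cpp vendored by 0.15.3: read the values back.
    uint32_t n_gen   = llama_n_threads(ctx);
    uint32_t n_batch = llama_n_threads_batch(ctx);
    printf("generation threads: %u, batch threads: %u\n", n_gen, n_batch);
}
```

The accompanying changes to data/ext/llama_cpp/llama_cpp.cpp and data/sig/llama_cpp.rbs presumably expose these getters on the Ruby side as well.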
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.2
+  version: 0.15.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-05-
+date: 2024-05-25 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.10
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.