llama_cpp 0.9.1 → 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/ext/llama_cpp/src/ggml-alloc.c +383 -210
- data/ext/llama_cpp/src/ggml-alloc.h +68 -16
- data/ext/llama_cpp/src/ggml-backend-impl.h +87 -0
- data/ext/llama_cpp/src/ggml-backend.c +578 -13
- data/ext/llama_cpp/src/ggml-backend.h +70 -77
- data/ext/llama_cpp/src/ggml-cuda.cu +277 -53
- data/ext/llama_cpp/src/ggml-cuda.h +5 -0
- data/ext/llama_cpp/src/ggml-impl.h +13 -7
- data/ext/llama_cpp/src/ggml-metal.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.m +112 -30
- data/ext/llama_cpp/src/ggml-metal.metal +107 -1
- data/ext/llama_cpp/src/ggml-quants.c +173 -73
- data/ext/llama_cpp/src/ggml.c +877 -1707
- data/ext/llama_cpp/src/ggml.h +68 -45
- data/ext/llama_cpp/src/llama.cpp +475 -117
- data/ext/llama_cpp/src/llama.h +11 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +3 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -175,11 +175,11 @@ extern "C" {
|
|
175
175
|
};
|
176
176
|
|
177
177
|
struct llama_context_params {
|
178
|
-
uint32_t seed;
|
179
|
-
uint32_t n_ctx;
|
180
|
-
uint32_t n_batch;
|
181
|
-
uint32_t n_threads;
|
182
|
-
uint32_t n_threads_batch;
|
178
|
+
uint32_t seed; // RNG seed, -1 for random
|
179
|
+
uint32_t n_ctx; // text context, 0 = from model
|
180
|
+
uint32_t n_batch; // prompt processing maximum batch size
|
181
|
+
uint32_t n_threads; // number of threads to use for generation
|
182
|
+
uint32_t n_threads_batch; // number of threads to use for batch processing
|
183
183
|
int8_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
|
184
184
|
|
185
185
|
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
@@ -517,6 +517,12 @@ extern "C" {
|
|
517
517
|
LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
|
518
518
|
LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
|
519
519
|
|
520
|
+
// Returns -1 if unknown, 1 for true or 0 for false.
|
521
|
+
LLAMA_API int llama_add_bos_token(const struct llama_model * model);
|
522
|
+
|
523
|
+
// Returns -1 if unknown, 1 for true or 0 for false.
|
524
|
+
LLAMA_API int llama_add_eos_token(const struct llama_model * model);
|
525
|
+
|
520
526
|
// codellama infill tokens
|
521
527
|
LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
|
522
528
|
LLAMA_API llama_token llama_token_middle(const struct llama_model * model); // Beginning of infill middle
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.9.1'
|
6
|
+
VERSION = '0.9.3'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = 'b1472'
|
9
|
+
LLAMA_CPP_VERSION = 'b1523'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -94,6 +94,8 @@ module LLaMACpp
|
|
94
94
|
def token_bos: () -> Integer
|
95
95
|
def token_eos: () -> Integer
|
96
96
|
def token_nl: () -> Integer
|
97
|
+
def add_bos_token?: () -> bool
|
98
|
+
def add_eos_token?: () -> bool
|
97
99
|
def token_prefix: () -> Integer
|
98
100
|
def token_middle: () -> Integer
|
99
101
|
def token_suffix: () -> Integer
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- ext/llama_cpp/src/LICENSE
|
33
33
|
- ext/llama_cpp/src/ggml-alloc.c
|
34
34
|
- ext/llama_cpp/src/ggml-alloc.h
|
35
|
+
- ext/llama_cpp/src/ggml-backend-impl.h
|
35
36
|
- ext/llama_cpp/src/ggml-backend.c
|
36
37
|
- ext/llama_cpp/src/ggml-backend.h
|
37
38
|
- ext/llama_cpp/src/ggml-cuda.cu
|