llama_cpp 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +41 -21
- data/ext/llama_cpp/src/ggml-metal.m +44 -3
- data/ext/llama_cpp/src/ggml-metal.metal +162 -1
- data/ext/llama_cpp/src/ggml-opencl.cpp +30 -56
- data/ext/llama_cpp/src/ggml.c +13 -9
- data/ext/llama_cpp/src/ggml.h +3 -2
- data/ext/llama_cpp/src/k_quants.c +12 -20
- data/ext/llama_cpp/src/llama.cpp +359 -58
- data/ext/llama_cpp/src/llama.h +18 -12
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -4
- metadata +3 -3
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -133,11 +133,12 @@ extern "C" {
|
|
133
133
|
typedef struct llama_batch {
|
134
134
|
int32_t n_tokens;
|
135
135
|
|
136
|
-
llama_token * token;
|
137
|
-
float * embd;
|
138
|
-
llama_pos * pos;
|
139
|
-
|
140
|
-
|
136
|
+
llama_token * token;
|
137
|
+
float * embd;
|
138
|
+
llama_pos * pos;
|
139
|
+
int32_t * n_seq_id;
|
140
|
+
llama_seq_id ** seq_id;
|
141
|
+
int8_t * logits;
|
141
142
|
|
142
143
|
// NOTE: helpers for smooth API transition - can be deprecated in the future
|
143
144
|
// for future-proof code, use the above fields instead and ignore everything below
|
@@ -446,7 +447,8 @@ extern "C" {
|
|
446
447
|
llama_pos pos_0,
|
447
448
|
llama_seq_id seq_id);
|
448
449
|
|
449
|
-
// Allocates a batch of tokens on the heap
|
450
|
+
// Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
|
451
|
+
// Each token can be assigned up to n_seq_max sequence ids
|
450
452
|
// The batch has to be freed with llama_batch_free()
|
451
453
|
// If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
|
452
454
|
// Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
|
@@ -454,7 +456,8 @@ extern "C" {
|
|
454
456
|
// All members are left uninitialized
|
455
457
|
LLAMA_API struct llama_batch llama_batch_init(
|
456
458
|
int32_t n_tokens,
|
457
|
-
int32_t embd);
|
459
|
+
int32_t embd,
|
460
|
+
int32_t n_seq_max);
|
458
461
|
|
459
462
|
// Frees a batch of tokens allocated with llama_batch_init()
|
460
463
|
LLAMA_API void llama_batch_free(struct llama_batch batch);
|
@@ -511,17 +514,20 @@ extern "C" {
|
|
511
514
|
// Tokenization
|
512
515
|
//
|
513
516
|
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
517
|
+
/// @details Convert the provided text into tokens.
|
518
|
+
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
519
|
+
/// @return Returns the number of tokens on success, no more than n_max_tokens
|
520
|
+
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
521
|
+
/// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
|
522
|
+
/// Does not insert a leading space.
|
518
523
|
LLAMA_API int llama_tokenize(
|
519
524
|
const struct llama_model * model,
|
520
525
|
const char * text,
|
521
526
|
int text_len,
|
522
527
|
llama_token * tokens,
|
523
528
|
int n_max_tokens,
|
524
|
-
bool add_bos);
|
529
|
+
bool add_bos,
|
530
|
+
bool special);
|
525
531
|
|
526
532
|
// Token Id -> Piece.
|
527
533
|
// Uses the vocabulary in the provided context.
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.7.1'
|
6
|
+
VERSION = '0.8.0'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = '
|
9
|
+
LLAMA_CPP_VERSION = 'b1405'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -78,7 +78,7 @@ module LLaMACpp
|
|
78
78
|
def n_embd: () -> Integer
|
79
79
|
def rope_freq_scale_train: () -> Float
|
80
80
|
def token_to_piece: (Integer) -> String
|
81
|
-
def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
|
81
|
+
def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
|
82
82
|
def desc: () -> String
|
83
83
|
def size: () -> Integer
|
84
84
|
def n_params: () -> Integer
|
@@ -117,7 +117,7 @@ module LLaMACpp
|
|
117
117
|
class Batch
|
118
118
|
public
|
119
119
|
|
120
|
-
def initialize: (n_tokens: Integer, embd: Integer) -> void
|
120
|
+
def initialize: (n_tokens: Integer, embd: Integer, n_seq_max: Integer) -> void
|
121
121
|
def n_tokens=: (Integer) -> Integer
|
122
122
|
def n_tokens: () -> Integer
|
123
123
|
def all_pos_zero=: (Integer) -> Integer
|
@@ -130,8 +130,8 @@ module LLaMACpp
|
|
130
130
|
def get_token: (Integer) -> Integer
|
131
131
|
def set_pos: (Integer, Integer) -> Integer
|
132
132
|
def get_pos: (Integer) -> Integer
|
133
|
-
def set_seq_id: (Integer, Integer) -> Integer
|
134
|
-
def get_seq_id: (Integer) -> Integer
|
133
|
+
def set_seq_id: (Integer, Integer, Integer) -> Integer
|
134
|
+
def get_seq_id: (Integer, Integer) -> Integer
|
135
135
|
def set_logit: (Integer, bool) -> bool
|
136
136
|
def get_logit: (Integer) -> bool
|
137
137
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.1
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -78,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: '0'
|
80
80
|
requirements: []
|
81
|
-
rubygems_version: 3.4.
|
81
|
+
rubygems_version: 3.4.20
|
82
82
|
signing_key:
|
83
83
|
specification_version: 4
|
84
84
|
summary: Ruby bindings for the llama.cpp.
|