llama_cpp 0.7.1 → 0.8.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/llama_cpp/llama_cpp.cpp +41 -21
- data/ext/llama_cpp/src/ggml-metal.m +44 -3
- data/ext/llama_cpp/src/ggml-metal.metal +162 -1
- data/ext/llama_cpp/src/ggml-opencl.cpp +30 -56
- data/ext/llama_cpp/src/ggml.c +13 -9
- data/ext/llama_cpp/src/ggml.h +3 -2
- data/ext/llama_cpp/src/k_quants.c +12 -20
- data/ext/llama_cpp/src/llama.cpp +359 -58
- data/ext/llama_cpp/src/llama.h +18 -12
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -4
- metadata +3 -3
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -133,11 +133,12 @@ extern "C" {
     typedef struct llama_batch {
         int32_t n_tokens;

-        llama_token *
-        float *
-        llama_pos *
-
-
+        llama_token  *  token;
+        float        *  embd;
+        llama_pos    *  pos;
+        int32_t      *  n_seq_id;
+        llama_seq_id ** seq_id;
+        int8_t       *  logits;

         // NOTE: helpers for smooth API transition - can be deprecated in the future
         // for future-proof code, use the above fields instead and ignore everything below
@@ -446,7 +447,8 @@ extern "C" {
                llama_pos   pos_0,
             llama_seq_id   seq_id);

-    // Allocates a batch of tokens on the heap
+    // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
+    // Each token can be assigned up to n_seq_max sequence ids
     // The batch has to be freed with llama_batch_free()
     // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
     // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
@@ -454,7 +456,8 @@ extern "C" {
     // All members are left uninitialized
     LLAMA_API struct llama_batch llama_batch_init(
             int32_t n_tokens,
-            int32_t embd);
+            int32_t embd,
+            int32_t n_seq_max);

     // Frees a batch of tokens allocated with llama_batch_init()
     LLAMA_API void llama_batch_free(struct llama_batch batch);
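Taken together, the two hunks above change how a batch is built: each token slot now carries its own position, up to n_seq_max sequence ids, and a per-token logits flag. A minimal Ruby sketch of the new layout via the gem's LLaMACpp::Batch; the constructor keywords and the three-argument set_seq_id follow the RBS shown later in this diff, while set_token is an assumption mirroring the get_token accessor listed there:

```ruby
require 'llama_cpp'

# Room for 8 tokens, no embeddings (embd: 0), up to 2 sequence ids per token.
batch = LLaMACpp::Batch.new(n_tokens: 8, embd: 0, n_seq_max: 2)

prompt_tokens = [1, 15043, 3186] # placeholder token ids
prompt_tokens.each_with_index do |tok, i|
  batch.set_token(i, tok)                         # assumed setter (get_token is in the RBS)
  batch.set_pos(i, i)                             # position within the sequence
  batch.set_seq_id(i, 0, 0)                       # slot 0 -> sequence 0
  batch.set_seq_id(i, 1, 1)                       # slot 1 -> sequence 1, new in 0.8.0
  batch.set_logit(i, i == prompt_tokens.size - 1) # request logits only for the last token
end
batch.n_tokens = prompt_tokens.size
```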
@@ -511,17 +514,20 @@ extern "C" {
     // Tokenization
     //

-
-
-
-
+    /// @details Convert the provided text into tokens.
+    /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
+    /// @return Returns the number of tokens on success, no more than n_max_tokens
+    /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
+    ///                Does not insert a leading space.
     LLAMA_API int llama_tokenize(
         const struct llama_model * model,
                       const char * text,
                              int   text_len,
                      llama_token * tokens,
                              int   n_max_tokens,
-                            bool   add_bos);
+                            bool   add_bos,
+                            bool   special);

     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
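In the Ruby bindings this lands on Model#tokenize as an optional special: keyword (see the RBS hunk below). A hedged usage sketch; the model path is a placeholder, and the ModelParams class and Model.new keywords are assumptions about the gem's API beyond what this diff shows:

```ruby
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf',
                            params: LLaMACpp::ModelParams.new)

text = 'Hello</s>'
# Default behaviour: '</s>' is treated as plain text and split into ordinary tokens.
plain = model.tokenize(text: text, n_max_tokens: 32, add_bos: true)
# With special: true, control tokens such as '</s>' map to their special token ids.
with_special = model.tokenize(text: text, n_max_tokens: 32, add_bos: true, special: true)
```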
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.7.1'
+  VERSION = '0.8.0'

   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = '
+  LLAMA_CPP_VERSION = 'b1405'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -78,7 +78,7 @@ module LLaMACpp
     def n_embd: () -> Integer
     def rope_freq_scale_train: () -> Float
     def token_to_piece: (Integer) -> String
-    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
+    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
     def desc: () -> String
     def size: () -> Integer
     def n_params: () -> Integer
@@ -117,7 +117,7 @@ module LLaMACpp
   class Batch
     public

-    def initialize: (n_tokens: Integer, embd: Integer) -> void
+    def initialize: (n_tokens: Integer, embd: Integer, n_seq_max: Integer) -> void
     def n_tokens=: (Integer) -> Integer
     def n_tokens: () -> Integer
     def all_pos_zero=: (Integer) -> Integer
@@ -130,8 +130,8 @@ module LLaMACpp
     def get_token: (Integer) -> Integer
     def set_pos: (Integer, Integer) -> Integer
     def get_pos: (Integer) -> Integer
-    def set_seq_id: (Integer, Integer) -> Integer
-    def get_seq_id: (Integer) -> Integer
+    def set_seq_id: (Integer, Integer, Integer) -> Integer
+    def get_seq_id: (Integer, Integer) -> Integer
     def set_logit: (Integer, bool) -> bool
     def get_logit: (Integer) -> bool
   end
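The extra Integer in these signatures is the slot index introduced by the n_seq_max change: set_seq_id now takes (token_index, slot_index, seq_id) and get_seq_id takes (token_index, slot_index). A small before/after sketch, reusing the batch from the earlier example:

```ruby
# 0.7.x (old, one sequence id per token):
#   batch.set_seq_id(0, 7)
#   batch.get_seq_id(0)     #=> 7

# 0.8.0 (new, a slot index selects one of the n_seq_max ids per token):
batch.set_seq_id(0, 0, 7)   # token 0, slot 0 -> sequence 7
batch.get_seq_id(0, 0)      #=> 7
batch.get_logit(0)          #=> false, unless set_logit(0, true) was called
```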
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.7.1
+  version: 0.8.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-10-
+date: 2023-10-21 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -78,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.4.
+rubygems_version: 3.4.20
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.