llama_cpp 0.5.3 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +6 -5
- data/examples/chat.rb +13 -13
- data/examples/embedding.rb +9 -9
- data/ext/llama_cpp/llama_cpp.cpp +583 -262
- data/ext/llama_cpp/src/ggml-alloc.c +8 -2
- data/ext/llama_cpp/src/ggml-alloc.h +1 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +326 -149
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +167 -89
- data/ext/llama_cpp/src/ggml-metal.metal +130 -40
- data/ext/llama_cpp/src/ggml-opencl.cpp +119 -53
- data/ext/llama_cpp/src/ggml.c +2355 -1166
- data/ext/llama_cpp/src/ggml.h +129 -35
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/llama.cpp +1766 -671
- data/ext/llama_cpp/src/llama.h +321 -120
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -10
- data/sig/llama_cpp.rbs +70 -34
- metadata +4 -3
data/sig/llama_cpp.rbs
CHANGED
@@ -67,16 +67,16 @@ module LLaMACpp
|
|
67
67
|
class Model
|
68
68
|
public
|
69
69
|
|
70
|
-
def initialize: (model_path: String, params: ::LLaMACpp::
|
70
|
+
def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
|
71
71
|
| () -> void
|
72
72
|
def empty?: () -> bool
|
73
73
|
def free: () -> void
|
74
|
-
def load: (model_path: String, params: ::LLaMACpp::
|
75
|
-
def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
|
74
|
+
def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
|
75
|
+
def apply_lora_from_file: (lora_path: String, ?scale: Float, ?base_model_path: String, ?n_threads: Integer) -> void
|
76
76
|
def n_vocab: () -> Integer
|
77
|
-
def n_ctx: () -> Integer
|
78
77
|
def n_ctx_train: () -> Integer
|
79
78
|
def n_embd: () -> Integer
|
79
|
+
def rope_freq_scale_train: () -> Float
|
80
80
|
def token_to_piece: (Integer) -> String
|
81
81
|
def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
|
82
82
|
def desc: () -> String
|
@@ -98,10 +98,50 @@ module LLaMACpp
|
|
98
98
|
def n_eval: () -> Integer
|
99
99
|
end
|
100
100
|
|
101
|
+
class ModelParams
|
102
|
+
public
|
103
|
+
|
104
|
+
def n_gpu_layers: () -> Integer
|
105
|
+
def n_gpu_layers=: (Integer) -> Integer
|
106
|
+
def main_gpu: () -> Integer
|
107
|
+
def main_gpu=: (Integer) -> Integer
|
108
|
+
def tensor_split: () -> Array[Float]
|
109
|
+
def vocab_only: () -> bool
|
110
|
+
def vocab_only=: (bool) -> bool
|
111
|
+
def use_mmap: () -> bool
|
112
|
+
def use_mmap=: (bool) -> bool
|
113
|
+
def use_mlock: () -> bool
|
114
|
+
def use_mlock=: (bool) -> bool
|
115
|
+
end
|
116
|
+
|
117
|
+
class Batch
|
118
|
+
public
|
119
|
+
|
120
|
+
def initialize: (n_tokens: Integer, embd: Integer) -> void
|
121
|
+
def n_tokens=: (Integer) -> Integer
|
122
|
+
def n_tokens: () -> Integer
|
123
|
+
def all_pos_zero=: (Integer) -> Integer
|
124
|
+
def all_pos_zero: () -> Integer
|
125
|
+
def all_pos_one=: (Integer) -> Integer
|
126
|
+
def all_pos_one: () -> Integer
|
127
|
+
def all_seq_id=: (Integer) -> Integer
|
128
|
+
def all_seq_id: () -> Integer
|
129
|
+
def set_token: (Integer, Integer) -> Integer
|
130
|
+
def get_token: (Integer) -> Integer
|
131
|
+
def set_pos: (Integer, Integer) -> Integer
|
132
|
+
def get_pos: (Integer) -> Integer
|
133
|
+
def set_seq_id: (Integer, Integer) -> Integer
|
134
|
+
def get_seq_id: (Integer) -> Integer
|
135
|
+
def set_logit: (Integer, bool) -> bool
|
136
|
+
def get_logit: (Integer) -> bool
|
137
|
+
end
|
138
|
+
|
101
139
|
class Context
|
102
140
|
public
|
103
141
|
|
104
|
-
|
142
|
+
attr_reader model: ::LLaMACpp::Model
|
143
|
+
|
144
|
+
def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
|
105
145
|
def embeddings: () -> Array[Float]
|
106
146
|
def text: (Integer) -> String
|
107
147
|
def score: (Integer) -> Float
|
@@ -109,20 +149,24 @@ module LLaMACpp
|
|
109
149
|
def token_bos: () -> Integer
|
110
150
|
def token_eos: () -> Integer
|
111
151
|
def token_nl: () -> Integer
|
112
|
-
def
|
113
|
-
def
|
114
|
-
def
|
152
|
+
def token_prefix: () -> Integer
|
153
|
+
def token_middle: () -> Integer
|
154
|
+
def token_suffix: () -> Integer
|
155
|
+
def token_eot: () -> Integer
|
156
|
+
def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
|
157
|
+
def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
|
158
|
+
def decode: (::LLaMACpp::Batch) -> void
|
115
159
|
def logits: () -> Array[Float]
|
116
160
|
def n_ctx: () -> Integer
|
117
|
-
def n_ctx_train: () -> Integer
|
118
|
-
def n_embd: () -> Integer
|
119
|
-
def n_vocab: () -> Integer
|
120
161
|
def timings: () -> ::LLaMACpp::Timings
|
121
162
|
def print_timings: () -> void
|
122
163
|
def reset_timings: () -> void
|
123
|
-
def token_to_piece: (Integer) -> String
|
124
|
-
def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
|
125
164
|
def kv_cache_token_count: () -> Integer
|
165
|
+
def kv_cache_tokens_rm: (Integer, Integer) -> void
|
166
|
+
def kv_cache_seq_rm: (Integer, Integer, Integer) -> void
|
167
|
+
def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
|
168
|
+
def kv_cache_seq_keep: (Integer) -> void
|
169
|
+
def kv_cache_seq_shift: (Integer, Integer, Integer, Integer) -> void
|
126
170
|
def set_rng_seed: (Integer) -> void
|
127
171
|
def load_session_file: (session_path: String) -> void
|
128
172
|
def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
|
@@ -134,6 +178,7 @@ module LLaMACpp
|
|
134
178
|
def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
|
135
179
|
def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
|
136
180
|
def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
|
181
|
+
def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
|
137
182
|
def sample_temperature: (::LLaMACpp::TokenDataArray, temperature: Float) -> void
|
138
183
|
def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
|
139
184
|
def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
|
@@ -146,37 +191,28 @@ module LLaMACpp
|
|
146
191
|
class ContextParams
|
147
192
|
public
|
148
193
|
|
149
|
-
def
|
150
|
-
def
|
151
|
-
def f16_kv: () -> bool
|
152
|
-
def f16_kv=: (bool) -> bool
|
153
|
-
def logits_all: () -> bool
|
154
|
-
def logits_all=: (bool) -> bool
|
194
|
+
def seed: () -> Integer
|
195
|
+
def seed=: (Integer) -> Integer
|
155
196
|
def n_ctx: () -> Integer
|
156
197
|
def n_ctx=: (Integer) -> Integer
|
157
198
|
def n_batch: () -> Integer
|
158
199
|
def n_batch=: (Integer) -> Integer
|
159
|
-
def
|
160
|
-
def
|
161
|
-
def
|
162
|
-
def
|
163
|
-
def tensor_split: () -> Array[Float]
|
200
|
+
def n_threads: () -> Integer
|
201
|
+
def n_threads=: (Integer) -> Integer
|
202
|
+
def n_threads_batch: () -> Integer
|
203
|
+
def n_threads_batch=: (Integer) -> Integer
|
164
204
|
def rope_freq_base=: (Float) -> Float
|
165
205
|
def rope_freq_base: () -> Float
|
166
206
|
def rope_freq_scale=: (Float) -> Float
|
167
207
|
def rope_freq_scale: () -> Float
|
168
|
-
def low_vram: () -> bool
|
169
|
-
def low_vram=: (bool) -> bool
|
170
208
|
def mul_mat_q: () -> bool
|
171
209
|
def mul_mat_q=: (bool) -> bool
|
172
|
-
def
|
173
|
-
def
|
174
|
-
def
|
175
|
-
def
|
176
|
-
def
|
177
|
-
def
|
178
|
-
def vocab_only: () -> bool
|
179
|
-
def vocab_only=: (bool) -> bool
|
210
|
+
def f16_kv: () -> bool
|
211
|
+
def f16_kv=: (bool) -> bool
|
212
|
+
def logits_all: () -> bool
|
213
|
+
def logits_all=: (bool) -> bool
|
214
|
+
def embedding: () -> bool
|
215
|
+
def embedding=: (bool) -> bool
|
180
216
|
end
|
181
217
|
|
182
218
|
class ModelQuantizeParams
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -48,6 +48,7 @@ files:
|
|
48
48
|
- ext/llama_cpp/src/llama-util.h
|
49
49
|
- ext/llama_cpp/src/llama.cpp
|
50
50
|
- ext/llama_cpp/src/llama.h
|
51
|
+
- ext/llama_cpp/src/unicode.h
|
51
52
|
- lib/llama_cpp.rb
|
52
53
|
- lib/llama_cpp/version.rb
|
53
54
|
- sig/llama_cpp.rbs
|
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
- !ruby/object:Gem::Version
|
76
77
|
version: '0'
|
77
78
|
requirements: []
|
78
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.4.19
|
79
80
|
signing_key:
|
80
81
|
specification_version: 4
|
81
82
|
summary: Ruby bindings for the llama.cpp.
|