llama_cpp 0.5.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +6 -5
- data/examples/chat.rb +13 -13
- data/examples/embedding.rb +9 -9
- data/ext/llama_cpp/llama_cpp.cpp +583 -262
- data/ext/llama_cpp/src/ggml-alloc.c +8 -2
- data/ext/llama_cpp/src/ggml-alloc.h +1 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +326 -149
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +167 -89
- data/ext/llama_cpp/src/ggml-metal.metal +130 -40
- data/ext/llama_cpp/src/ggml-opencl.cpp +119 -53
- data/ext/llama_cpp/src/ggml.c +2355 -1166
- data/ext/llama_cpp/src/ggml.h +129 -35
- data/ext/llama_cpp/src/k_quants.c +744 -2
- data/ext/llama_cpp/src/llama.cpp +1766 -671
- data/ext/llama_cpp/src/llama.h +321 -120
- data/ext/llama_cpp/src/unicode.h +462 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -10
- data/sig/llama_cpp.rbs +70 -34
- metadata +4 -3
data/sig/llama_cpp.rbs
CHANGED
@@ -67,16 +67,16 @@ module LLaMACpp
|
|
67
67
|
class Model
|
68
68
|
public
|
69
69
|
|
70
|
-
def initialize: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
|
70
|
+
def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
|
71
71
|
| () -> void
|
72
72
|
def empty?: () -> bool
|
73
73
|
def free: () -> void
|
74
|
-
def load: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
|
75
|
-
def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
|
74
|
+
def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
|
75
|
+
def apply_lora_from_file: (lora_path: String, ?scale: Float, ?base_model_path: String, ?n_threads: Integer) -> void
|
76
76
|
def n_vocab: () -> Integer
|
77
|
-
def n_ctx: () -> Integer
|
78
77
|
def n_ctx_train: () -> Integer
|
79
78
|
def n_embd: () -> Integer
|
79
|
+
def rope_freq_scale_train: () -> Float
|
80
80
|
def token_to_piece: (Integer) -> String
|
81
81
|
def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
|
82
82
|
def desc: () -> String
|
@@ -98,10 +98,50 @@ module LLaMACpp
|
|
98
98
|
def n_eval: () -> Integer
|
99
99
|
end
|
100
100
|
|
101
|
+
class ModelParams
|
102
|
+
public
|
103
|
+
|
104
|
+
def n_gpu_layers: () -> Integer
|
105
|
+
def n_gpu_layers=: (Integer) -> Integer
|
106
|
+
def main_gpu: () -> Integer
|
107
|
+
def main_gpu=: (Integer) -> Integer
|
108
|
+
def tensor_split: () -> Array[Float]
|
109
|
+
def vocab_only: () -> bool
|
110
|
+
def vocab_only=: (bool) -> bool
|
111
|
+
def use_mmap: () -> bool
|
112
|
+
def use_mmap=: (bool) -> bool
|
113
|
+
def use_mlock: () -> bool
|
114
|
+
def use_mlock=: (bool) -> bool
|
115
|
+
end
|
116
|
+
|
117
|
+
class Batch
|
118
|
+
public
|
119
|
+
|
120
|
+
def initialize: (n_tokens: Integer, embd: Integer) -> void
|
121
|
+
def n_tokens=: (Integer) -> Integer
|
122
|
+
def n_tokens: () -> Integer
|
123
|
+
def all_pos_zero=: (Integer) -> Integer
|
124
|
+
def all_pos_zero: () -> Integer
|
125
|
+
def all_pos_one=: (Integer) -> Integer
|
126
|
+
def all_pos_one: () -> Integer
|
127
|
+
def all_seq_id=: (Integer) -> Integer
|
128
|
+
def all_seq_id: () -> Integer
|
129
|
+
def set_token: (Integer, Integer) -> Integer
|
130
|
+
def get_token: (Integer) -> Integer
|
131
|
+
def set_pos: (Integer, Integer) -> Integer
|
132
|
+
def get_pos: (Integer) -> Integer
|
133
|
+
def set_seq_id: (Integer, Integer) -> Integer
|
134
|
+
def get_seq_id: (Integer) -> Integer
|
135
|
+
def set_logit: (Integer, bool) -> bool
|
136
|
+
def get_logit: (Integer) -> bool
|
137
|
+
end
|
138
|
+
|
101
139
|
class Context
|
102
140
|
public
|
103
141
|
|
104
|
-
|
142
|
+
attr_reader model: ::LLaMACpp::Model
|
143
|
+
|
144
|
+
def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
|
105
145
|
def embeddings: () -> Array[Float]
|
106
146
|
def text: (Integer) -> String
|
107
147
|
def score: (Integer) -> Float
|
@@ -109,20 +149,24 @@ module LLaMACpp
|
|
109
149
|
def token_bos: () -> Integer
|
110
150
|
def token_eos: () -> Integer
|
111
151
|
def token_nl: () -> Integer
|
112
|
-
def
|
113
|
-
def
|
114
|
-
def
|
152
|
+
def token_prefix: () -> Integer
|
153
|
+
def token_middle: () -> Integer
|
154
|
+
def token_suffix: () -> Integer
|
155
|
+
def token_eot: () -> Integer
|
156
|
+
def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
|
157
|
+
def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
|
158
|
+
def decode: (::LLaMACpp::Batch) -> void
|
115
159
|
def logits: () -> Array[Float]
|
116
160
|
def n_ctx: () -> Integer
|
117
|
-
def n_ctx_train: () -> Integer
|
118
|
-
def n_embd: () -> Integer
|
119
|
-
def n_vocab: () -> Integer
|
120
161
|
def timings: () -> ::LLaMACpp::Timings
|
121
162
|
def print_timings: () -> void
|
122
163
|
def reset_timings: () -> void
|
123
|
-
def token_to_piece: (Integer) -> String
|
124
|
-
def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
|
125
164
|
def kv_cache_token_count: () -> Integer
|
165
|
+
def kv_cache_tokens_rm: (Integer, Integer) -> void
|
166
|
+
def kv_cache_seq_rm: (Integer, Integer, Integer) -> void
|
167
|
+
def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
|
168
|
+
def kv_cache_seq_keep: (Integer) -> void
|
169
|
+
def kv_cache_seq_shift: (Integer, Integer, Integer, Integer) -> void
|
126
170
|
def set_rng_seed: (Integer) -> void
|
127
171
|
def load_session_file: (session_path: String) -> void
|
128
172
|
def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
|
@@ -134,6 +178,7 @@ module LLaMACpp
|
|
134
178
|
def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
|
135
179
|
def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
|
136
180
|
def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
|
181
|
+
def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
|
137
182
|
def sample_temperature: (::LLaMACpp::TokenDataArray, temperature: Float) -> void
|
138
183
|
def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
|
139
184
|
def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
|
@@ -146,37 +191,28 @@ module LLaMACpp
|
|
146
191
|
class ContextParams
|
147
192
|
public
|
148
193
|
|
149
|
-
def
|
150
|
-
def
|
151
|
-
def f16_kv: () -> bool
|
152
|
-
def f16_kv=: (bool) -> bool
|
153
|
-
def logits_all: () -> bool
|
154
|
-
def logits_all=: (bool) -> bool
|
194
|
+
def seed: () -> Integer
|
195
|
+
def seed=: (Integer) -> Integer
|
155
196
|
def n_ctx: () -> Integer
|
156
197
|
def n_ctx=: (Integer) -> Integer
|
157
198
|
def n_batch: () -> Integer
|
158
199
|
def n_batch=: (Integer) -> Integer
|
159
|
-
def
|
160
|
-
def
|
161
|
-
def
|
162
|
-
def
|
163
|
-
def tensor_split: () -> Array[Float]
|
200
|
+
def n_threads: () -> Integer
|
201
|
+
def n_threads=: (Integer) -> Integer
|
202
|
+
def n_threads_batch: () -> Integer
|
203
|
+
def n_threads_batch=: (Integer) -> Integer
|
164
204
|
def rope_freq_base=: (Float) -> Float
|
165
205
|
def rope_freq_base: () -> Float
|
166
206
|
def rope_freq_scale=: (Float) -> Float
|
167
207
|
def rope_freq_scale: () -> Float
|
168
|
-
def low_vram: () -> bool
|
169
|
-
def low_vram=: (bool) -> bool
|
170
208
|
def mul_mat_q: () -> bool
|
171
209
|
def mul_mat_q=: (bool) -> bool
|
172
|
-
def
|
173
|
-
def
|
174
|
-
def
|
175
|
-
def
|
176
|
-
def
|
177
|
-
def
|
178
|
-
def vocab_only: () -> bool
|
179
|
-
def vocab_only=: (bool) -> bool
|
210
|
+
def f16_kv: () -> bool
|
211
|
+
def f16_kv=: (bool) -> bool
|
212
|
+
def logits_all: () -> bool
|
213
|
+
def logits_all=: (bool) -> bool
|
214
|
+
def embedding: () -> bool
|
215
|
+
def embedding=: (bool) -> bool
|
180
216
|
end
|
181
217
|
|
182
218
|
class ModelQuantizeParams
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -48,6 +48,7 @@ files:
|
|
48
48
|
- ext/llama_cpp/src/llama-util.h
|
49
49
|
- ext/llama_cpp/src/llama.cpp
|
50
50
|
- ext/llama_cpp/src/llama.h
|
51
|
+
- ext/llama_cpp/src/unicode.h
|
51
52
|
- lib/llama_cpp.rb
|
52
53
|
- lib/llama_cpp/version.rb
|
53
54
|
- sig/llama_cpp.rbs
|
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
- !ruby/object:Gem::Version
|
76
77
|
version: '0'
|
77
78
|
requirements: []
|
78
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.4.19
|
79
80
|
signing_key:
|
80
81
|
specification_version: 4
|
81
82
|
summary: Ruby bindings for the llama.cpp.
|