llama_cpp 0.9.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/llama_cpp/src/llama.h CHANGED
@@ -42,7 +42,7 @@
 #define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
 
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
-#define LLAMA_SESSION_VERSION 2
+#define LLAMA_SESSION_VERSION 3
 
 #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
 // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
@@ -158,6 +158,22 @@ extern "C" {
         llama_seq_id all_seq_id; // used if seq_id == NULL
     } llama_batch;
 
+    enum llama_model_kv_override_type {
+        LLAMA_KV_OVERRIDE_INT,
+        LLAMA_KV_OVERRIDE_FLOAT,
+        LLAMA_KV_OVERRIDE_BOOL,
+    };
+
+    struct llama_model_kv_override {
+        char key[128];
+        enum llama_model_kv_override_type tag;
+        union {
+            int64_t int_value;
+            double float_value;
+            bool bool_value;
+        };
+    };
+
     struct llama_model_params {
         int32_t n_gpu_layers; // number of layers to store in VRAM
         int32_t main_gpu;     // the GPU that is used for scratch and small tensors
@@ -165,9 +181,13 @@ extern "C" {
 
         // called with a progress value between 0 and 1, pass NULL to disable
         llama_progress_callback progress_callback;
+
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
 
+        // override key-value pairs of the model meta data
+        const struct llama_model_kv_override * kv_overrides;
+
        // Keep the booleans together to avoid misalignment during copy-by-value.
        bool vocab_only; // only load the vocabulary, no weights
        bool use_mmap;   // use mmap if possible
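
The two hunks above add load-time overrides for GGUF metadata: llama_model_params gains a kv_overrides pointer to an array of tagged-union entries. A minimal C sketch of how the new field might be used, assuming the b1620-era API (llama_model_default_params, llama_load_model_from_file), that the list is terminated by an entry with an empty key (a convention taken from upstream llama.cpp's option handling, not from this diff), and a purely illustrative key name:

#include <string.h>
#include "llama.h"

int main(void) {
    // One override plus a zeroed terminator entry (empty key ends the list;
    // this terminator convention is an assumption based on upstream usage).
    struct llama_model_kv_override overrides[2];
    memset(overrides, 0, sizeof(overrides));

    // Example key only: force the add-BOS tokenizer flag off.
    strncpy(overrides[0].key, "tokenizer.ggml.add_bos_token", sizeof(overrides[0].key) - 1);
    overrides[0].tag        = LLAMA_KV_OVERRIDE_BOOL;
    overrides[0].bool_value = false;

    struct llama_model_params mparams = llama_model_default_params();
    mparams.kv_overrides = overrides; // consulted while the GGUF metadata is read

    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        return 1;
    }
    llama_free_model(model);
    return 0;
}
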
@@ -191,11 +211,14 @@ extern "C" {
         float    yarn_beta_slow; // YaRN high correction dim
         uint32_t yarn_orig_ctx;  // YaRN original context size
 
+        enum ggml_type type_k; // data type for K cache
+        enum ggml_type type_v; // data type for V cache
+
         // Keep the booleans together to avoid misalignment during copy-by-value.
-        bool mul_mat_q;  // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-        bool f16_kv;     // use fp16 for KV cache, fp32 otherwise
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
-        bool embedding;  // embedding mode only
+        bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
+        bool logits_all;  // the llama_eval() call computes all logits, not just the last one
+        bool embedding;   // embedding mode only
+        bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
     };
 
     // model quantization parameters
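
The hunk above removes the f16_kv boolean in favor of explicit ggml_type fields for the K and V caches, and offload_kqv controls whether the attention (KQV) ops and the cache itself run on the GPU. A hedged sketch of the new knobs, assuming the enclosing struct is llama_context_params (its name is outside the hunk context) and the b1620-era constructors:

#include "llama.h"

// Sketch: replacement for the removed f16_kv flag. Only the K cache is
// quantized here, since a quantized V cache may not yet be supported at
// this llama.cpp version.
struct llama_context * make_context(struct llama_model * model) {
    struct llama_context_params cparams = llama_context_default_params();

    cparams.type_k      = GGML_TYPE_Q8_0; // 8-bit quantized K cache
    cparams.type_v      = GGML_TYPE_F16;  // fp16 V cache (the old f16_kv behavior)
    cparams.offload_kqv = true;           // keep KQV ops and the KV cache on the GPU

    return llama_new_context_with_model(model, cparams);
}
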
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.9.5'
+  VERSION = '0.10.0'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1593'
+  LLAMA_CPP_VERSION = 'b1620'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -23,6 +23,10 @@ module LLaMACpp
   LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
   LLAMA_FTYPE_MOSTLY_Q6_K: Integer
 
+  LLAMA_KV_OVERRIDE_INT: Integer
+  LLAMA_KV_OVERRIDE_FLOAT: Integer
+  LLAMA_KV_OVERRIDE_BOOL: Integer
+
   LLAMA_GRETYPE_END: Integer
   LLAMA_GRETYPE_ALT: Integer
   LLAMA_GRETYPE_RULE_REF: Integer
@@ -116,6 +120,16 @@ module LLaMACpp
     def n_eval: () -> Integer
   end
 
+  class ModelKVOverride
+    public
+
+    def key: () -> String
+    def tag: () -> Integer
+    def int_value: () -> Integer
+    def float_value: () -> Float
+    def bool_value: () -> bool
+  end
+
   class ModelParams
     public
 
@@ -225,14 +239,18 @@ module LLaMACpp
     def yarn_beta_slow: () -> Float
     def yarn_orig_ctx=: (Integer) -> Integer
     def yarn_orig_ctx: () -> Integer
+    def type_k=: (Integer) -> Integer
+    def type_k: () -> Integer
+    def type_v=: (Integer) -> Integer
+    def type_v: () -> Integer
     def mul_mat_q: () -> bool
     def mul_mat_q=: (bool) -> bool
-    def f16_kv: () -> bool
-    def f16_kv=: (bool) -> bool
     def logits_all: () -> bool
     def logits_all=: (bool) -> bool
     def embedding: () -> bool
     def embedding=: (bool) -> bool
+    def offload_kqv: () -> bool
+    def offload_kqv=: (bool) -> bool
   end
 
   class ModelQuantizeParams
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.9.5
+  version: 0.10.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-12-02 00:00:00.000000000 Z
+date: 2023-12-09 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: