llama_cpp 0.15.3 → 0.15.4

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -85,6 +85,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,
         LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,
         LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,
+        LLAMA_VOCAB_PRE_TYPE_SMAUG          = 14,
     };

     // note: these values should be synchronized with ggml_rope
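The new SMAUG entry extends the set of BPE pre-tokenizer variants the loader can select from a GGUF model's tokenizer metadata. The selection happens at load time and is applied inside tokenization, so callers are unaffected. A minimal sketch of the unchanged calling pattern, assuming a loaded model handle (the helper name is illustrative):

    #include <string.h>
    #include "llama.h"

    // The pre-tokenizer variant (e.g. LLAMA_VOCAB_PRE_TYPE_SMAUG) is chosen from the
    // model's metadata at load time; llama_tokenize applies it internally.
    static int32_t tokenize_text(const struct llama_model * model, const char * text,
                                 llama_token * tokens, int32_t n_tokens_max) {
        return llama_tokenize(model, text, (int32_t) strlen(text),
                              tokens, n_tokens_max,
                              /*add_special=*/ true, /*parse_special=*/ false);
    }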
@@ -264,6 +265,8 @@ extern "C" {
         bool check_tensors; // validate model tensor data
     };

+    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+    //       https://github.com/ggerganov/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t seed;              // RNG seed, -1 for random
         uint32_t n_ctx;             // text context, 0 = from model
@@ -290,14 +293,14 @@ extern "C" {
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;

-        enum ggml_type type_k; // data type for K cache
-        enum ggml_type type_v; // data type for V cache
+        enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+        enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         bool embeddings;  // if true, extract embeddings (together with logits)
         bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-        bool flash_attn;  // whether to use flash attention
+        bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
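The fields newly marked [EXPERIMENTAL] are opt-in; llama_context_default_params() leaves them at their safe defaults. A minimal sketch of enabling them, assuming a loaded struct llama_model * model (the values are illustrative, and in this llama.cpp revision a quantized V cache generally requires flash_attn):

    #include "llama.h"

    static struct llama_context * make_ctx(struct llama_model * model) {
        struct llama_context_params params = llama_context_default_params();
        params.n_ctx      = 4096;           // text context
        params.flash_attn = true;           // [EXPERIMENTAL] flash attention
        params.type_k     = GGML_TYPE_Q8_0; // [EXPERIMENTAL] quantized K cache
        params.type_v     = GGML_TYPE_Q8_0; // [EXPERIMENTAL] quantized V cache (needs flash_attn here)
        return llama_new_context_with_model(model, params);
    }

Per the NOTE above, changing these defaults away from the stock values is exactly what the linked PR warns may crash or degrade results in some configurations.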
@@ -421,8 +424,8 @@ extern "C" {

     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
-    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);
+    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
@@ -823,6 +826,9 @@ extern "C" {
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
     LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

+    // Identify if Token Id is a control token or a render-able token
+    LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
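llama_token_is_control complements llama_token_is_eog: it flags tokens such as BOS/EOS or chat-template markers that should not be rendered as text. A minimal sketch of filtering them while printing sampled output, assuming a loaded model (the buffer size and helper name are illustrative; the trailing bool of llama_token_to_piece is the `special` flag in this revision):

    #include <stdio.h>
    #include "llama.h"

    // Print a sampled token only when it is renderable text, skipping control tokens.
    static void print_piece(const struct llama_model * model, llama_token token) {
        if (llama_token_is_control(model, token)) {
            return; // e.g. BOS/EOS or chat-template special tokens
        }
        char buf[128];
        const int32_t n = llama_token_to_piece(model, token, buf, (int32_t) sizeof(buf), false);
        if (n > 0) {
            fwrite(buf, 1, (size_t) n, stdout);
        }
    }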
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.15.3
+  version: 0.15.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-05-25 00:00:00.000000000 Z
+date: 2024-06-01 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: