llama_cpp 0.12.6 → 0.12.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,6 +100,8 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_Q2_K_S  = 21, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ1_S   = 24, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ4_NL  = 25, // except 1d tensors

         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
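The two added constants extend the quantization file-type menu with two of upstream llama.cpp's newer low-bit formats. They plug into the existing llama_model_quantize() entry point declared in the same header; below is a minimal sketch with placeholder file names (note that the very-low-bit IQ1_S type generally expects an importance matrix, which this sketch does not supply):

    #include <stdio.h>
    #include "llama.h"

    int main(void) {
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; // new in this release

        // placeholder paths; llama_model_quantize() returns 0 on success
        if (llama_model_quantize("model-f16.gguf", "model-iq4_nl.gguf", &params) != 0) {
            fprintf(stderr, "quantization failed\n");
            return 1;
        }
        return 0;
    }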
@@ -112,6 +114,12 @@ extern "C" {
         LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN,
     };

+    enum llama_pooling_type {
+        LLAMA_POOLING_NONE = 0,
+        LLAMA_POOLING_MEAN = 1,
+        LLAMA_POOLING_CLS  = 2,
+    };
+
     enum llama_split_mode {
         LLAMA_SPLIT_NONE  = 0, // single GPU
         LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs
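The new llama_pooling_type enum names three ways of collapsing per-token embeddings into a single sequence embedding. The hunk only declares the constants; the sketch below illustrates what each mode denotes, not how the library wires the enum into its embedding path:

    #include <stddef.h>

    // emb holds n_tokens x n_embd per-token embeddings, row-major.

    // LLAMA_POOLING_MEAN: average the embeddings of all tokens.
    static void pool_mean(const float * emb, size_t n_tokens, size_t n_embd, float * out) {
        for (size_t j = 0; j < n_embd; j++) {
            out[j] = 0.0f;
            for (size_t i = 0; i < n_tokens; i++) {
                out[j] += emb[i * n_embd + j];
            }
            out[j] /= (float) n_tokens;
        }
    }

    // LLAMA_POOLING_CLS: keep only the first ([CLS]) token's embedding.
    static void pool_cls(const float * emb, size_t n_embd, float * out) {
        for (size_t j = 0; j < n_embd; j++) {
            out[j] = emb[j];
        }
    }

    // LLAMA_POOLING_NONE: no pooling; the caller consumes per-token embeddings.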
@@ -298,6 +306,12 @@ extern "C" {
         int32_t n_eval;
     };

+    // used in chat template
+    typedef struct llama_chat_message {
+        const char * role;
+        const char * content;
+    } llama_chat_message;
+
     // Helpers for getting default parameters
     LLAMA_API struct llama_model_params llama_model_default_params(void);
     LLAMA_API struct llama_context_params llama_context_default_params(void);
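The new struct is a plain pair of C strings, so a conversation is just an array of them. The "system"/"user"/"assistant" role names below follow common chat-template convention (the header itself does not restrict them); such an array feeds llama_chat_apply_template(), introduced further down:

    llama_chat_message chat[] = {
        { "system",    "You are a helpful assistant." },
        { "user",      "What is the capital of France?" },
        { "assistant", "Paris." },
        { "user",      "And of Italy?" },
    };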
@@ -306,7 +320,10 @@ extern "C" {
     // Initialize the llama + ggml backend
     // If numa is true, use NUMA optimizations
     // Call once at the start of the program
-    LLAMA_API void llama_backend_init(bool numa);
+    LLAMA_API void llama_backend_init(void);
+
+    //optional:
+    LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa);

     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
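This is the one breaking signature change in the release: the NUMA flag moves out of llama_backend_init() into the separate, optional llama_numa_init(). A migration sketch follows; GGML_NUMA_STRATEGY_DISTRIBUTE is my assumption of a matching enumerator in the bundled ggml.h, so check that header for the exact names:

    #include <stdbool.h>
    #include "llama.h"

    void init_backend(bool use_numa) {
        // 0.12.6 and earlier: llama_backend_init(use_numa);
        llama_backend_init();
        if (use_numa) {
            // assumed enumerator; see enum ggml_numa_strategy in ggml.h
            llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
        }
    }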
@@ -689,6 +706,25 @@ extern "C" {
                                   char * buf,
                                int32_t   length);

+    /// Apply chat template. Inspired by hf apply_chat_template() on python.
+    /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
+    /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
+    /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model's default chat template will be used instead.
+    /// @param chat Pointer to a list of multiple llama_chat_message
+    /// @param n_msg Number of llama_chat_message in this chat
+    /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
+    /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
+    /// @param length The size of the allocated buffer
+    /// @return The total number of bytes of the formatted prompt. If is it larger than the size of buffer, you may need to re-alloc it and then re-apply the template.
+    LLAMA_API int32_t llama_chat_apply_template(
+              const struct llama_model * model,
+                            const char * tmpl,
+       const struct llama_chat_message * chat,
+                                size_t   n_msg,
+                                  bool   add_ass,
+                                  char * buf,
+                               int32_t   length);
+
     //
     // Grammar
     //
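A usage sketch assembled from the doc comment above: size the buffer at twice the total message length, and if the returned byte count exceeds the buffer, grow it and re-apply, as the comment advises. Passing tmpl as NULL selects the template from the model's metadata; the model pointer is assumed to come from an earlier llama_load_model_from_file() call:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "llama.h"

    // Returns a malloc'd formatted prompt, or NULL on failure.
    char * format_prompt(const struct llama_model * model,
                         const llama_chat_message * chat, size_t n_msg) {
        size_t total = 0;
        for (size_t i = 0; i < n_msg; i++) {
            total += strlen(chat[i].role) + strlen(chat[i].content);
        }
        int32_t alloc = (int32_t) (2 * total + 1); // recommended size, per the docs

        char * buf = malloc((size_t) alloc);
        if (buf == NULL) {
            return NULL;
        }

        // tmpl == NULL: use the model's own template; add_ass == true: end the
        // prompt with the assistant prefix so it is ready for generation
        int32_t n = llama_chat_apply_template(model, NULL, chat, n_msg, true, buf, alloc);
        if (n < 0) { // defensive: treat a negative count as "template not handled"
            free(buf);
            return NULL;
        }
        if (n >= alloc) { // output did not fit: grow and re-apply, as documented
            char * grown = realloc(buf, (size_t) n + 1);
            if (grown == NULL) {
                free(buf);
                return NULL;
            }
            buf   = grown;
            alloc = n + 1;
            n = llama_chat_apply_template(model, NULL, chat, n_msg, true, buf, alloc);
        }
        buf[n] = '\0'; // the API returns a byte count; terminate the string ourselves
        return buf;
    }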
@@ -1,6 +1,6 @@
 ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
 GF_CC_IS_GCC = 1
-GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null; echo; $(GF_CC) -dumpversion; } | awk -F. '/./ { printf("%02d%02d%02d", $$1, $$2, $$3); exit }')
 else
 GF_CC_IS_CLANG = 1
 ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
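Reading the change, this looks like a robustness fix to compiler-version detection: the old '||' chain only fell back to '-dumpversion' when '-dumpfullversion' exited non-zero, so a compiler that exited 0 while printing nothing left GF_CC_VER empty. The new form runs both probes unconditionally (the 'echo' guarantees they land on separate lines) and has awk format the first non-empty version it sees, then exit.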
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.12.6
+  version: 0.12.7
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-02-17 00:00:00.000000000 Z
+date: 2024-02-24 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: