llama_cpp 0.12.6 → 0.12.7

@@ -100,6 +100,8 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
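
The two new file types select the IQ1_S and IQ4_NL quantization formats. A minimal sketch of requantizing a GGUF file to the new IQ4_NL type through the C quantization API (the file names are placeholders and error handling is elided):

#include "llama.h"

int main(void) {
    llama_backend_init();

    // Start from the library defaults, then select the new quant type.
    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; // added in this release

    // Returns 0 on success.
    uint32_t rc = llama_model_quantize("model-f16.gguf", "model-iq4_nl.gguf", &qparams);

    llama_backend_free();
    return rc == 0 ? 0 : 1;
}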
@@ -112,6 +114,12 @@ extern "C" {
         LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN,
     };
 
+    enum llama_pooling_type {
+        LLAMA_POOLING_NONE = 0,
+        LLAMA_POOLING_MEAN = 1,
+        LLAMA_POOLING_CLS = 2,
+    };
+
     enum llama_split_mode {
         LLAMA_SPLIT_NONE = 0, // single GPU
         LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs
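
The new enum names how per-token embeddings are pooled into a single sequence embedding: no pooling, a mean over tokens, or taking the CLS (first) token. A plain-C illustration of those semantics (not a llama.cpp API; assumes a row-major n_tokens x n_embd buffer):

#include <stddef.h>

// LLAMA_POOLING_MEAN: average the n_tokens rows of a [n_tokens][n_embd] buffer.
static void mean_pool(const float * embd, size_t n_tokens, size_t n_embd, float * out) {
    for (size_t j = 0; j < n_embd; j++) out[j] = 0.0f;
    for (size_t i = 0; i < n_tokens; i++) {
        for (size_t j = 0; j < n_embd; j++) out[j] += embd[i * n_embd + j];
    }
    for (size_t j = 0; j < n_embd; j++) out[j] /= (float) n_tokens;
}

// LLAMA_POOLING_CLS: the first token's embedding stands in for the sequence.
static void cls_pool(const float * embd, size_t n_embd, float * out) {
    for (size_t j = 0; j < n_embd; j++) out[j] = embd[j];
}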
@@ -298,6 +306,12 @@ extern "C" {
         int32_t n_eval;
     };
 
+    // used in chat template
+    typedef struct llama_chat_message {
+        const char * role;
+        const char * content;
+    } llama_chat_message;
+
     // Helpers for getting default parameters
     LLAMA_API struct llama_model_params llama_model_default_params(void);
     LLAMA_API struct llama_context_params llama_context_default_params(void);
@@ -306,7 +320,10 @@ extern "C" {
     // Initialize the llama + ggml backend
     // If numa is true, use NUMA optimizations
     // Call once at the start of the program
-    LLAMA_API void llama_backend_init(bool numa);
+    LLAMA_API void llama_backend_init(void);
+
+    //optional:
+    LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa);
 
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
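
This is a breaking change: the NUMA flag moves out of llama_backend_init() into the new, optional llama_numa_init(). A sketch of the caller-side migration, assuming GGML_NUMA_STRATEGY_DISTRIBUTE as the strategy (the ggml_numa_strategy values live in ggml.h, which is not shown in this diff):

#include <stdbool.h>
#include "llama.h"

void init_backend(bool use_numa) {
    // Before: llama_backend_init(use_numa);
    llama_backend_init();
    if (use_numa) {
        // NUMA setup is now a separate, optional call.
        llama_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
    }
}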
@@ -689,6 +706,25 @@ extern "C" {
                          char * buf,
                          int32_t length);
 
+    /// Apply chat template. Inspired by hf's apply_chat_template() in Python.
+    /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model".
+    /// NOTE: This function does not use a jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
+    /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model's default chat template will be used instead.
+    /// @param chat Pointer to a list of multiple llama_chat_message
+    /// @param n_msg Number of llama_chat_message in this chat
+    /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
+    /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages).
+    /// @param length The size of the allocated buffer
+    /// @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
+    LLAMA_API int32_t llama_chat_apply_template(
+        const struct llama_model * model,
+        const char * tmpl,
+        const struct llama_chat_message * chat,
+        size_t n_msg,
+        bool add_ass,
+        char * buf,
+        int32_t length);
+
     //
     // Grammar
     //
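
Putting the new struct and function together, a sketch of the sizing and retry pattern the doc comment recommends (format_chat is an illustrative helper, not part of the API; error handling is minimal):

#include <stdlib.h>
#include <string.h>
#include "llama.h"

// Format a chat with the model's built-in template (tmpl == NULL).
// Caller frees the returned string.
static char * format_chat(const struct llama_model * model,
                          const llama_chat_message * msgs, size_t n_msg) {
    // Recommended starting size: 2 * total characters of all messages.
    size_t alloc = 1;
    for (size_t i = 0; i < n_msg; i++) {
        alloc += 2 * (strlen(msgs[i].role) + strlen(msgs[i].content));
    }

    char * buf = malloc(alloc);
    if (buf == NULL) return NULL;

    int32_t n = llama_chat_apply_template(model, NULL, msgs, n_msg,
                                          /*add_ass=*/true, buf, (int32_t) alloc);
    if (n < 0) { free(buf); return NULL; } // template not recognized

    if ((size_t) n >= alloc) {
        // The formatted prompt did not fit: grow the buffer and re-apply,
        // as the @return note above suggests.
        char * grown = realloc(buf, (size_t) n + 1);
        if (grown == NULL) { free(buf); return NULL; }
        buf = grown;
        n = llama_chat_apply_template(model, NULL, msgs, n_msg, true, buf, n + 1);
    }
    buf[n] = '\0';
    return buf;
}

A hypothetical call site, with a model loaded elsewhere:

llama_chat_message msgs[] = {
    { "system", "You are a helpful assistant." },
    { "user",   "Hello!" },
};
char * prompt = format_chat(model, msgs, 2);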
@@ -1,6 +1,6 @@
 ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
 	GF_CC_IS_GCC = 1
-	GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+	GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null; echo; $(GF_CC) -dumpversion; } | awk -F. '/./ { printf("%02d%02d%02d", $$1, $$2, $$3); exit }')
 else
 	GF_CC_IS_CLANG = 1
 	ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.12.6
+  version: 0.12.7
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-02-17 00:00:00.000000000 Z
+date: 2024-02-24 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: