llama_cpp 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -303,7 +303,7 @@ extern "C" {
303
303
 
304
304
  #if defined(__ARM_NEON) && defined(__CUDACC__)
305
305
  typedef half ggml_fp16_t;
306
- #elif defined(__ARM_NEON)
306
+ #elif defined(__ARM_NEON) && !defined(_MSC_VER)
307
307
  typedef __fp16 ggml_fp16_t;
308
308
  #else
309
309
  typedef uint16_t ggml_fp16_t;
@@ -343,6 +343,12 @@ extern "C" {
343
343
  GGML_TYPE_COUNT,
344
344
  };
345
345
 
346
+ // precision
347
+ enum ggml_prec {
348
+ GGML_PREC_DEFAULT,
349
+ GGML_PREC_F32,
350
+ };
351
+
346
352
  enum ggml_backend_type {
347
353
  GGML_BACKEND_CPU = 0,
348
354
  GGML_BACKEND_GPU = 10,
@@ -478,7 +484,8 @@ extern "C" {
478
484
  enum ggml_log_level {
479
485
  GGML_LOG_LEVEL_ERROR = 2,
480
486
  GGML_LOG_LEVEL_WARN = 3,
481
- GGML_LOG_LEVEL_INFO = 4
487
+ GGML_LOG_LEVEL_INFO = 4,
488
+ GGML_LOG_LEVEL_DEBUG = 5
482
489
  };
483
490
 
484
491
  // ggml object
@@ -729,8 +736,8 @@ extern "C" {
729
736
  GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
730
737
 
731
738
  // Context tensor enumeration and lookup
732
- GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
733
- GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
739
+ GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
740
+ GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
734
741
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
735
742
 
736
743
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
@@ -1057,6 +1064,12 @@ extern "C" {
1057
1064
  struct ggml_tensor * a,
1058
1065
  struct ggml_tensor * b);
1059
1066
 
1067
+ // change the precision of a matrix multiplication
1068
+ // set to GGML_PREC_F32 for higher precision (useful for phi-2)
1069
+ GGML_API void ggml_mul_mat_set_prec(
1070
+ struct ggml_tensor * a,
1071
+ enum ggml_prec prec);
1072
+
1060
1073
  // indirect matrix multiplication
1061
1074
  // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1062
1075
  GGML_API struct ggml_tensor * ggml_mul_mat_id(
@@ -1082,13 +1095,13 @@ extern "C" {
1082
1095
  GGML_API struct ggml_tensor * ggml_scale(
1083
1096
  struct ggml_context * ctx,
1084
1097
  struct ggml_tensor * a,
1085
- struct ggml_tensor * b);
1098
+ float s);
1086
1099
 
1087
1100
  // in-place, returns view(a)
1088
1101
  GGML_API struct ggml_tensor * ggml_scale_inplace(
1089
1102
  struct ggml_context * ctx,
1090
1103
  struct ggml_tensor * a,
1091
- struct ggml_tensor * b);
1104
+ float s);
1092
1105
 
1093
1106
  // b -> view(a,offset,nb1,nb2,3), return modified a
1094
1107
  GGML_API struct ggml_tensor * ggml_set(
@@ -2123,10 +2136,11 @@ extern "C" {
2123
2136
  GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2124
2137
  GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
2125
2138
 
2126
- GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2127
- GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2128
- GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2129
- GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2139
+ GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2140
+ GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2141
+ GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2142
+ GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2143
+ GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
2130
2144
 
2131
2145
  // overrides existing values or adds a new one
2132
2146
  GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);