llama_cpp 0.10.1 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -303,7 +303,7 @@ extern "C" {
303
303
 
304
304
  #if defined(__ARM_NEON) && defined(__CUDACC__)
305
305
  typedef half ggml_fp16_t;
306
- #elif defined(__ARM_NEON)
306
+ #elif defined(__ARM_NEON) && !defined(_MSC_VER)
307
307
  typedef __fp16 ggml_fp16_t;
308
308
  #else
309
309
  typedef uint16_t ggml_fp16_t;
@@ -343,6 +343,12 @@ extern "C" {
343
343
  GGML_TYPE_COUNT,
344
344
  };
345
345
 
346
+ // precision
347
+ enum ggml_prec {
348
+ GGML_PREC_DEFAULT,
349
+ GGML_PREC_F32,
350
+ };
351
+
346
352
  enum ggml_backend_type {
347
353
  GGML_BACKEND_CPU = 0,
348
354
  GGML_BACKEND_GPU = 10,
@@ -478,7 +484,8 @@ extern "C" {
478
484
  enum ggml_log_level {
479
485
  GGML_LOG_LEVEL_ERROR = 2,
480
486
  GGML_LOG_LEVEL_WARN = 3,
481
- GGML_LOG_LEVEL_INFO = 4
487
+ GGML_LOG_LEVEL_INFO = 4,
488
+ GGML_LOG_LEVEL_DEBUG = 5
482
489
  };
483
490
 
484
491
  // ggml object
@@ -729,8 +736,8 @@ extern "C" {
729
736
  GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
730
737
 
731
738
  // Context tensor enumeration and lookup
732
- GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
733
- GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
739
+ GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
740
+ GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
734
741
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
735
742
 
736
743
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
@@ -1057,6 +1064,12 @@ extern "C" {
1057
1064
  struct ggml_tensor * a,
1058
1065
  struct ggml_tensor * b);
1059
1066
 
1067
+ // change the precision of a matrix multiplication
1068
+ // set to GGML_PREC_F32 for higher precision (useful for phi-2)
1069
+ GGML_API void ggml_mul_mat_set_prec(
1070
+ struct ggml_tensor * a,
1071
+ enum ggml_prec prec);
1072
+
1060
1073
  // indirect matrix multiplication
1061
1074
  // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1062
1075
  GGML_API struct ggml_tensor * ggml_mul_mat_id(
@@ -1082,13 +1095,13 @@ extern "C" {
1082
1095
  GGML_API struct ggml_tensor * ggml_scale(
1083
1096
  struct ggml_context * ctx,
1084
1097
  struct ggml_tensor * a,
1085
- struct ggml_tensor * b);
1098
+ float s);
1086
1099
 
1087
1100
  // in-place, returns view(a)
1088
1101
  GGML_API struct ggml_tensor * ggml_scale_inplace(
1089
1102
  struct ggml_context * ctx,
1090
1103
  struct ggml_tensor * a,
1091
- struct ggml_tensor * b);
1104
+ float s);
1092
1105
 
1093
1106
  // b -> view(a,offset,nb1,nb2,3), return modified a
1094
1107
  GGML_API struct ggml_tensor * ggml_set(
@@ -2123,10 +2136,11 @@ extern "C" {
2123
2136
  GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2124
2137
  GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
2125
2138
 
2126
- GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2127
- GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2128
- GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2129
- GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2139
+ GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2140
+ GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2141
+ GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2142
+ GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2143
+ GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
2130
2144
 
2131
2145
  // overrides existing values or adds a new one
2132
2146
  GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);