llama_cpp 0.5.1 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -847,7 +847,7 @@ std::array<std::string, 2> mul_str_values = {
847
847
  "mul_f32", "float"
848
848
  };
849
849
 
850
- std::string& replace(std::string& s, const std::string& from, const std::string& to) {
850
+ static std::string& replace(std::string& s, const std::string& from, const std::string& to) {
851
851
  size_t pos = 0;
852
852
  while ((pos = s.find(from, pos)) != std::string::npos) {
853
853
  s.replace(pos, from.length(), to);
@@ -856,7 +856,7 @@ std::string& replace(std::string& s, const std::string& from, const std::string&
856
856
  return s;
857
857
  }
858
858
 
859
- std::string generate_kernels() {
859
+ static std::string generate_kernels() {
860
860
  std::stringstream src;
861
861
  src << program_source << '\n';
862
862
  src << k_quants_source << '\n';
@@ -1788,7 +1788,7 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
1788
1788
  return false;
1789
1789
  }
1790
1790
 
1791
- bool ggml_cl_mul_mat_use_f16(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * /* dst */) {
1791
+ static bool ggml_cl_mul_mat_use_f16(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * /* dst */) {
1792
1792
  // If device doesn't support FP16
1793
1793
  if (!fp16_support) {
1794
1794
  return false;
@@ -1,4 +1,3 @@
1
- #define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
2
1
  #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
3
2
 
4
3
  #include "ggml.h"
@@ -47,6 +46,10 @@
47
46
  // disable "possible loss of data" to avoid hundreds of casts
48
47
  // we should just be careful :)
49
48
  #pragma warning(disable: 4244 4267)
49
+
50
+ // disable POSIX deprecation warnings
51
+ // these functions are never going away, anyway
52
+ #pragma warning(disable: 4996)
50
53
  #endif
51
54
 
52
55
  #if defined(_WIN32)
@@ -280,7 +283,7 @@ typedef double ggml_float;
280
283
  // 16-bit float
281
284
  // on Arm, we use __fp16
282
285
  // on x86, we use uint16_t
283
- #ifdef __ARM_NEON
286
+ #if defined(__ARM_NEON) && !defined(_MSC_VER)
284
287
 
285
288
  // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
286
289
  //
@@ -307,12 +310,14 @@ typedef double ggml_float;
307
310
  #if defined(_MSC_VER) || defined(__MINGW32__)
308
311
  #include <intrin.h>
309
312
  #else
313
+ #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
310
314
  #if !defined(__riscv)
311
315
  #include <immintrin.h>
312
316
  #endif
313
317
  #endif
314
318
  #endif
315
319
  #endif
320
+ #endif
316
321
 
317
322
  #ifdef __riscv_v_intrinsic
318
323
  #include <riscv_vector.h>
@@ -4298,10 +4303,21 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
4298
4303
  }
4299
4304
 
4300
4305
  size_t ggml_nbytes(const struct ggml_tensor * tensor) {
4301
- size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
4302
- for (int i = 1; i < GGML_MAX_DIMS; ++i) {
4303
- nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
4306
+ size_t nbytes;
4307
+ size_t blck_size = ggml_blck_size(tensor->type);
4308
+ if (blck_size == 1) {
4309
+ nbytes = ggml_type_size(tensor->type);
4310
+ for (int i = 0; i < GGML_MAX_DIMS; ++i) {
4311
+ nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
4312
+ }
4304
4313
  }
4314
+ else {
4315
+ nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
4316
+ for (int i = 1; i < GGML_MAX_DIMS; ++i) {
4317
+ nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
4318
+ }
4319
+ }
4320
+
4305
4321
  return nbytes;
4306
4322
  }
4307
4323
 
@@ -17278,10 +17294,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17278
17294
  } else {
17279
17295
  // wait for other threads to finish
17280
17296
  const int last = node_n;
17281
- do {
17282
- //sched_yield();
17297
+ while (true) {
17298
+ // TODO: this sched_yield can have significant impact on the performance - either positive or negative
17299
+ // depending on the workload and the operating system.
17300
+ // since it is not clear what is the best approach, it should potentially become user-configurable
17301
+ // ref: https://github.com/ggerganov/ggml/issues/291
17302
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
17303
+ sched_yield();
17304
+ #endif
17305
+
17283
17306
  node_n = atomic_load(&state->shared->node_n);
17284
- } while (node_n == last);
17307
+ if (node_n != last) break;
17308
+ };
17285
17309
  }
17286
17310
 
17287
17311
  // check if we should stop
@@ -18332,10 +18356,11 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
18332
18356
  for (int i = 0; i < cgraph->n_leafs; i++) {
18333
18357
  struct ggml_tensor * node = cgraph->leafs[i];
18334
18358
 
18335
- GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
18359
+ GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
18336
18360
  i,
18337
18361
  node->ne[0], node->ne[1],
18338
- ggml_op_name(node->op));
18362
+ ggml_op_name(node->op),
18363
+ ggml_get_name(node));
18339
18364
  }
18340
18365
 
18341
18366
  for (int i = 0; i < GGML_OP_COUNT; i++) {
@@ -18872,7 +18897,6 @@ static enum ggml_opt_result linesearch_backtracking(
18872
18897
  // strong Wolfe condition (GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE)
18873
18898
  return count;
18874
18899
  }
18875
- return count;
18876
18900
  }
18877
18901
  }
18878
18902
 
@@ -20095,27 +20119,27 @@ const char * gguf_type_name(enum gguf_type type) {
20095
20119
  return GGUF_TYPE_NAME[type];
20096
20120
  }
20097
20121
 
20098
- int gguf_get_version(struct gguf_context * ctx) {
20122
+ int gguf_get_version(const struct gguf_context * ctx) {
20099
20123
  return ctx->header.version;
20100
20124
  }
20101
20125
 
20102
- size_t gguf_get_alignment(struct gguf_context * ctx) {
20126
+ size_t gguf_get_alignment(const struct gguf_context * ctx) {
20103
20127
  return ctx->alignment;
20104
20128
  }
20105
20129
 
20106
- size_t gguf_get_data_offset(struct gguf_context * ctx) {
20130
+ size_t gguf_get_data_offset(const struct gguf_context * ctx) {
20107
20131
  return ctx->offset;
20108
20132
  }
20109
20133
 
20110
- void * gguf_get_data(struct gguf_context * ctx) {
20134
+ void * gguf_get_data(const struct gguf_context * ctx) {
20111
20135
  return ctx->data;
20112
20136
  }
20113
20137
 
20114
- int gguf_get_n_kv(struct gguf_context * ctx) {
20138
+ int gguf_get_n_kv(const struct gguf_context * ctx) {
20115
20139
  return ctx->header.n_kv;
20116
20140
  }
20117
20141
 
20118
- int gguf_find_key(struct gguf_context * ctx, const char * key) {
20142
+ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
20119
20143
  // return -1 if key not found
20120
20144
  int keyfound = -1;
20121
20145
 
@@ -20131,85 +20155,85 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
20131
20155
  return keyfound;
20132
20156
  }
20133
20157
 
20134
- const char * gguf_get_key(struct gguf_context * ctx, int i) {
20158
+ const char * gguf_get_key(const struct gguf_context * ctx, int i) {
20135
20159
  return ctx->kv[i].key.data;
20136
20160
  }
20137
20161
 
20138
- enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
20162
+ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) {
20139
20163
  return ctx->kv[i].type;
20140
20164
  }
20141
20165
 
20142
- enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
20166
+ enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) {
20143
20167
  return ctx->kv[i].value.arr.type;
20144
20168
  }
20145
20169
 
20146
- const void * gguf_get_arr_data(struct gguf_context * ctx, int i) {
20170
+ const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) {
20147
20171
  return ctx->kv[i].value.arr.data;
20148
20172
  }
20149
20173
 
20150
- const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
20174
+ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
20151
20175
  struct gguf_kv * kv = &ctx->kv[key_id];
20152
20176
  struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
20153
20177
  return str->data;
20154
20178
  }
20155
20179
 
20156
- int gguf_get_arr_n(struct gguf_context * ctx, int i) {
20180
+ int gguf_get_arr_n(const struct gguf_context * ctx, int i) {
20157
20181
  return ctx->kv[i].value.arr.n;
20158
20182
  }
20159
20183
 
20160
- uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
20184
+ uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) {
20161
20185
  return ctx->kv[i].value.uint8;
20162
20186
  }
20163
20187
 
20164
- int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
20188
+ int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) {
20165
20189
  return ctx->kv[i].value.int8;
20166
20190
  }
20167
20191
 
20168
- uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
20192
+ uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) {
20169
20193
  return ctx->kv[i].value.uint16;
20170
20194
  }
20171
20195
 
20172
- int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
20196
+ int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) {
20173
20197
  return ctx->kv[i].value.int16;
20174
20198
  }
20175
20199
 
20176
- uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
20200
+ uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) {
20177
20201
  return ctx->kv[i].value.uint32;
20178
20202
  }
20179
20203
 
20180
- int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
20204
+ int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) {
20181
20205
  return ctx->kv[i].value.int32;
20182
20206
  }
20183
20207
 
20184
- float gguf_get_val_f32(struct gguf_context * ctx, int i) {
20208
+ float gguf_get_val_f32(const struct gguf_context * ctx, int i) {
20185
20209
  return ctx->kv[i].value.float32;
20186
20210
  }
20187
20211
 
20188
- uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
20212
+ uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) {
20189
20213
  return ctx->kv[i].value.uint64;
20190
20214
  }
20191
20215
 
20192
- int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
20216
+ int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) {
20193
20217
  return ctx->kv[i].value.int64;
20194
20218
  }
20195
20219
 
20196
- double gguf_get_val_f64(struct gguf_context * ctx, int i) {
20220
+ double gguf_get_val_f64(const struct gguf_context * ctx, int i) {
20197
20221
  return ctx->kv[i].value.float64;
20198
20222
  }
20199
20223
 
20200
- bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
20224
+ bool gguf_get_val_bool(const struct gguf_context * ctx, int i) {
20201
20225
  return ctx->kv[i].value.bool_;
20202
20226
  }
20203
20227
 
20204
- const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
20228
+ const char * gguf_get_val_str (const struct gguf_context * ctx, int i) {
20205
20229
  return ctx->kv[i].value.str.data;
20206
20230
  }
20207
20231
 
20208
- int gguf_get_n_tensors(struct gguf_context * ctx) {
20232
+ int gguf_get_n_tensors(const struct gguf_context * ctx) {
20209
20233
  return ctx->header.n_tensors;
20210
20234
  }
20211
20235
 
20212
- int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
20236
+ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
20213
20237
  // return -1 if tensor not found
20214
20238
  int tensorfound = -1;
20215
20239
 
@@ -20225,11 +20249,11 @@ int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
20225
20249
  return tensorfound;
20226
20250
  }
20227
20251
 
20228
- size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
20252
+ size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
20229
20253
  return ctx->infos[i].offset;
20230
20254
  }
20231
20255
 
20232
- char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
20256
+ char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
20233
20257
  return ctx->infos[i].name.data;
20234
20258
  }
20235
20259
 
@@ -20512,7 +20536,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
20512
20536
  buf->offset += el_size;
20513
20537
  }
20514
20538
 
20515
- static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
20539
+ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
20516
20540
  // write header
20517
20541
  gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
20518
20542
  gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
@@ -20627,7 +20651,7 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
20627
20651
  }
20628
20652
  }
20629
20653
 
20630
- void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
20654
+ void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
20631
20655
  FILE * file = fopen(fname, "wb");
20632
20656
  if (!file) {
20633
20657
  GGML_ASSERT(false && "failed to open file for writing");
@@ -20644,7 +20668,7 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only
20644
20668
  fclose(file);
20645
20669
  }
20646
20670
 
20647
- size_t gguf_get_meta_size(struct gguf_context * ctx) {
20671
+ size_t gguf_get_meta_size(const struct gguf_context * ctx) {
20648
20672
  // no allocs - only compute size
20649
20673
  struct gguf_buf buf = gguf_buf_init(0);
20650
20674
 
@@ -20653,7 +20677,7 @@ size_t gguf_get_meta_size(struct gguf_context * ctx) {
20653
20677
  return buf.offset;
20654
20678
  }
20655
20679
 
20656
- void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
20680
+ void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
20657
20681
  struct gguf_buf buf = gguf_buf_init(16*1024);
20658
20682
 
20659
20683
  gguf_write_to_buf(ctx, &buf, true);
@@ -20729,6 +20753,14 @@ int ggml_cpu_has_arm_fma(void) {
20729
20753
  #endif
20730
20754
  }
20731
20755
 
20756
+ int ggml_cpu_has_metal(void) {
20757
+ #if defined(GGML_USE_METAL)
20758
+ return 1;
20759
+ #else
20760
+ return 0;
20761
+ #endif
20762
+ }
20763
+
20732
20764
  int ggml_cpu_has_f16c(void) {
20733
20765
  #if defined(__F16C__)
20734
20766
  return 1;
@@ -195,6 +195,14 @@
195
195
  # define GGML_DEPRECATED(func, hint) func
196
196
  #endif
197
197
 
198
+ #ifndef __GNUC__
199
+ # define GGML_ATTRIBUTE_FORMAT(...)
200
+ #elif defined(__MINGW32__)
201
+ # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
202
+ #else
203
+ # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
204
+ #endif
205
+
198
206
  #include <stdint.h>
199
207
  #include <stddef.h>
200
208
  #include <stdbool.h>
@@ -685,6 +693,7 @@ extern "C" {
685
693
 
686
694
  GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
687
695
  GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
696
+ GGML_ATTRIBUTE_FORMAT(2, 3)
688
697
  GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
689
698
 
690
699
  //
@@ -1866,39 +1875,39 @@ extern "C" {
1866
1875
 
1867
1876
  GGML_API const char * gguf_type_name(enum gguf_type type);
1868
1877
 
1869
- GGML_API int gguf_get_version (struct gguf_context * ctx);
1870
- GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
1871
- GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
1872
- GGML_API void * gguf_get_data (struct gguf_context * ctx);
1878
+ GGML_API int gguf_get_version (const struct gguf_context * ctx);
1879
+ GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
1880
+ GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
1881
+ GGML_API void * gguf_get_data (const struct gguf_context * ctx);
1873
1882
 
1874
- GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
1875
- GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
1876
- GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
1883
+ GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
1884
+ GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
1885
+ GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
1877
1886
 
1878
- GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
1879
- GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
1887
+ GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
1888
+ GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
1880
1889
 
1881
1890
  // results are undefined if the wrong type is used for the key
1882
- GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
1883
- GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
1884
- GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
1885
- GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
1886
- GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
1887
- GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
1888
- GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
1889
- GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
1890
- GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
1891
- GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
1892
- GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
1893
- GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
1894
- GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
1895
- GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
1896
- GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
1897
-
1898
- GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
1899
- GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
1900
- GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
1901
- GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
1891
+ GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
1892
+ GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
1893
+ GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
1894
+ GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
1895
+ GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
1896
+ GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
1897
+ GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
1898
+ GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
1899
+ GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
1900
+ GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
1901
+ GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
1902
+ GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
1903
+ GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
1904
+ GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
1905
+ GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
1906
+
1907
+ GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
1908
+ GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
1909
+ GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
1910
+ GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
1902
1911
 
1903
1912
  // overrides existing values or adds a new one
1904
1913
  GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
@@ -1943,11 +1952,11 @@ extern "C" {
1943
1952
  //
1944
1953
 
1945
1954
  // write the entire context to a binary file
1946
- GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
1955
+ GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
1947
1956
 
1948
1957
  // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
1949
- GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
1950
- GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
1958
+ GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
1959
+ GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
1951
1960
 
1952
1961
  //
1953
1962
  // system info
@@ -1961,6 +1970,7 @@ extern "C" {
1961
1970
  GGML_API int ggml_cpu_has_fma (void);
1962
1971
  GGML_API int ggml_cpu_has_neon (void);
1963
1972
  GGML_API int ggml_cpu_has_arm_fma (void);
1973
+ GGML_API int ggml_cpu_has_metal (void);
1964
1974
  GGML_API int ggml_cpu_has_f16c (void);
1965
1975
  GGML_API int ggml_cpu_has_fp16_va (void);
1966
1976
  GGML_API int ggml_cpu_has_wasm_simd (void);
@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
2609
2609
 
2610
2610
  memcpy(utmp, x[i].scales, 12);
2611
2611
 
2612
- const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
2612
+ uint32x2_t mins8 = { 0 };
2613
+ mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
2614
+ mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
2615
+
2613
2616
  utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
2614
2617
  utmp[0] &= kmask1;
2615
2618