llama_cpp 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -847,7 +847,7 @@ std::array<std::string, 2> mul_str_values = {
847
847
  "mul_f32", "float"
848
848
  };
849
849
 
850
- std::string& replace(std::string& s, const std::string& from, const std::string& to) {
850
+ static std::string& replace(std::string& s, const std::string& from, const std::string& to) {
851
851
  size_t pos = 0;
852
852
  while ((pos = s.find(from, pos)) != std::string::npos) {
853
853
  s.replace(pos, from.length(), to);
@@ -856,7 +856,7 @@ std::string& replace(std::string& s, const std::string& from, const std::string&
856
856
  return s;
857
857
  }
858
858
 
859
- std::string generate_kernels() {
859
+ static std::string generate_kernels() {
860
860
  std::stringstream src;
861
861
  src << program_source << '\n';
862
862
  src << k_quants_source << '\n';
@@ -1788,7 +1788,7 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
1788
1788
  return false;
1789
1789
  }
1790
1790
 
1791
- bool ggml_cl_mul_mat_use_f16(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * /* dst */) {
1791
+ static bool ggml_cl_mul_mat_use_f16(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * /* dst */) {
1792
1792
  // If device doesn't support FP16
1793
1793
  if (!fp16_support) {
1794
1794
  return false;
@@ -1,4 +1,3 @@
1
- #define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
2
1
  #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
3
2
 
4
3
  #include "ggml.h"
@@ -47,6 +46,10 @@
47
46
  // disable "possible loss of data" to avoid hundreds of casts
48
47
  // we should just be careful :)
49
48
  #pragma warning(disable: 4244 4267)
49
+
50
+ // disable POSIX deprecation warnings
51
+ // these functions are never going away, anyway
52
+ #pragma warning(disable: 4996)
50
53
  #endif
51
54
 
52
55
  #if defined(_WIN32)
@@ -280,7 +283,7 @@ typedef double ggml_float;
280
283
  // 16-bit float
281
284
  // on Arm, we use __fp16
282
285
  // on x86, we use uint16_t
283
- #ifdef __ARM_NEON
286
+ #if defined(__ARM_NEON) && !defined(_MSC_VER)
284
287
 
285
288
  // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
286
289
  //
@@ -307,12 +310,14 @@ typedef double ggml_float;
307
310
  #if defined(_MSC_VER) || defined(__MINGW32__)
308
311
  #include <intrin.h>
309
312
  #else
313
+ #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
310
314
  #if !defined(__riscv)
311
315
  #include <immintrin.h>
312
316
  #endif
313
317
  #endif
314
318
  #endif
315
319
  #endif
320
+ #endif
316
321
 
317
322
  #ifdef __riscv_v_intrinsic
318
323
  #include <riscv_vector.h>
@@ -4298,10 +4303,21 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
4298
4303
  }
4299
4304
 
4300
4305
  size_t ggml_nbytes(const struct ggml_tensor * tensor) {
4301
- size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
4302
- for (int i = 1; i < GGML_MAX_DIMS; ++i) {
4303
- nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
4306
+ size_t nbytes;
4307
+ size_t blck_size = ggml_blck_size(tensor->type);
4308
+ if (blck_size == 1) {
4309
+ nbytes = ggml_type_size(tensor->type);
4310
+ for (int i = 0; i < GGML_MAX_DIMS; ++i) {
4311
+ nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
4312
+ }
4304
4313
  }
4314
+ else {
4315
+ nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
4316
+ for (int i = 1; i < GGML_MAX_DIMS; ++i) {
4317
+ nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
4318
+ }
4319
+ }
4320
+
4305
4321
  return nbytes;
4306
4322
  }
4307
4323
 
@@ -17278,10 +17294,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
17278
17294
  } else {
17279
17295
  // wait for other threads to finish
17280
17296
  const int last = node_n;
17281
- do {
17282
- //sched_yield();
17297
+ while (true) {
17298
+ // TODO: this sched_yield can have significant impact on the performance - either positive or negative
17299
+ // depending on the workload and the operating system.
17300
+ // since it is not clear what is the best approach, it should potentially become user-configurable
17301
+ // ref: https://github.com/ggerganov/ggml/issues/291
17302
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
17303
+ sched_yield();
17304
+ #endif
17305
+
17283
17306
  node_n = atomic_load(&state->shared->node_n);
17284
- } while (node_n == last);
17307
+ if (node_n != last) break;
17308
+ };
17285
17309
  }
17286
17310
 
17287
17311
  // check if we should stop
@@ -18332,10 +18356,11 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
18332
18356
  for (int i = 0; i < cgraph->n_leafs; i++) {
18333
18357
  struct ggml_tensor * node = cgraph->leafs[i];
18334
18358
 
18335
- GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
18359
+ GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
18336
18360
  i,
18337
18361
  node->ne[0], node->ne[1],
18338
- ggml_op_name(node->op));
18362
+ ggml_op_name(node->op),
18363
+ ggml_get_name(node));
18339
18364
  }
18340
18365
 
18341
18366
  for (int i = 0; i < GGML_OP_COUNT; i++) {
@@ -18872,7 +18897,6 @@ static enum ggml_opt_result linesearch_backtracking(
18872
18897
  // strong Wolfe condition (GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE)
18873
18898
  return count;
18874
18899
  }
18875
- return count;
18876
18900
  }
18877
18901
  }
18878
18902
 
@@ -20095,27 +20119,27 @@ const char * gguf_type_name(enum gguf_type type) {
20095
20119
  return GGUF_TYPE_NAME[type];
20096
20120
  }
20097
20121
 
20098
- int gguf_get_version(struct gguf_context * ctx) {
20122
+ int gguf_get_version(const struct gguf_context * ctx) {
20099
20123
  return ctx->header.version;
20100
20124
  }
20101
20125
 
20102
- size_t gguf_get_alignment(struct gguf_context * ctx) {
20126
+ size_t gguf_get_alignment(const struct gguf_context * ctx) {
20103
20127
  return ctx->alignment;
20104
20128
  }
20105
20129
 
20106
- size_t gguf_get_data_offset(struct gguf_context * ctx) {
20130
+ size_t gguf_get_data_offset(const struct gguf_context * ctx) {
20107
20131
  return ctx->offset;
20108
20132
  }
20109
20133
 
20110
- void * gguf_get_data(struct gguf_context * ctx) {
20134
+ void * gguf_get_data(const struct gguf_context * ctx) {
20111
20135
  return ctx->data;
20112
20136
  }
20113
20137
 
20114
- int gguf_get_n_kv(struct gguf_context * ctx) {
20138
+ int gguf_get_n_kv(const struct gguf_context * ctx) {
20115
20139
  return ctx->header.n_kv;
20116
20140
  }
20117
20141
 
20118
- int gguf_find_key(struct gguf_context * ctx, const char * key) {
20142
+ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
20119
20143
  // return -1 if key not found
20120
20144
  int keyfound = -1;
20121
20145
 
@@ -20131,85 +20155,85 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
20131
20155
  return keyfound;
20132
20156
  }
20133
20157
 
20134
- const char * gguf_get_key(struct gguf_context * ctx, int i) {
20158
+ const char * gguf_get_key(const struct gguf_context * ctx, int i) {
20135
20159
  return ctx->kv[i].key.data;
20136
20160
  }
20137
20161
 
20138
- enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
20162
+ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) {
20139
20163
  return ctx->kv[i].type;
20140
20164
  }
20141
20165
 
20142
- enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
20166
+ enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) {
20143
20167
  return ctx->kv[i].value.arr.type;
20144
20168
  }
20145
20169
 
20146
- const void * gguf_get_arr_data(struct gguf_context * ctx, int i) {
20170
+ const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) {
20147
20171
  return ctx->kv[i].value.arr.data;
20148
20172
  }
20149
20173
 
20150
- const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
20174
+ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
20151
20175
  struct gguf_kv * kv = &ctx->kv[key_id];
20152
20176
  struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
20153
20177
  return str->data;
20154
20178
  }
20155
20179
 
20156
- int gguf_get_arr_n(struct gguf_context * ctx, int i) {
20180
+ int gguf_get_arr_n(const struct gguf_context * ctx, int i) {
20157
20181
  return ctx->kv[i].value.arr.n;
20158
20182
  }
20159
20183
 
20160
- uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
20184
+ uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) {
20161
20185
  return ctx->kv[i].value.uint8;
20162
20186
  }
20163
20187
 
20164
- int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
20188
+ int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) {
20165
20189
  return ctx->kv[i].value.int8;
20166
20190
  }
20167
20191
 
20168
- uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
20192
+ uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) {
20169
20193
  return ctx->kv[i].value.uint16;
20170
20194
  }
20171
20195
 
20172
- int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
20196
+ int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) {
20173
20197
  return ctx->kv[i].value.int16;
20174
20198
  }
20175
20199
 
20176
- uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
20200
+ uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) {
20177
20201
  return ctx->kv[i].value.uint32;
20178
20202
  }
20179
20203
 
20180
- int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
20204
+ int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) {
20181
20205
  return ctx->kv[i].value.int32;
20182
20206
  }
20183
20207
 
20184
- float gguf_get_val_f32(struct gguf_context * ctx, int i) {
20208
+ float gguf_get_val_f32(const struct gguf_context * ctx, int i) {
20185
20209
  return ctx->kv[i].value.float32;
20186
20210
  }
20187
20211
 
20188
- uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
20212
+ uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) {
20189
20213
  return ctx->kv[i].value.uint64;
20190
20214
  }
20191
20215
 
20192
- int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
20216
+ int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) {
20193
20217
  return ctx->kv[i].value.int64;
20194
20218
  }
20195
20219
 
20196
- double gguf_get_val_f64(struct gguf_context * ctx, int i) {
20220
+ double gguf_get_val_f64(const struct gguf_context * ctx, int i) {
20197
20221
  return ctx->kv[i].value.float64;
20198
20222
  }
20199
20223
 
20200
- bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
20224
+ bool gguf_get_val_bool(const struct gguf_context * ctx, int i) {
20201
20225
  return ctx->kv[i].value.bool_;
20202
20226
  }
20203
20227
 
20204
- const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
20228
+ const char * gguf_get_val_str (const struct gguf_context * ctx, int i) {
20205
20229
  return ctx->kv[i].value.str.data;
20206
20230
  }
20207
20231
 
20208
- int gguf_get_n_tensors(struct gguf_context * ctx) {
20232
+ int gguf_get_n_tensors(const struct gguf_context * ctx) {
20209
20233
  return ctx->header.n_tensors;
20210
20234
  }
20211
20235
 
20212
- int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
20236
+ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
20213
20237
  // return -1 if tensor not found
20214
20238
  int tensorfound = -1;
20215
20239
 
@@ -20225,11 +20249,11 @@ int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
20225
20249
  return tensorfound;
20226
20250
  }
20227
20251
 
20228
- size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
20252
+ size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
20229
20253
  return ctx->infos[i].offset;
20230
20254
  }
20231
20255
 
20232
- char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
20256
+ char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
20233
20257
  return ctx->infos[i].name.data;
20234
20258
  }
20235
20259
 
@@ -20512,7 +20536,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
20512
20536
  buf->offset += el_size;
20513
20537
  }
20514
20538
 
20515
- static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
20539
+ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
20516
20540
  // write header
20517
20541
  gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
20518
20542
  gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
@@ -20627,7 +20651,7 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
20627
20651
  }
20628
20652
  }
20629
20653
 
20630
- void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
20654
+ void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
20631
20655
  FILE * file = fopen(fname, "wb");
20632
20656
  if (!file) {
20633
20657
  GGML_ASSERT(false && "failed to open file for writing");
@@ -20644,7 +20668,7 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only
20644
20668
  fclose(file);
20645
20669
  }
20646
20670
 
20647
- size_t gguf_get_meta_size(struct gguf_context * ctx) {
20671
+ size_t gguf_get_meta_size(const struct gguf_context * ctx) {
20648
20672
  // no allocs - only compute size
20649
20673
  struct gguf_buf buf = gguf_buf_init(0);
20650
20674
 
@@ -20653,7 +20677,7 @@ size_t gguf_get_meta_size(struct gguf_context * ctx) {
20653
20677
  return buf.offset;
20654
20678
  }
20655
20679
 
20656
- void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
20680
+ void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
20657
20681
  struct gguf_buf buf = gguf_buf_init(16*1024);
20658
20682
 
20659
20683
  gguf_write_to_buf(ctx, &buf, true);
@@ -20729,6 +20753,14 @@ int ggml_cpu_has_arm_fma(void) {
20729
20753
  #endif
20730
20754
  }
20731
20755
 
20756
+ int ggml_cpu_has_metal(void) {
20757
+ #if defined(GGML_USE_METAL)
20758
+ return 1;
20759
+ #else
20760
+ return 0;
20761
+ #endif
20762
+ }
20763
+
20732
20764
  int ggml_cpu_has_f16c(void) {
20733
20765
  #if defined(__F16C__)
20734
20766
  return 1;
@@ -195,6 +195,14 @@
195
195
  # define GGML_DEPRECATED(func, hint) func
196
196
  #endif
197
197
 
198
+ #ifndef __GNUC__
199
+ # define GGML_ATTRIBUTE_FORMAT(...)
200
+ #elif defined(__MINGW32__)
201
+ # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
202
+ #else
203
+ # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
204
+ #endif
205
+
198
206
  #include <stdint.h>
199
207
  #include <stddef.h>
200
208
  #include <stdbool.h>
@@ -685,6 +693,7 @@ extern "C" {
685
693
 
686
694
  GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
687
695
  GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
696
+ GGML_ATTRIBUTE_FORMAT(2, 3)
688
697
  GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
689
698
 
690
699
  //
@@ -1866,39 +1875,39 @@ extern "C" {
1866
1875
 
1867
1876
  GGML_API const char * gguf_type_name(enum gguf_type type);
1868
1877
 
1869
- GGML_API int gguf_get_version (struct gguf_context * ctx);
1870
- GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
1871
- GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
1872
- GGML_API void * gguf_get_data (struct gguf_context * ctx);
1878
+ GGML_API int gguf_get_version (const struct gguf_context * ctx);
1879
+ GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
1880
+ GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
1881
+ GGML_API void * gguf_get_data (const struct gguf_context * ctx);
1873
1882
 
1874
- GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
1875
- GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
1876
- GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
1883
+ GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
1884
+ GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
1885
+ GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
1877
1886
 
1878
- GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
1879
- GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
1887
+ GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
1888
+ GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
1880
1889
 
1881
1890
  // results are undefined if the wrong type is used for the key
1882
- GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
1883
- GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
1884
- GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
1885
- GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
1886
- GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
1887
- GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
1888
- GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
1889
- GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
1890
- GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
1891
- GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
1892
- GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
1893
- GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
1894
- GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
1895
- GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
1896
- GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
1897
-
1898
- GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
1899
- GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
1900
- GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
1901
- GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
1891
+ GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
1892
+ GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
1893
+ GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
1894
+ GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
1895
+ GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
1896
+ GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
1897
+ GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
1898
+ GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
1899
+ GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
1900
+ GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
1901
+ GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
1902
+ GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
1903
+ GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
1904
+ GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
1905
+ GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
1906
+
1907
+ GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
1908
+ GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
1909
+ GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
1910
+ GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
1902
1911
 
1903
1912
  // overrides existing values or adds a new one
1904
1913
  GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
@@ -1943,11 +1952,11 @@ extern "C" {
1943
1952
  //
1944
1953
 
1945
1954
  // write the entire context to a binary file
1946
- GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
1955
+ GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
1947
1956
 
1948
1957
  // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
1949
- GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
1950
- GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
1958
+ GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
1959
+ GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
1951
1960
 
1952
1961
  //
1953
1962
  // system info
@@ -1961,6 +1970,7 @@ extern "C" {
1961
1970
  GGML_API int ggml_cpu_has_fma (void);
1962
1971
  GGML_API int ggml_cpu_has_neon (void);
1963
1972
  GGML_API int ggml_cpu_has_arm_fma (void);
1973
+ GGML_API int ggml_cpu_has_metal (void);
1964
1974
  GGML_API int ggml_cpu_has_f16c (void);
1965
1975
  GGML_API int ggml_cpu_has_fp16_va (void);
1966
1976
  GGML_API int ggml_cpu_has_wasm_simd (void);
@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
2609
2609
 
2610
2610
  memcpy(utmp, x[i].scales, 12);
2611
2611
 
2612
- const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
2612
+ uint32x2_t mins8 = { 0 };
2613
+ mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
2614
+ mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
2615
+
2613
2616
  utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
2614
2617
  utmp[0] &= kmask1;
2615
2618