llama_cpp 0.7.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13537,7 +13537,7 @@ static void ggml_compute_forward_rope_f16(
13537
13537
  dst_data[n_dims] = GGML_FP32_TO_FP16(x2*cos_block_theta - x3*sin_block_theta);
13538
13538
  dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
13539
13539
  }
13540
- } if (!is_neox) {
13540
+ } else if (!is_neox) {
13541
13541
  for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
13542
13542
  const float cos_theta = cosf(theta);
13543
13543
  const float sin_theta = sinf(theta);
@@ -19170,6 +19170,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
19170
19170
 
19171
19171
  if (idx == -1) {
19172
19172
  fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
19173
+ fclose(fout);
19173
19174
  return;
19174
19175
  }
19175
19176
 
@@ -20844,7 +20845,7 @@ struct gguf_kv {
20844
20845
  };
20845
20846
 
20846
20847
  struct gguf_header {
20847
- uint32_t magic;
20848
+ char magic[4];
20848
20849
  uint32_t version;
20849
20850
  uint64_t n_tensors; // GGUFv2
20850
20851
  uint64_t n_kv; // GGUFv2
@@ -20914,7 +20915,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
20914
20915
  struct gguf_context * gguf_init_empty(void) {
20915
20916
  struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
20916
20917
 
20917
- ctx->header.magic = GGUF_MAGIC;
20918
+ memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
20918
20919
  ctx->header.version = GGUF_VERSION;
20919
20920
  ctx->header.n_tensors = 0;
20920
20921
  ctx->header.n_kv = 0;
@@ -20940,16 +20941,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
20940
20941
  // offset from start of file
20941
20942
  size_t offset = 0;
20942
20943
 
20943
- uint32_t magic = 0;
20944
+ char magic[4];
20944
20945
 
20945
20946
  // check the magic before making allocations
20946
20947
  {
20947
20948
  gguf_fread_el(file, &magic, sizeof(magic), &offset);
20948
20949
 
20949
- if (magic != GGUF_MAGIC) {
20950
- fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
20951
- fclose(file);
20952
- return NULL;
20950
+ for (uint32_t i = 0; i < sizeof(magic); i++) {
20951
+ if (magic[i] != GGUF_MAGIC[i]) {
20952
+ fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
20953
+ fclose(file);
20954
+ return NULL;
20955
+ }
20953
20956
  }
20954
20957
  }
20955
20958
 
@@ -20959,7 +20962,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
20959
20962
 
20960
20963
  // read the header
20961
20964
  {
20962
- ctx->header.magic = magic;
20965
+ strncpy(ctx->header.magic, magic, 4);
20966
+
20963
20967
 
20964
20968
  ctx->kv = NULL;
20965
20969
  ctx->infos = NULL;
@@ -231,8 +231,9 @@
231
231
  #define GGML_EXIT_SUCCESS 0
232
232
  #define GGML_EXIT_ABORTED 1
233
233
 
234
- #define GGUF_MAGIC 0x46554747 // "GGUF"
235
- #define GGUF_VERSION 2
234
+ #define GGUF_MAGIC "GGUF"
235
+
236
+ #define GGUF_VERSION 3
236
237
 
237
238
  #define GGUF_DEFAULT_ALIGNMENT 32
238
239
 
@@ -46,7 +46,7 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
46
46
  #if defined(_MSC_VER) || defined(__MINGW32__)
47
47
  #include <intrin.h>
48
48
  #else
49
- #if !defined(__riscv)
49
+ #if !defined(__riscv) && !defined(__s390__)
50
50
  #include <immintrin.h>
51
51
  #endif
52
52
  #endif
@@ -462,12 +462,9 @@ void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
462
462
  }
463
463
 
464
464
  size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
465
- const int nb = k / QK_K;
466
-
467
- // TODO - collect histograms - although, at a second thought, I don't really care about them
468
- (void)hist;
465
+ (void)hist; // TODO: collect histograms
469
466
 
470
- for (int j = 0; j < nb; j += k) {
467
+ for (int j = 0; j < n; j += k) {
471
468
  block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
472
469
  quantize_row_q2_K_reference(src + j, y, k);
473
470
  }
@@ -678,12 +675,9 @@ void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
678
675
  }
679
676
 
680
677
  size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
681
- const int nb = k / QK_K;
682
-
683
- // TODO - collect histograms - although, at a second thought, I don't really care about them
684
- (void)hist;
678
+ (void)hist; // TODO: collect histograms
685
679
 
686
- for (int j = 0; j < nb; j += k) {
680
+ for (int j = 0; j < n; j += k) {
687
681
  block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
688
682
  quantize_row_q3_K_reference(src + j, y, k);
689
683
  }
@@ -846,9 +840,9 @@ void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
846
840
 
847
841
  size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
848
842
  assert(k % QK_K == 0);
849
- const int nb = k / QK_K;
850
843
  (void)hist; // TODO: collect histograms
851
- for (int j = 0; j < nb; j += k) {
844
+
845
+ for (int j = 0; j < n; j += k) {
852
846
  block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
853
847
  quantize_row_q4_K_reference(src + j, y, k);
854
848
  }
@@ -1052,9 +1046,9 @@ void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
1052
1046
 
1053
1047
  size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1054
1048
  assert(k % QK_K == 0);
1055
- const int nb = k / QK_K;
1056
- (void)hist;
1057
- for (int j = 0; j < nb; j += k) {
1049
+ (void)hist; // TODO: collect histograms
1050
+
1051
+ for (int j = 0; j < n; j += k) {
1058
1052
  block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
1059
1053
  quantize_row_q5_K_reference(src + j, y, k);
1060
1054
  }
@@ -1200,11 +1194,9 @@ void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
1200
1194
 
1201
1195
  size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
1202
1196
  assert(k % QK_K == 0);
1203
- const int nb = k / QK_K;
1204
-
1205
- (void)hist; // TODO
1197
+ (void)hist; // TODO: collect histograms
1206
1198
 
1207
- for (int j = 0; j < nb; j += k) {
1199
+ for (int j = 0; j < n; j += k) {
1208
1200
  block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
1209
1201
  quantize_row_q6_K_reference(src + j, y, k);
1210
1202
  }