llama_cpp 0.7.1 → 0.8.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -13537,7 +13537,7 @@ static void ggml_compute_forward_rope_f16(
13537
13537
  dst_data[n_dims] = GGML_FP32_TO_FP16(x2*cos_block_theta - x3*sin_block_theta);
13538
13538
  dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
13539
13539
  }
13540
- } if (!is_neox) {
13540
+ } else if (!is_neox) {
13541
13541
  for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
13542
13542
  const float cos_theta = cosf(theta);
13543
13543
  const float sin_theta = sinf(theta);
@@ -19170,6 +19170,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
19170
19170
 
19171
19171
  if (idx == -1) {
19172
19172
  fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
19173
+ fclose(fout);
19173
19174
  return;
19174
19175
  }
19175
19176
 
@@ -20844,7 +20845,7 @@ struct gguf_kv {
20844
20845
  };
20845
20846
 
20846
20847
  struct gguf_header {
20847
- uint32_t magic;
20848
+ char magic[4];
20848
20849
  uint32_t version;
20849
20850
  uint64_t n_tensors; // GGUFv2
20850
20851
  uint64_t n_kv; // GGUFv2
@@ -20914,7 +20915,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
20914
20915
  struct gguf_context * gguf_init_empty(void) {
20915
20916
  struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
20916
20917
 
20917
- ctx->header.magic = GGUF_MAGIC;
20918
+ memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
20918
20919
  ctx->header.version = GGUF_VERSION;
20919
20920
  ctx->header.n_tensors = 0;
20920
20921
  ctx->header.n_kv = 0;
@@ -20940,16 +20941,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
20940
20941
  // offset from start of file
20941
20942
  size_t offset = 0;
20942
20943
 
20943
- uint32_t magic = 0;
20944
+ char magic[4];
20944
20945
 
20945
20946
  // check the magic before making allocations
20946
20947
  {
20947
20948
  gguf_fread_el(file, &magic, sizeof(magic), &offset);
20948
20949
 
20949
- if (magic != GGUF_MAGIC) {
20950
- fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
20951
- fclose(file);
20952
- return NULL;
20950
+ for (uint32_t i = 0; i < sizeof(magic); i++) {
20951
+ if (magic[i] != GGUF_MAGIC[i]) {
20952
+ fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
20953
+ fclose(file);
20954
+ return NULL;
20955
+ }
20953
20956
  }
20954
20957
  }
20955
20958
 
@@ -20959,7 +20962,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
20959
20962
 
20960
20963
  // read the header
20961
20964
  {
20962
- ctx->header.magic = magic;
20965
+ strncpy(ctx->header.magic, magic, 4);
20966
+
20963
20967
 
20964
20968
  ctx->kv = NULL;
20965
20969
  ctx->infos = NULL;
@@ -231,8 +231,9 @@
231
231
  #define GGML_EXIT_SUCCESS 0
232
232
  #define GGML_EXIT_ABORTED 1
233
233
 
234
- #define GGUF_MAGIC 0x46554747 // "GGUF"
235
- #define GGUF_VERSION 2
234
+ #define GGUF_MAGIC "GGUF"
235
+
236
+ #define GGUF_VERSION 3
236
237
 
237
238
  #define GGUF_DEFAULT_ALIGNMENT 32
238
239
 
@@ -46,7 +46,7 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
46
46
  #if defined(_MSC_VER) || defined(__MINGW32__)
47
47
  #include <intrin.h>
48
48
  #else
49
- #if !defined(__riscv)
49
+ #if !defined(__riscv) && !defined(__s390__)
50
50
  #include <immintrin.h>
51
51
  #endif
52
52
  #endif
@@ -462,12 +462,9 @@ void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
462
462
  }
463
463
 
464
464
  size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
465
- const int nb = k / QK_K;
466
-
467
- // TODO - collect histograms - although, at a second thought, I don't really care about them
468
- (void)hist;
465
+ (void)hist; // TODO: collect histograms
469
466
 
470
- for (int j = 0; j < nb; j += k) {
467
+ for (int j = 0; j < n; j += k) {
471
468
  block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
472
469
  quantize_row_q2_K_reference(src + j, y, k);
473
470
  }
@@ -678,12 +675,9 @@ void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
678
675
  }
679
676
 
680
677
  size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
681
- const int nb = k / QK_K;
682
-
683
- // TODO - collect histograms - although, at a second thought, I don't really care about them
684
- (void)hist;
678
+ (void)hist; // TODO: collect histograms
685
679
 
686
- for (int j = 0; j < nb; j += k) {
680
+ for (int j = 0; j < n; j += k) {
687
681
  block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
688
682
  quantize_row_q3_K_reference(src + j, y, k);
689
683
  }
@@ -846,9 +840,9 @@ void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
846
840
 
847
841
  size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
848
842
  assert(k % QK_K == 0);
849
- const int nb = k / QK_K;
850
843
  (void)hist; // TODO: collect histograms
851
- for (int j = 0; j < nb; j += k) {
844
+
845
+ for (int j = 0; j < n; j += k) {
852
846
  block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
853
847
  quantize_row_q4_K_reference(src + j, y, k);
854
848
  }
@@ -1052,9 +1046,9 @@ void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
1052
1046
 
1053
1047
  size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
1054
1048
  assert(k % QK_K == 0);
1055
- const int nb = k / QK_K;
1056
- (void)hist;
1057
- for (int j = 0; j < nb; j += k) {
1049
+ (void)hist; // TODO: collect histograms
1050
+
1051
+ for (int j = 0; j < n; j += k) {
1058
1052
  block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
1059
1053
  quantize_row_q5_K_reference(src + j, y, k);
1060
1054
  }
@@ -1200,11 +1194,9 @@ void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
1200
1194
 
1201
1195
  size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
1202
1196
  assert(k % QK_K == 0);
1203
- const int nb = k / QK_K;
1204
-
1205
- (void)hist; // TODO
1197
+ (void)hist; // TODO: collect histograms
1206
1198
 
1207
- for (int j = 0; j < nb; j += k) {
1199
+ for (int j = 0; j < n; j += k) {
1208
1200
  block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
1209
1201
  quantize_row_q6_K_reference(src + j, y, k);
1210
1202
  }