llama_cpp 0.6.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -29,7 +29,7 @@
29
29
 
30
30
  // 2-bit quantization
31
31
  // weight is represented as x = a * q + b
32
- // 16 blocks of 16 elemenets each
32
+ // 16 blocks of 16 elements each
33
33
  // Effectively 2.5625 bits per weight
34
34
  typedef struct {
35
35
  uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
@@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w
41
41
 
42
42
  // 3-bit quantization
43
43
  // weight is represented as x = a * q
44
- // 16 blocks of 16 elemenets each
44
+ // 16 blocks of 16 elements each
45
45
  // Effectively 3.4375 bits per weight
46
46
  #ifdef GGML_QKK_64
47
47
  typedef struct {
@@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 +
62
62
  #endif
63
63
 
64
64
  // 4-bit quantization
65
- // 16 blocks of 32 elements each
65
+ // 8 blocks of 32 elements each
66
66
  // weight is represented as x = a * q + b
67
67
  // Effectively 4.5 bits per weight
68
68
  #ifdef GGML_QKK_64
@@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
83
83
  #endif
84
84
 
85
85
  // 5-bit quantization
86
- // 16 blocks of 32 elements each
86
+ // 8 blocks of 32 elements each
87
87
  // weight is represented as x = a * q + b
88
88
  // Effectively 5.5 bits per weight
89
89
  #ifdef GGML_QKK_64
@@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
107
107
 
108
108
  // 6-bit quantization
109
109
  // weight is represented as x = a * q
110
- // 16 blocks of 16 elemenets each
110
+ // 16 blocks of 16 elements each
111
111
  // Effectively 6.5625 bits per weight
112
112
  typedef struct {
113
113
  uint8_t ql[QK_K/2]; // quants, lower 4 bits