llama_cpp 0.6.0 → 0.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -29,7 +29,7 @@
29
29
 
30
30
  // 2-bit quantization
31
31
  // weight is represented as x = a * q + b
32
- // 16 blocks of 16 elemenets each
32
+ // 16 blocks of 16 elements each
33
33
  // Effectively 2.625 bits per weight
34
34
  typedef struct {
35
35
  uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
@@ -41,7 +41,7 @@ static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "w
41
41
 
42
42
  // 3-bit quantization
43
43
  // weight is represented as x = a * q
44
- // 16 blocks of 16 elemenets each
44
+ // 16 blocks of 16 elements each
45
45
  // Effectively 3.4375 bits per weight
46
46
  #ifdef GGML_QKK_64
47
47
  typedef struct {
@@ -62,7 +62,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 +
62
62
  #endif
63
63
 
64
64
  // 4-bit quantization
65
- // 16 blocks of 32 elements each
65
+ // 8 blocks of 32 elements each
66
66
  // weight is represented as x = a * q + b
67
67
  // Effectively 4.5 bits per weight
68
68
  #ifdef GGML_QKK_64
@@ -83,7 +83,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
83
83
  #endif
84
84
 
85
85
  // 5-bit quantization
86
- // 16 blocks of 32 elements each
86
+ // 8 blocks of 32 elements each
87
87
  // weight is represented as x = a * q + b
88
88
  // Effectively 5.5 bits per weight
89
89
  #ifdef GGML_QKK_64
@@ -107,7 +107,7 @@ static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/
107
107
 
108
108
  // 6-bit quantization
109
109
  // weight is represented as x = a * q
110
- // 16 blocks of 16 elemenets each
110
+ // 16 blocks of 16 elements each
111
111
  // Effectively 6.5625 bits per weight
112
112
  typedef struct {
113
113
  uint8_t ql[QK_K/2]; // quants, lower 4 bits