llama_cpp 0.14.4 → 0.14.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/examples/chat.rb +2 -4
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +23 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +10 -0
- data/vendor/tmp/llama.cpp/LICENSE +1 -1
- data/vendor/tmp/llama.cpp/Makefile +11 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +7 -3
- data/vendor/tmp/llama.cpp/ggml-quants.c +155 -155
- data/vendor/tmp/llama.cpp/ggml-quants.h +82 -82
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +878 -216
- data/vendor/tmp/llama.cpp/ggml.c +8 -8
- data/vendor/tmp/llama.cpp/ggml.h +7 -7
- data/vendor/tmp/llama.cpp/llama.cpp +686 -124
- data/vendor/tmp/llama.cpp/llama.h +81 -13
- metadata +2 -2
@@ -544,7 +544,7 @@ static const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4
|
|
544
544
|
#endif
|
545
545
|
|
546
546
|
// reference implementation for deterministic creation of model files
|
547
|
-
void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y,
|
547
|
+
void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int64_t k) {
|
548
548
|
static const int qk = QK4_0;
|
549
549
|
|
550
550
|
assert(k % qk == 0);
|
@@ -581,12 +581,12 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict
|
|
581
581
|
}
|
582
582
|
}
|
583
583
|
|
584
|
-
void quantize_row_q4_0(const float * restrict x, void * restrict y,
|
584
|
+
void quantize_row_q4_0(const float * restrict x, void * restrict y, int64_t k) {
|
585
585
|
quantize_row_q4_0_reference(x, y, k);
|
586
586
|
}
|
587
587
|
|
588
588
|
|
589
|
-
void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y,
|
589
|
+
void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int64_t k) {
|
590
590
|
const int qk = QK4_1;
|
591
591
|
|
592
592
|
assert(k % qk == 0);
|
@@ -623,11 +623,11 @@ void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict
|
|
623
623
|
}
|
624
624
|
}
|
625
625
|
|
626
|
-
void quantize_row_q4_1(const float * restrict x, void * restrict y,
|
626
|
+
void quantize_row_q4_1(const float * restrict x, void * restrict y, int64_t k) {
|
627
627
|
quantize_row_q4_1_reference(x, y, k);
|
628
628
|
}
|
629
629
|
|
630
|
-
void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y,
|
630
|
+
void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int64_t k) {
|
631
631
|
static const int qk = QK5_0;
|
632
632
|
|
633
633
|
assert(k % qk == 0);
|
@@ -671,11 +671,11 @@ void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict
|
|
671
671
|
}
|
672
672
|
}
|
673
673
|
|
674
|
-
void quantize_row_q5_0(const float * restrict x, void * restrict y,
|
674
|
+
void quantize_row_q5_0(const float * restrict x, void * restrict y, int64_t k) {
|
675
675
|
quantize_row_q5_0_reference(x, y, k);
|
676
676
|
}
|
677
677
|
|
678
|
-
void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y,
|
678
|
+
void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int64_t k) {
|
679
679
|
const int qk = QK5_1;
|
680
680
|
|
681
681
|
assert(k % qk == 0);
|
@@ -719,12 +719,12 @@ void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict
|
|
719
719
|
}
|
720
720
|
}
|
721
721
|
|
722
|
-
void quantize_row_q5_1(const float * restrict x, void * restrict y,
|
722
|
+
void quantize_row_q5_1(const float * restrict x, void * restrict y, int64_t k) {
|
723
723
|
quantize_row_q5_1_reference(x, y, k);
|
724
724
|
}
|
725
725
|
|
726
726
|
// reference implementation for deterministic creation of model files
|
727
|
-
void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y,
|
727
|
+
void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int64_t k) {
|
728
728
|
assert(k % QK8_0 == 0);
|
729
729
|
const int nb = k / QK8_0;
|
730
730
|
|
@@ -749,7 +749,7 @@ void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict
|
|
749
749
|
}
|
750
750
|
}
|
751
751
|
|
752
|
-
void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
|
752
|
+
void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k) {
|
753
753
|
assert(QK8_0 == 32);
|
754
754
|
assert(k % QK8_0 == 0);
|
755
755
|
const int nb = k / QK8_0;
|
@@ -938,7 +938,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
|
|
938
938
|
}
|
939
939
|
|
940
940
|
// reference implementation for deterministic creation of model files
|
941
|
-
void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y,
|
941
|
+
void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int64_t k) {
|
942
942
|
assert(QK8_1 == 32);
|
943
943
|
assert(k % QK8_1 == 0);
|
944
944
|
const int nb = k / QK8_1;
|
@@ -973,7 +973,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
|
|
973
973
|
}
|
974
974
|
}
|
975
975
|
|
976
|
-
void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
976
|
+
void quantize_row_q8_1(const float * restrict x, void * restrict vy, int64_t k) {
|
977
977
|
assert(k % QK8_1 == 0);
|
978
978
|
const int nb = k / QK8_1;
|
979
979
|
|
@@ -1192,7 +1192,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1192
1192
|
#endif
|
1193
1193
|
}
|
1194
1194
|
|
1195
|
-
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
|
1195
|
+
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int64_t k) {
|
1196
1196
|
static const int qk = QK4_0;
|
1197
1197
|
|
1198
1198
|
assert(k % qk == 0);
|
@@ -1212,7 +1212,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
|
|
1212
1212
|
}
|
1213
1213
|
}
|
1214
1214
|
|
1215
|
-
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
|
1215
|
+
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int64_t k) {
|
1216
1216
|
static const int qk = QK4_1;
|
1217
1217
|
|
1218
1218
|
assert(k % qk == 0);
|
@@ -1233,7 +1233,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
|
|
1233
1233
|
}
|
1234
1234
|
}
|
1235
1235
|
|
1236
|
-
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
|
1236
|
+
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int64_t k) {
|
1237
1237
|
static const int qk = QK5_0;
|
1238
1238
|
|
1239
1239
|
assert(k % qk == 0);
|
@@ -1259,7 +1259,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int
|
|
1259
1259
|
}
|
1260
1260
|
}
|
1261
1261
|
|
1262
|
-
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
|
1262
|
+
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int64_t k) {
|
1263
1263
|
static const int qk = QK5_1;
|
1264
1264
|
|
1265
1265
|
assert(k % qk == 0);
|
@@ -1286,7 +1286,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int
|
|
1286
1286
|
}
|
1287
1287
|
}
|
1288
1288
|
|
1289
|
-
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y,
|
1289
|
+
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int64_t k) {
|
1290
1290
|
static const int qk = QK8_0;
|
1291
1291
|
|
1292
1292
|
assert(k % qk == 0);
|
@@ -1581,7 +1581,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
|
|
1581
1581
|
|
1582
1582
|
//========================- 2-bit (de)-quantization
|
1583
1583
|
|
1584
|
-
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y,
|
1584
|
+
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int64_t k) {
|
1585
1585
|
assert(k % QK_K == 0);
|
1586
1586
|
const int nb = k / QK_K;
|
1587
1587
|
|
@@ -1658,7 +1658,7 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
|
|
1658
1658
|
}
|
1659
1659
|
}
|
1660
1660
|
|
1661
|
-
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) {
|
1661
|
+
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int64_t k) {
|
1662
1662
|
assert(k % QK_K == 0);
|
1663
1663
|
const int nb = k / QK_K;
|
1664
1664
|
|
@@ -1704,7 +1704,7 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int
|
|
1704
1704
|
}
|
1705
1705
|
}
|
1706
1706
|
|
1707
|
-
void quantize_row_q2_K(const float * restrict x, void * restrict vy,
|
1707
|
+
void quantize_row_q2_K(const float * restrict x, void * restrict vy, int64_t k) {
|
1708
1708
|
quantize_row_q2_K_reference(x, vy, k);
|
1709
1709
|
}
|
1710
1710
|
|
@@ -1960,14 +1960,14 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
|
|
1960
1960
|
}
|
1961
1961
|
}
|
1962
1962
|
|
1963
|
-
size_t quantize_q2_K(const float * restrict src, void * restrict dst,
|
1963
|
+
size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
1964
1964
|
size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
|
1965
1965
|
if (!quant_weights) {
|
1966
|
-
quantize_row_q2_K_reference(src, dst, nrow*n_per_row);
|
1966
|
+
quantize_row_q2_K_reference(src, dst, (int64_t)nrow*n_per_row);
|
1967
1967
|
}
|
1968
1968
|
else {
|
1969
1969
|
char * qrow = (char *)dst;
|
1970
|
-
for (
|
1970
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
1971
1971
|
quantize_row_q2_K_impl(src, (block_q2_K*)qrow, n_per_row, quant_weights);
|
1972
1972
|
src += n_per_row;
|
1973
1973
|
qrow += row_size;
|
@@ -1978,7 +1978,7 @@ size_t quantize_q2_K(const float * restrict src, void * restrict dst, int nrow,
|
|
1978
1978
|
|
1979
1979
|
//========================= 3-bit (de)-quantization
|
1980
1980
|
|
1981
|
-
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y,
|
1981
|
+
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int64_t k) {
|
1982
1982
|
assert(k % QK_K == 0);
|
1983
1983
|
const int nb = k / QK_K;
|
1984
1984
|
|
@@ -2092,7 +2092,7 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict
|
|
2092
2092
|
}
|
2093
2093
|
|
2094
2094
|
#if QK_K == 256
|
2095
|
-
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
|
2095
|
+
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int64_t k) {
|
2096
2096
|
assert(k % QK_K == 0);
|
2097
2097
|
const int nb = k / QK_K;
|
2098
2098
|
|
@@ -2142,7 +2142,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
|
|
2142
2142
|
}
|
2143
2143
|
}
|
2144
2144
|
#else
|
2145
|
-
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
|
2145
|
+
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int64_t k) {
|
2146
2146
|
assert(k % QK_K == 0);
|
2147
2147
|
assert(QK_K == 64);
|
2148
2148
|
const int nb = k / QK_K;
|
@@ -2175,11 +2175,11 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
|
|
2175
2175
|
}
|
2176
2176
|
#endif
|
2177
2177
|
|
2178
|
-
void quantize_row_q3_K(const float * restrict x, void * restrict vy,
|
2178
|
+
void quantize_row_q3_K(const float * restrict x, void * restrict vy, int64_t k) {
|
2179
2179
|
quantize_row_q3_K_reference(x, vy, k);
|
2180
2180
|
}
|
2181
2181
|
|
2182
|
-
static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restrict y,
|
2182
|
+
static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restrict y, int64_t n_per_row, const float * restrict quant_weights) {
|
2183
2183
|
#if QK_K != 256
|
2184
2184
|
(void)quant_weights;
|
2185
2185
|
quantize_row_q3_K_reference(x, y, n_per_row);
|
@@ -2268,14 +2268,14 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
|
|
2268
2268
|
#endif
|
2269
2269
|
}
|
2270
2270
|
|
2271
|
-
size_t quantize_q3_K(const float * restrict src, void * restrict dst,
|
2271
|
+
size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2272
2272
|
size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
|
2273
2273
|
if (!quant_weights) {
|
2274
|
-
quantize_row_q3_K_reference(src, dst, nrow*n_per_row);
|
2274
|
+
quantize_row_q3_K_reference(src, dst, (int64_t)nrow*n_per_row);
|
2275
2275
|
}
|
2276
2276
|
else {
|
2277
2277
|
char * qrow = (char *)dst;
|
2278
|
-
for (
|
2278
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
2279
2279
|
quantize_row_q3_K_impl(src, (block_q3_K*)qrow, n_per_row, quant_weights);
|
2280
2280
|
src += n_per_row;
|
2281
2281
|
qrow += row_size;
|
@@ -2286,7 +2286,7 @@ size_t quantize_q3_K(const float * restrict src, void * restrict dst, int nrow,
|
|
2286
2286
|
|
2287
2287
|
// ====================== 4-bit (de)-quantization
|
2288
2288
|
|
2289
|
-
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y,
|
2289
|
+
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int64_t k) {
|
2290
2290
|
assert(k % QK_K == 0);
|
2291
2291
|
const int nb = k / QK_K;
|
2292
2292
|
|
@@ -2393,7 +2393,7 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
|
|
2393
2393
|
}
|
2394
2394
|
}
|
2395
2395
|
|
2396
|
-
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) {
|
2396
|
+
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int64_t k) {
|
2397
2397
|
assert(k % QK_K == 0);
|
2398
2398
|
const int nb = k / QK_K;
|
2399
2399
|
|
@@ -2432,19 +2432,19 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int
|
|
2432
2432
|
}
|
2433
2433
|
}
|
2434
2434
|
|
2435
|
-
void quantize_row_q4_K(const float * restrict x, void * restrict vy,
|
2435
|
+
void quantize_row_q4_K(const float * restrict x, void * restrict vy, int64_t k) {
|
2436
2436
|
assert(k % QK_K == 0);
|
2437
2437
|
block_q4_K * restrict y = vy;
|
2438
2438
|
quantize_row_q4_K_reference(x, y, k);
|
2439
2439
|
}
|
2440
2440
|
|
2441
|
-
static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restrict y,
|
2441
|
+
static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restrict y, int64_t n_per_row, const float * quant_weights) {
|
2442
2442
|
#if QK_K != 256
|
2443
2443
|
(void)quant_weights;
|
2444
2444
|
quantize_row_q4_K_reference(x, y, n_per_row);
|
2445
2445
|
#else
|
2446
2446
|
assert(n_per_row % QK_K == 0);
|
2447
|
-
const
|
2447
|
+
const int64_t nb = n_per_row / QK_K;
|
2448
2448
|
|
2449
2449
|
uint8_t L[QK_K];
|
2450
2450
|
uint8_t Laux[32];
|
@@ -2516,14 +2516,14 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
|
|
2516
2516
|
#endif
|
2517
2517
|
}
|
2518
2518
|
|
2519
|
-
size_t quantize_q4_K(const float * restrict src, void * restrict dst,
|
2519
|
+
size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2520
2520
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
|
2521
2521
|
if (!quant_weights) {
|
2522
|
-
quantize_row_q4_K_reference(src, dst, nrow*n_per_row);
|
2522
|
+
quantize_row_q4_K_reference(src, dst, (int64_t)nrow*n_per_row);
|
2523
2523
|
}
|
2524
2524
|
else {
|
2525
2525
|
char * qrow = (char *)dst;
|
2526
|
-
for (
|
2526
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
2527
2527
|
quantize_row_q4_K_impl(src, (block_q4_K*)qrow, n_per_row, quant_weights);
|
2528
2528
|
src += n_per_row;
|
2529
2529
|
qrow += row_size;
|
@@ -2534,9 +2534,9 @@ size_t quantize_q4_K(const float * restrict src, void * restrict dst, int nrow,
|
|
2534
2534
|
|
2535
2535
|
// ====================== 5-bit (de)-quantization
|
2536
2536
|
|
2537
|
-
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y,
|
2537
|
+
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int64_t k) {
|
2538
2538
|
assert(k % QK_K == 0);
|
2539
|
-
const
|
2539
|
+
const int64_t nb = k / QK_K;
|
2540
2540
|
|
2541
2541
|
#if QK_K == 256
|
2542
2542
|
uint8_t L[QK_K];
|
@@ -2676,9 +2676,9 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
|
|
2676
2676
|
}
|
2677
2677
|
}
|
2678
2678
|
|
2679
|
-
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) {
|
2679
|
+
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int64_t k) {
|
2680
2680
|
assert(k % QK_K == 0);
|
2681
|
-
const
|
2681
|
+
const int64_t nb = k / QK_K;
|
2682
2682
|
|
2683
2683
|
for (int i = 0; i < nb; i++) {
|
2684
2684
|
|
@@ -2721,19 +2721,19 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int
|
|
2721
2721
|
}
|
2722
2722
|
}
|
2723
2723
|
|
2724
|
-
void quantize_row_q5_K(const float * restrict x, void * restrict vy,
|
2724
|
+
void quantize_row_q5_K(const float * restrict x, void * restrict vy, int64_t k) {
|
2725
2725
|
assert(k % QK_K == 0);
|
2726
2726
|
block_q5_K * restrict y = vy;
|
2727
2727
|
quantize_row_q5_K_reference(x, y, k);
|
2728
2728
|
}
|
2729
2729
|
|
2730
|
-
static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restrict y,
|
2730
|
+
static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restrict y, int64_t n_per_row, const float * quant_weights) {
|
2731
2731
|
#if QK_K != 256
|
2732
2732
|
(void)quant_weights;
|
2733
2733
|
quantize_row_q5_K_reference(x, y, n_per_row);
|
2734
2734
|
#else
|
2735
2735
|
assert(n_per_row % QK_K == 0);
|
2736
|
-
const
|
2736
|
+
const int64_t nb = n_per_row / QK_K;
|
2737
2737
|
|
2738
2738
|
uint8_t L[QK_K];
|
2739
2739
|
uint8_t Laux[32];
|
@@ -2825,14 +2825,14 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
|
|
2825
2825
|
#endif
|
2826
2826
|
}
|
2827
2827
|
|
2828
|
-
size_t quantize_q5_K(const float * restrict src, void * restrict dst,
|
2828
|
+
size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
2829
2829
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
|
2830
2830
|
if (!quant_weights) {
|
2831
|
-
quantize_row_q5_K_reference(src, dst, nrow*n_per_row);
|
2831
|
+
quantize_row_q5_K_reference(src, dst, (int64_t)nrow*n_per_row);
|
2832
2832
|
}
|
2833
2833
|
else {
|
2834
2834
|
char * qrow = (char *)dst;
|
2835
|
-
for (
|
2835
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
2836
2836
|
quantize_row_q5_K_impl(src, (block_q5_K*)qrow, n_per_row, quant_weights);
|
2837
2837
|
src += n_per_row;
|
2838
2838
|
qrow += row_size;
|
@@ -2843,9 +2843,9 @@ size_t quantize_q5_K(const float * restrict src, void * restrict dst, int nrow,
|
|
2843
2843
|
|
2844
2844
|
// ====================== 6-bit (de)-quantization
|
2845
2845
|
|
2846
|
-
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y,
|
2846
|
+
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int64_t k) {
|
2847
2847
|
assert(k % QK_K == 0);
|
2848
|
-
const
|
2848
|
+
const int64_t nb = k / QK_K;
|
2849
2849
|
|
2850
2850
|
int8_t L[QK_K];
|
2851
2851
|
float scales[QK_K/16];
|
@@ -2925,9 +2925,9 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
|
|
2925
2925
|
}
|
2926
2926
|
}
|
2927
2927
|
|
2928
|
-
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) {
|
2928
|
+
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int64_t k) {
|
2929
2929
|
assert(k % QK_K == 0);
|
2930
|
-
const
|
2930
|
+
const int64_t nb = k / QK_K;
|
2931
2931
|
|
2932
2932
|
for (int i = 0; i < nb; i++) {
|
2933
2933
|
|
@@ -2972,19 +2972,19 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int
|
|
2972
2972
|
}
|
2973
2973
|
}
|
2974
2974
|
|
2975
|
-
void quantize_row_q6_K(const float * restrict x, void * restrict vy,
|
2975
|
+
void quantize_row_q6_K(const float * restrict x, void * restrict vy, int64_t k) {
|
2976
2976
|
assert(k % QK_K == 0);
|
2977
2977
|
block_q6_K * restrict y = vy;
|
2978
2978
|
quantize_row_q6_K_reference(x, y, k);
|
2979
2979
|
}
|
2980
2980
|
|
2981
|
-
static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restrict y,
|
2981
|
+
static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restrict y, int64_t n_per_row, const float * quant_weights) {
|
2982
2982
|
#if QK_K != 256
|
2983
2983
|
(void)quant_weights;
|
2984
2984
|
quantize_row_q6_K_reference(x, y, n_per_row);
|
2985
2985
|
#else
|
2986
2986
|
assert(n_per_row % QK_K == 0);
|
2987
|
-
const
|
2987
|
+
const int64_t nb = n_per_row / QK_K;
|
2988
2988
|
|
2989
2989
|
int8_t L[QK_K];
|
2990
2990
|
float scales[QK_K/16];
|
@@ -3067,14 +3067,14 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
|
|
3067
3067
|
#endif
|
3068
3068
|
}
|
3069
3069
|
|
3070
|
-
size_t quantize_q6_K(const float * restrict src, void * restrict dst,
|
3070
|
+
size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3071
3071
|
size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
|
3072
3072
|
if (!quant_weights) {
|
3073
|
-
quantize_row_q6_K_reference(src, dst, nrow*n_per_row);
|
3073
|
+
quantize_row_q6_K_reference(src, dst, (int64_t)nrow*n_per_row);
|
3074
3074
|
}
|
3075
3075
|
else {
|
3076
3076
|
char * qrow = (char *)dst;
|
3077
|
-
for (
|
3077
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
3078
3078
|
quantize_row_q6_K_impl(src, (block_q6_K*)qrow, n_per_row, quant_weights);
|
3079
3079
|
src += n_per_row;
|
3080
3080
|
qrow += row_size;
|
@@ -3083,7 +3083,7 @@ size_t quantize_q6_K(const float * restrict src, void * restrict dst, int nrow,
|
|
3083
3083
|
return nrow * row_size;
|
3084
3084
|
}
|
3085
3085
|
|
3086
|
-
static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restrict y,
|
3086
|
+
static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restrict y, int64_t n_per_row, const float * quant_weights) {
|
3087
3087
|
static_assert(QK4_0 == 32, "QK4_0 must be 32");
|
3088
3088
|
|
3089
3089
|
if (!quant_weights) {
|
@@ -3098,7 +3098,7 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
|
|
3098
3098
|
for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
|
3099
3099
|
float sigma2 = sum_x2/n_per_row;
|
3100
3100
|
|
3101
|
-
const
|
3101
|
+
const int64_t nb = n_per_row/QK4_0;
|
3102
3102
|
for (int ib = 0; ib < nb; ++ib) {
|
3103
3103
|
const float * xb = x + QK4_0 * ib;
|
3104
3104
|
const float * qw = quant_weights + QK4_0 * ib;
|
@@ -3111,14 +3111,14 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
|
|
3111
3111
|
}
|
3112
3112
|
}
|
3113
3113
|
|
3114
|
-
size_t quantize_q4_0(const float * restrict src, void * restrict dst,
|
3114
|
+
size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3115
3115
|
if (!quant_weights) {
|
3116
|
-
quantize_row_q4_0_reference(src, dst, nrow*n_per_row);
|
3116
|
+
quantize_row_q4_0_reference(src, dst, (int64_t)nrow*n_per_row);
|
3117
3117
|
return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
|
3118
3118
|
}
|
3119
3119
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
|
3120
3120
|
char * qrow = (char *)dst;
|
3121
|
-
for (
|
3121
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
3122
3122
|
quantize_row_q4_0_impl(src, (block_q4_0*)qrow, n_per_row, quant_weights);
|
3123
3123
|
src += n_per_row;
|
3124
3124
|
qrow += row_size;
|
@@ -3126,7 +3126,7 @@ size_t quantize_q4_0(const float * restrict src, void * restrict dst, int nrow,
|
|
3126
3126
|
return nrow * row_size;
|
3127
3127
|
}
|
3128
3128
|
|
3129
|
-
static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restrict y,
|
3129
|
+
static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restrict y, int64_t n_per_row, const float * quant_weights) {
|
3130
3130
|
static_assert(QK4_1 == 32, "QK4_1 must be 32");
|
3131
3131
|
|
3132
3132
|
if (!quant_weights) {
|
@@ -3141,7 +3141,7 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
|
|
3141
3141
|
for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
|
3142
3142
|
float sigma2 = sum_x2/n_per_row;
|
3143
3143
|
|
3144
|
-
const
|
3144
|
+
const int64_t nb = n_per_row/QK4_1;
|
3145
3145
|
for (int ib = 0; ib < nb; ++ib) {
|
3146
3146
|
const float * xb = x + QK4_1 * ib;
|
3147
3147
|
const float * qw = quant_weights + QK4_1 * ib;
|
@@ -3156,14 +3156,14 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
|
|
3156
3156
|
}
|
3157
3157
|
}
|
3158
3158
|
|
3159
|
-
size_t quantize_q4_1(const float * restrict src, void * restrict dst,
|
3159
|
+
size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3160
3160
|
if (!quant_weights) {
|
3161
|
-
quantize_row_q4_1_reference(src, dst, nrow*n_per_row);
|
3161
|
+
quantize_row_q4_1_reference(src, dst, (int64_t)nrow*n_per_row);
|
3162
3162
|
return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
|
3163
3163
|
}
|
3164
3164
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
|
3165
3165
|
char * qrow = (char *)dst;
|
3166
|
-
for (
|
3166
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
3167
3167
|
quantize_row_q4_1_impl(src, (block_q4_1*)qrow, n_per_row, quant_weights);
|
3168
3168
|
src += n_per_row;
|
3169
3169
|
qrow += row_size;
|
@@ -3171,7 +3171,7 @@ size_t quantize_q4_1(const float * restrict src, void * restrict dst, int nrow,
|
|
3171
3171
|
return nrow * row_size;
|
3172
3172
|
}
|
3173
3173
|
|
3174
|
-
static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restrict y,
|
3174
|
+
static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restrict y, int64_t n_per_row, const float * quant_weights) {
|
3175
3175
|
static_assert(QK5_0 == 32, "QK5_0 must be 32");
|
3176
3176
|
|
3177
3177
|
if (!quant_weights) {
|
@@ -3186,7 +3186,7 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
|
|
3186
3186
|
for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
|
3187
3187
|
float sigma2 = sum_x2/n_per_row;
|
3188
3188
|
|
3189
|
-
const
|
3189
|
+
const int64_t nb = n_per_row/QK5_0;
|
3190
3190
|
for (int ib = 0; ib < nb; ++ib) {
|
3191
3191
|
const float * xb = x + QK5_0 * ib;
|
3192
3192
|
const float * qw = quant_weights + QK5_0 * ib;
|
@@ -3210,14 +3210,14 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
|
|
3210
3210
|
}
|
3211
3211
|
}
|
3212
3212
|
|
3213
|
-
size_t quantize_q5_0(const float * restrict src, void * restrict dst,
|
3213
|
+
size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3214
3214
|
if (!quant_weights) {
|
3215
|
-
quantize_row_q5_0_reference(src, dst, nrow*n_per_row);
|
3215
|
+
quantize_row_q5_0_reference(src, dst, (int64_t)nrow*n_per_row);
|
3216
3216
|
return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
|
3217
3217
|
}
|
3218
3218
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
|
3219
3219
|
char * qrow = (char *)dst;
|
3220
|
-
for (
|
3220
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
3221
3221
|
quantize_row_q5_0_impl(src, (block_q5_0*)qrow, n_per_row, quant_weights);
|
3222
3222
|
src += n_per_row;
|
3223
3223
|
qrow += row_size;
|
@@ -3225,7 +3225,7 @@ size_t quantize_q5_0(const float * restrict src, void * restrict dst, int nrow,
|
|
3225
3225
|
return nrow * row_size;
|
3226
3226
|
}
|
3227
3227
|
|
3228
|
-
static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restrict y,
|
3228
|
+
static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restrict y, int64_t n_per_row, const float * quant_weights) {
|
3229
3229
|
static_assert(QK5_1 == 32, "QK5_1 must be 32");
|
3230
3230
|
|
3231
3231
|
if (!quant_weights) {
|
@@ -3240,7 +3240,7 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
|
|
3240
3240
|
for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
|
3241
3241
|
float sigma2 = sum_x2/n_per_row;
|
3242
3242
|
|
3243
|
-
const
|
3243
|
+
const int64_t nb = n_per_row/QK5_1;
|
3244
3244
|
for (int ib = 0; ib < nb; ++ib) {
|
3245
3245
|
const float * xb = x + QK5_1 * ib;
|
3246
3246
|
const float * qw = quant_weights + QK5_1 * ib;
|
@@ -3263,14 +3263,14 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
|
|
3263
3263
|
}
|
3264
3264
|
}
|
3265
3265
|
|
3266
|
-
size_t quantize_q5_1(const float * restrict src, void * restrict dst,
|
3266
|
+
size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3267
3267
|
if (!quant_weights) {
|
3268
|
-
quantize_row_q5_1_reference(src, dst, nrow*n_per_row);
|
3268
|
+
quantize_row_q5_1_reference(src, dst, (int64_t)nrow*n_per_row);
|
3269
3269
|
return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
|
3270
3270
|
}
|
3271
3271
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
|
3272
3272
|
char * qrow = (char *)dst;
|
3273
|
-
for (
|
3273
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
3274
3274
|
quantize_row_q5_1_impl(src, (block_q5_1*)qrow, n_per_row, quant_weights);
|
3275
3275
|
src += n_per_row;
|
3276
3276
|
qrow += row_size;
|
@@ -3278,18 +3278,18 @@ size_t quantize_q5_1(const float * restrict src, void * restrict dst, int nrow,
|
|
3278
3278
|
return nrow * row_size;
|
3279
3279
|
}
|
3280
3280
|
|
3281
|
-
size_t quantize_q8_0(const float * restrict src, void * restrict dst,
|
3281
|
+
size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
3282
3282
|
(void)quant_weights; // not used
|
3283
3283
|
const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
|
3284
|
-
quantize_row_q8_0_reference(src, dst, nrow*n_per_row);
|
3284
|
+
quantize_row_q8_0_reference(src, dst, (int64_t)nrow*n_per_row);
|
3285
3285
|
return nrow * row_size;
|
3286
3286
|
}
|
3287
3287
|
|
3288
3288
|
// ====================== "True" 2-bit (de)-quantization
|
3289
3289
|
|
3290
|
-
void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y,
|
3290
|
+
void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y, int64_t k) {
|
3291
3291
|
assert(k % QK_K == 0);
|
3292
|
-
const
|
3292
|
+
const int64_t nb = k / QK_K;
|
3293
3293
|
|
3294
3294
|
uint32_t aux32[2];
|
3295
3295
|
const uint8_t * aux8 = (const uint8_t *)aux32;
|
@@ -3315,9 +3315,9 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y
|
|
3315
3315
|
|
3316
3316
|
// ====================== 2.3125 bpw (de)-quantization
|
3317
3317
|
|
3318
|
-
void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y,
|
3318
|
+
void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y, int64_t k) {
|
3319
3319
|
assert(k % QK_K == 0);
|
3320
|
-
const
|
3320
|
+
const int64_t nb = k / QK_K;
|
3321
3321
|
|
3322
3322
|
float db[2];
|
3323
3323
|
|
@@ -3342,9 +3342,9 @@ void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y,
|
|
3342
3342
|
|
3343
3343
|
// ====================== 2.5625 bpw (de)-quantization
|
3344
3344
|
|
3345
|
-
void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y,
|
3345
|
+
void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, int64_t k) {
|
3346
3346
|
assert(k % QK_K == 0);
|
3347
|
-
const
|
3347
|
+
const int64_t nb = k / QK_K;
|
3348
3348
|
|
3349
3349
|
float db[2];
|
3350
3350
|
|
@@ -3374,9 +3374,9 @@ void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, in
|
|
3374
3374
|
|
3375
3375
|
// ====================== 3.0625 bpw (de)-quantization
|
3376
3376
|
|
3377
|
-
void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y,
|
3377
|
+
void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y, int64_t k) {
|
3378
3378
|
assert(k % QK_K == 0);
|
3379
|
-
const
|
3379
|
+
const int64_t nb = k / QK_K;
|
3380
3380
|
|
3381
3381
|
uint32_t aux32;
|
3382
3382
|
|
@@ -3406,9 +3406,9 @@ void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y
|
|
3406
3406
|
|
3407
3407
|
// ====================== 3.3125 bpw (de)-quantization
|
3408
3408
|
|
3409
|
-
void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y,
|
3409
|
+
void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, int64_t k) {
|
3410
3410
|
assert(k % QK_K == 0);
|
3411
|
-
const
|
3411
|
+
const int64_t nb = k / QK_K;
|
3412
3412
|
|
3413
3413
|
for (int i = 0; i < nb; i++) {
|
3414
3414
|
|
@@ -3449,9 +3449,9 @@ void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, in
|
|
3449
3449
|
|
3450
3450
|
// ====================== 1.5625 bpw (de)-quantization
|
3451
3451
|
|
3452
|
-
void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y,
|
3452
|
+
void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, int64_t k) {
|
3453
3453
|
assert(k % QK_K == 0);
|
3454
|
-
const
|
3454
|
+
const int64_t nb = k / QK_K;
|
3455
3455
|
|
3456
3456
|
for (int i = 0; i < nb; i++) {
|
3457
3457
|
|
@@ -3474,9 +3474,9 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
|
|
3474
3474
|
}
|
3475
3475
|
}
|
3476
3476
|
|
3477
|
-
void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y,
|
3477
|
+
void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, int64_t k) {
|
3478
3478
|
assert(k % QK_K == 0);
|
3479
|
-
const
|
3479
|
+
const int64_t nb = k / QK_K;
|
3480
3480
|
|
3481
3481
|
float delta[4];
|
3482
3482
|
uint16_t idx[4];
|
@@ -3535,9 +3535,9 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in
|
|
3535
3535
|
|
3536
3536
|
static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
|
3537
3537
|
|
3538
|
-
void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
|
3538
|
+
void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) {
|
3539
3539
|
assert(k % QK4_NL == 0);
|
3540
|
-
const
|
3540
|
+
const int64_t nb = k / QK4_NL;
|
3541
3541
|
|
3542
3542
|
for (int i = 0; i < nb; i++) {
|
3543
3543
|
|
@@ -3553,12 +3553,12 @@ void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
|
|
3553
3553
|
}
|
3554
3554
|
}
|
3555
3555
|
|
3556
|
-
void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,
|
3556
|
+
void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y, int64_t k) {
|
3557
3557
|
assert(k % QK_K == 0);
|
3558
3558
|
#if QK_K == 64
|
3559
3559
|
dequantize_row_iq4_nl((const block_iq4_nl *)x, y, k);
|
3560
3560
|
#else
|
3561
|
-
const
|
3561
|
+
const int64_t nb = k / QK_K;
|
3562
3562
|
|
3563
3563
|
for (int i = 0; i < nb; i++) {
|
3564
3564
|
|
@@ -3582,9 +3582,9 @@ void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,
|
|
3582
3582
|
|
3583
3583
|
//===================================== Q8_K ==============================================
|
3584
3584
|
|
3585
|
-
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y,
|
3585
|
+
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int64_t k) {
|
3586
3586
|
assert(k % QK_K == 0);
|
3587
|
-
const
|
3587
|
+
const int64_t nb = k / QK_K;
|
3588
3588
|
|
3589
3589
|
for (int i = 0; i < nb; i++) {
|
3590
3590
|
|
@@ -3621,9 +3621,9 @@ void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict
|
|
3621
3621
|
}
|
3622
3622
|
}
|
3623
3623
|
|
3624
|
-
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y,
|
3624
|
+
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int64_t k) {
|
3625
3625
|
assert(k % QK_K == 0);
|
3626
|
-
const
|
3626
|
+
const int64_t nb = k / QK_K;
|
3627
3627
|
|
3628
3628
|
for (int i = 0; i < nb; i++) {
|
3629
3629
|
for (int j = 0; j < QK_K; ++j) {
|
@@ -3632,7 +3632,7 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int
|
|
3632
3632
|
}
|
3633
3633
|
}
|
3634
3634
|
|
3635
|
-
void quantize_row_q8_K(const float * restrict x, void * restrict y,
|
3635
|
+
void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {
|
3636
3636
|
quantize_row_q8_K_reference(x, y, k);
|
3637
3637
|
}
|
3638
3638
|
|
@@ -10648,7 +10648,7 @@ static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
10648
10648
|
return grid_index;
|
10649
10649
|
}
|
10650
10650
|
|
10651
|
-
static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict vy,
|
10651
|
+
static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
|
10652
10652
|
|
10653
10653
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);
|
10654
10654
|
|
@@ -10664,7 +10664,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
|
|
10664
10664
|
|
10665
10665
|
const int kMaxQ = 3;
|
10666
10666
|
|
10667
|
-
const
|
10667
|
+
const int64_t nbl = n/QK_K;
|
10668
10668
|
|
10669
10669
|
block_iq2_xxs * y = vy;
|
10670
10670
|
|
@@ -10821,7 +10821,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
|
|
10821
10821
|
}
|
10822
10822
|
}
|
10823
10823
|
|
10824
|
-
static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy,
|
10824
|
+
static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
|
10825
10825
|
|
10826
10826
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
|
10827
10827
|
|
@@ -10837,7 +10837,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
|
|
10837
10837
|
|
10838
10838
|
const int kMaxQ = 3;
|
10839
10839
|
|
10840
|
-
const
|
10840
|
+
const int64_t nbl = n/QK_K;
|
10841
10841
|
|
10842
10842
|
block_iq2_xs * y = vy;
|
10843
10843
|
|
@@ -11001,11 +11001,11 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
|
|
11001
11001
|
}
|
11002
11002
|
}
|
11003
11003
|
|
11004
|
-
size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst,
|
11004
|
+
size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
11005
11005
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11006
|
-
|
11006
|
+
int64_t nblock = n_per_row/QK_K;
|
11007
11007
|
char * qrow = (char *)dst;
|
11008
|
-
for (
|
11008
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
11009
11009
|
quantize_row_iq2_xxs_impl(src, qrow, n_per_row, quant_weights);
|
11010
11010
|
src += n_per_row;
|
11011
11011
|
qrow += nblock*sizeof(block_iq2_xxs);
|
@@ -11013,11 +11013,11 @@ size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int nro
|
|
11013
11013
|
return nrow * nblock * sizeof(block_iq2_xxs);
|
11014
11014
|
}
|
11015
11015
|
|
11016
|
-
size_t quantize_iq2_xs(const float * restrict src, void * restrict dst,
|
11016
|
+
size_t quantize_iq2_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
11017
11017
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11018
|
-
|
11018
|
+
int64_t nblock = n_per_row/QK_K;
|
11019
11019
|
char * qrow = (char *)dst;
|
11020
|
-
for (
|
11020
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
11021
11021
|
quantize_row_iq2_xs_impl(src, qrow, n_per_row, quant_weights);
|
11022
11022
|
src += n_per_row;
|
11023
11023
|
qrow += nblock*sizeof(block_iq2_xs);
|
@@ -11242,7 +11242,7 @@ static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
11242
11242
|
return grid_index;
|
11243
11243
|
}
|
11244
11244
|
|
11245
|
-
static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, void * restrict vy,
|
11245
|
+
static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, void * restrict vy, int64_t n,
|
11246
11246
|
const float * restrict quant_weights) {
|
11247
11247
|
|
11248
11248
|
const int gindex = iq3_data_index(grid_size);
|
@@ -11259,7 +11259,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|
11259
11259
|
|
11260
11260
|
const int kMaxQ = 8;
|
11261
11261
|
|
11262
|
-
const
|
11262
|
+
const int64_t nbl = n/QK_K;
|
11263
11263
|
|
11264
11264
|
ggml_fp16_t * dh;
|
11265
11265
|
uint8_t * qs;
|
@@ -11455,11 +11455,11 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|
11455
11455
|
}
|
11456
11456
|
}
|
11457
11457
|
|
11458
|
-
size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst,
|
11458
|
+
size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
11459
11459
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11460
|
-
|
11460
|
+
int64_t nblock = n_per_row/QK_K;
|
11461
11461
|
char * qrow = (char *)dst;
|
11462
|
-
for (
|
11462
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
11463
11463
|
quantize_row_iq3_xxs_impl(256, src, qrow, n_per_row, quant_weights);
|
11464
11464
|
src += n_per_row;
|
11465
11465
|
qrow += nblock*sizeof(block_iq3_xxs);
|
@@ -11467,13 +11467,13 @@ size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int nro
|
|
11467
11467
|
return nrow * nblock * sizeof(block_iq3_xxs);
|
11468
11468
|
}
|
11469
11469
|
|
11470
|
-
void quantize_row_iq3_xxs(const float * restrict x, void * restrict vy,
|
11470
|
+
void quantize_row_iq3_xxs(const float * restrict x, void * restrict vy, int64_t k) {
|
11471
11471
|
assert(k % QK_K == 0);
|
11472
11472
|
block_iq3_xxs * restrict y = vy;
|
11473
11473
|
quantize_row_iq3_xxs_reference(x, y, k);
|
11474
11474
|
}
|
11475
11475
|
|
11476
|
-
void quantize_row_iq3_xxs_reference(const float * restrict x, block_iq3_xxs * restrict y,
|
11476
|
+
void quantize_row_iq3_xxs_reference(const float * restrict x, block_iq3_xxs * restrict y, int64_t k) {
|
11477
11477
|
assert(k % QK_K == 0);
|
11478
11478
|
quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
|
11479
11479
|
}
|
@@ -11504,7 +11504,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|
11504
11504
|
|
11505
11505
|
const int kMaxQ = 8;
|
11506
11506
|
|
11507
|
-
const
|
11507
|
+
const int64_t nbl = n/QK_K;
|
11508
11508
|
|
11509
11509
|
block_iq3_s * y = vy;
|
11510
11510
|
|
@@ -11661,9 +11661,9 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|
11661
11661
|
}
|
11662
11662
|
|
11663
11663
|
#define IQ3S_BLOCK_SIZE 32
|
11664
|
-
size_t quantize_iq3_s(const float * restrict src, void * restrict dst,
|
11664
|
+
size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
11665
11665
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11666
|
-
|
11666
|
+
int64_t nblock = n_per_row/QK_K;
|
11667
11667
|
float scales[QK_K/IQ3S_BLOCK_SIZE];
|
11668
11668
|
float weight[IQ3S_BLOCK_SIZE];
|
11669
11669
|
float xval[IQ3S_BLOCK_SIZE];
|
@@ -11674,7 +11674,7 @@ size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int nrow,
|
|
11674
11674
|
bool is_on_grid_aux[IQ3S_BLOCK_SIZE/4];
|
11675
11675
|
uint8_t block_signs[IQ3S_BLOCK_SIZE/8];
|
11676
11676
|
char * qrow = (char *)dst;
|
11677
|
-
for (
|
11677
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
11678
11678
|
quantize_row_iq3_s_impl(IQ3S_BLOCK_SIZE, src, qrow, n_per_row, quant_weights,
|
11679
11679
|
scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs);
|
11680
11680
|
src += n_per_row;
|
@@ -11683,13 +11683,13 @@ size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int nrow,
|
|
11683
11683
|
return nrow * nblock * sizeof(block_iq3_s);
|
11684
11684
|
}
|
11685
11685
|
|
11686
|
-
void quantize_row_iq3_s(const float * restrict x, void * restrict vy,
|
11686
|
+
void quantize_row_iq3_s(const float * restrict x, void * restrict vy, int64_t k) {
|
11687
11687
|
assert(k % QK_K == 0);
|
11688
11688
|
block_iq3_s * restrict y = vy;
|
11689
11689
|
quantize_row_iq3_s_reference(x, y, k);
|
11690
11690
|
}
|
11691
11691
|
|
11692
|
-
void quantize_row_iq3_s_reference(const float * restrict x, block_iq3_s * restrict y,
|
11692
|
+
void quantize_row_iq3_s_reference(const float * restrict x, block_iq3_s * restrict y, int64_t k) {
|
11693
11693
|
assert(k % QK_K == 0);
|
11694
11694
|
quantize_iq3_s(x, y, 1, k, NULL);
|
11695
11695
|
}
|
@@ -11822,7 +11822,7 @@ static int iq1_sort_helper(const void * left, const void * right) {
|
|
11822
11822
|
|
11823
11823
|
#define IQ1S_BLOCK_SIZE 32
|
11824
11824
|
#define IQ1M_BLOCK_SIZE 16
|
11825
|
-
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy,
|
11825
|
+
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
|
11826
11826
|
float * scales,
|
11827
11827
|
float * weight,
|
11828
11828
|
float * sumx,
|
@@ -11846,7 +11846,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
11846
11846
|
|
11847
11847
|
block_iq1_s * y = vy;
|
11848
11848
|
|
11849
|
-
const
|
11849
|
+
const int64_t nbl = n/QK_K;
|
11850
11850
|
|
11851
11851
|
const int block_size = IQ1S_BLOCK_SIZE;
|
11852
11852
|
|
@@ -11980,7 +11980,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
11980
11980
|
}
|
11981
11981
|
}
|
11982
11982
|
|
11983
|
-
size_t quantize_iq1_s(const float * restrict src, void * restrict dst,
|
11983
|
+
size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
11984
11984
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11985
11985
|
float scales[QK_K/IQ1S_BLOCK_SIZE];
|
11986
11986
|
float weight[IQ1S_BLOCK_SIZE];
|
@@ -11990,9 +11990,9 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int nrow,
|
|
11990
11990
|
float pairs[2*IQ1S_BLOCK_SIZE];
|
11991
11991
|
uint16_t index[IQ1S_BLOCK_SIZE/8];
|
11992
11992
|
int8_t shifts[QK_K/IQ1S_BLOCK_SIZE];
|
11993
|
-
|
11993
|
+
int64_t nblock = n_per_row/QK_K;
|
11994
11994
|
char * qrow = (char *)dst;
|
11995
|
-
for (
|
11995
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
11996
11996
|
quantize_row_iq1_s_impl(src, qrow, n_per_row, quant_weights, scales, weight, sumx, sumw, pairs, L, index, shifts);
|
11997
11997
|
src += n_per_row;
|
11998
11998
|
qrow += nblock*sizeof(block_iq1_s);
|
@@ -12000,7 +12000,7 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int nrow,
|
|
12000
12000
|
return nrow * nblock * sizeof(block_iq1_s);
|
12001
12001
|
}
|
12002
12002
|
|
12003
|
-
static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy,
|
12003
|
+
static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
|
12004
12004
|
float * scales,
|
12005
12005
|
float * weight,
|
12006
12006
|
float * pairs,
|
@@ -12022,7 +12022,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
|
12022
12022
|
|
12023
12023
|
block_iq1_m * y = vy;
|
12024
12024
|
|
12025
|
-
const
|
12025
|
+
const int64_t nbl = n/QK_K;
|
12026
12026
|
|
12027
12027
|
const int block_size = IQ1M_BLOCK_SIZE;
|
12028
12028
|
|
@@ -12265,7 +12265,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
|
12265
12265
|
}
|
12266
12266
|
}
|
12267
12267
|
|
12268
|
-
size_t quantize_iq1_m(const float * restrict src, void * restrict dst,
|
12268
|
+
size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
12269
12269
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12270
12270
|
float scales[QK_K/IQ1M_BLOCK_SIZE];
|
12271
12271
|
float weight[IQ1M_BLOCK_SIZE];
|
@@ -12273,9 +12273,9 @@ size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int nrow,
|
|
12273
12273
|
float pairs[2*IQ1M_BLOCK_SIZE];
|
12274
12274
|
uint16_t index[IQ1M_BLOCK_SIZE/8];
|
12275
12275
|
int8_t shifts[QK_K/IQ1M_BLOCK_SIZE];
|
12276
|
-
|
12276
|
+
int64_t nblock = n_per_row/QK_K;
|
12277
12277
|
char * qrow = (char *)dst;
|
12278
|
-
for (
|
12278
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
12279
12279
|
quantize_row_iq1_m_impl(src, qrow, n_per_row, quant_weights, scales, weight, pairs, L, index, shifts);
|
12280
12280
|
src += n_per_row;
|
12281
12281
|
qrow += nblock*sizeof(block_iq1_m);
|
@@ -12407,16 +12407,16 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|
12407
12407
|
}
|
12408
12408
|
}
|
12409
12409
|
|
12410
|
-
size_t quantize_iq4_nl(const float * restrict src, void * restrict dst,
|
12410
|
+
size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
12411
12411
|
GGML_ASSERT(n_per_row%QK4_NL == 0);
|
12412
|
-
|
12412
|
+
int64_t nblock = n_per_row/QK4_NL;
|
12413
12413
|
char * qrow = (char *)dst;
|
12414
12414
|
uint8_t L[QK4_NL];
|
12415
12415
|
float weight[QK4_NL];
|
12416
12416
|
uint16_t unused_h;
|
12417
12417
|
uint8_t * unused_l = NULL;
|
12418
12418
|
float scale;
|
12419
|
-
for (
|
12419
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
12420
12420
|
block_iq4_nl * iq4 = (block_iq4_nl *)qrow;
|
12421
12421
|
for (int ibl = 0; ibl < nblock; ++ibl) {
|
12422
12422
|
const float * qw = quant_weights ? quant_weights + QK4_NL*ibl : NULL;
|
@@ -12429,9 +12429,9 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
|
|
12429
12429
|
return nrow * nblock * sizeof(block_iq4_nl);
|
12430
12430
|
}
|
12431
12431
|
|
12432
|
-
void quantize_row_iq4_nl(const float * restrict x, void * restrict vy,
|
12432
|
+
void quantize_row_iq4_nl(const float * restrict x, void * restrict vy, int64_t k) {
|
12433
12433
|
GGML_ASSERT(k%QK4_NL == 0);
|
12434
|
-
|
12434
|
+
int64_t nblock = k/QK4_NL;
|
12435
12435
|
uint8_t L[QK4_NL];
|
12436
12436
|
float weight[QK4_NL];
|
12437
12437
|
uint16_t unused_h;
|
@@ -12444,22 +12444,22 @@ void quantize_row_iq4_nl(const float * restrict x, void * restrict vy, int k) {
|
|
12444
12444
|
}
|
12445
12445
|
}
|
12446
12446
|
|
12447
|
-
void quantize_row_iq4_nl_reference(const float * restrict x, block_iq4_nl * restrict y,
|
12447
|
+
void quantize_row_iq4_nl_reference(const float * restrict x, block_iq4_nl * restrict y, int64_t k) {
|
12448
12448
|
assert(k % QK4_NL == 0);
|
12449
12449
|
quantize_row_iq4_nl(x, y, k);
|
12450
12450
|
}
|
12451
12451
|
|
12452
|
-
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst,
|
12452
|
+
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
12453
12453
|
#if QK_K == 64
|
12454
12454
|
return quantize_iq4_nl(src, dst, nrow, n_per_row, quant_weights);
|
12455
12455
|
#else
|
12456
12456
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12457
|
-
|
12457
|
+
int64_t nblock = n_per_row/QK_K;
|
12458
12458
|
char * qrow = (char *)dst;
|
12459
12459
|
uint8_t L[QK_K];
|
12460
12460
|
float weight[32];
|
12461
12461
|
float scales[QK_K/32];
|
12462
|
-
for (
|
12462
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
12463
12463
|
block_iq4_xs * iq4 = (block_iq4_xs *)qrow;
|
12464
12464
|
for (int ibl = 0; ibl < nblock; ++ibl) {
|
12465
12465
|
const float * qw = quant_weights ? quant_weights + QK_K*ibl : NULL;
|
@@ -12473,20 +12473,20 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow
|
|
12473
12473
|
#endif
|
12474
12474
|
}
|
12475
12475
|
|
12476
|
-
void quantize_row_iq4_xs(const float * restrict x, void * restrict vy,
|
12476
|
+
void quantize_row_iq4_xs(const float * restrict x, void * restrict vy, int64_t k) {
|
12477
12477
|
assert(k % QK_K == 0);
|
12478
12478
|
block_iq4_xs * restrict y = vy;
|
12479
12479
|
quantize_row_iq4_xs_reference(x, y, k);
|
12480
12480
|
}
|
12481
12481
|
|
12482
|
-
void quantize_row_iq4_xs_reference(const float * restrict x, block_iq4_xs * restrict y,
|
12482
|
+
void quantize_row_iq4_xs_reference(const float * restrict x, block_iq4_xs * restrict y, int64_t k) {
|
12483
12483
|
assert(k % QK_K == 0);
|
12484
12484
|
quantize_iq4_xs(x, y, 1, k, NULL);
|
12485
12485
|
}
|
12486
12486
|
|
12487
12487
|
// =============================== 2.5625 bpw
|
12488
12488
|
|
12489
|
-
static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy,
|
12489
|
+
static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
|
12490
12490
|
|
12491
12491
|
const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);
|
12492
12492
|
|
@@ -12501,7 +12501,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
|
|
12501
12501
|
|
12502
12502
|
const int kMaxQ = 3;
|
12503
12503
|
|
12504
|
-
const
|
12504
|
+
const int64_t nbl = n/QK_K;
|
12505
12505
|
|
12506
12506
|
block_iq2_s * y = vy;
|
12507
12507
|
|
@@ -12654,11 +12654,11 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
|
|
12654
12654
|
}
|
12655
12655
|
}
|
12656
12656
|
|
12657
|
-
size_t quantize_iq2_s(const float * restrict src, void * restrict dst,
|
12657
|
+
size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
12658
12658
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12659
|
-
|
12659
|
+
int64_t nblock = n_per_row/QK_K;
|
12660
12660
|
char * qrow = (char *)dst;
|
12661
|
-
for (
|
12661
|
+
for (int64_t row = 0; row < nrow; ++row) {
|
12662
12662
|
quantize_row_iq2_s_impl(src, qrow, n_per_row, quant_weights);
|
12663
12663
|
src += n_per_row;
|
12664
12664
|
qrow += nblock*sizeof(block_iq2_s);
|
@@ -12666,12 +12666,12 @@ size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int nrow,
|
|
12666
12666
|
return nrow * nblock * sizeof(block_iq2_s);
|
12667
12667
|
}
|
12668
12668
|
|
12669
|
-
void quantize_row_iq2_s_reference(const float * restrict x, block_iq2_s * restrict y,
|
12669
|
+
void quantize_row_iq2_s_reference(const float * restrict x, block_iq2_s * restrict y, int64_t k) {
|
12670
12670
|
assert(k % QK_K == 0);
|
12671
12671
|
quantize_iq2_s(x, y, 1, k, NULL);
|
12672
12672
|
}
|
12673
12673
|
|
12674
|
-
void quantize_row_iq2_s(const float * restrict x, void * restrict vy,
|
12674
|
+
void quantize_row_iq2_s(const float * restrict x, void * restrict vy, int64_t k) {
|
12675
12675
|
assert(k % QK_K == 0);
|
12676
12676
|
block_iq2_s * restrict y = vy;
|
12677
12677
|
quantize_row_iq2_s_reference(x, y, k);
|