whisper.rn 0.4.0-rc.4 → 0.4.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/android/build.gradle +4 -0
- package/android/src/main/CMakeLists.txt +5 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +0 -80
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +51 -133
- package/android/src/main/jni-utils.h +76 -0
- package/android/src/main/jni.cpp +187 -112
- package/cpp/README.md +1 -1
- package/cpp/coreml/whisper-encoder-impl.h +1 -1
- package/cpp/coreml/whisper-encoder.h +4 -0
- package/cpp/coreml/whisper-encoder.mm +4 -2
- package/cpp/ggml-alloc.c +55 -19
- package/cpp/ggml-alloc.h +7 -0
- package/cpp/ggml-backend-impl.h +46 -21
- package/cpp/ggml-backend.c +563 -156
- package/cpp/ggml-backend.h +62 -17
- package/cpp/ggml-impl.h +1 -1
- package/cpp/ggml-metal-whisper.metal +1010 -253
- package/cpp/ggml-metal.h +7 -1
- package/cpp/ggml-metal.m +618 -187
- package/cpp/ggml-quants.c +64 -59
- package/cpp/ggml-quants.h +40 -40
- package/cpp/ggml.c +751 -1466
- package/cpp/ggml.h +90 -25
- package/cpp/rn-audioutils.cpp +68 -0
- package/cpp/rn-audioutils.h +14 -0
- package/cpp/rn-whisper-log.h +11 -0
- package/cpp/rn-whisper.cpp +141 -59
- package/cpp/rn-whisper.h +47 -15
- package/cpp/whisper.cpp +1635 -928
- package/cpp/whisper.h +55 -10
- package/ios/RNWhisper.mm +7 -7
- package/ios/RNWhisperAudioUtils.h +0 -2
- package/ios/RNWhisperAudioUtils.m +0 -56
- package/ios/RNWhisperContext.h +3 -11
- package/ios/RNWhisperContext.mm +62 -134
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +6 -5
- package/src/version.json +1 -1
package/cpp/ggml-quants.c
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#ifdef __wasm_simd128__
|
|
20
20
|
#include <wasm_simd128.h>
|
|
21
21
|
#else
|
|
22
|
-
#
|
|
22
|
+
#if defined(__POWER9_VECTOR__) || defined(__powerpc64__)
|
|
23
23
|
#include <altivec.h>
|
|
24
24
|
#undef bool
|
|
25
25
|
#define bool _Bool
|
|
@@ -425,7 +425,7 @@ static const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4
|
|
|
425
425
|
#endif
|
|
426
426
|
|
|
427
427
|
// reference implementation for deterministic creation of model files
|
|
428
|
-
void
|
|
428
|
+
void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
|
|
429
429
|
static const int qk = QK4_0;
|
|
430
430
|
|
|
431
431
|
assert(k % qk == 0);
|
|
@@ -462,11 +462,11 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict
|
|
|
462
462
|
}
|
|
463
463
|
}
|
|
464
464
|
|
|
465
|
-
void
|
|
466
|
-
|
|
465
|
+
void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
|
|
466
|
+
wsp_quantize_row_q4_0_reference(x, y, k);
|
|
467
467
|
}
|
|
468
468
|
|
|
469
|
-
void
|
|
469
|
+
void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) {
|
|
470
470
|
const int qk = QK4_1;
|
|
471
471
|
|
|
472
472
|
assert(k % qk == 0);
|
|
@@ -503,11 +503,11 @@ void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict
|
|
|
503
503
|
}
|
|
504
504
|
}
|
|
505
505
|
|
|
506
|
-
void
|
|
507
|
-
|
|
506
|
+
void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
|
|
507
|
+
wsp_quantize_row_q4_1_reference(x, y, k);
|
|
508
508
|
}
|
|
509
509
|
|
|
510
|
-
void
|
|
510
|
+
void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) {
|
|
511
511
|
static const int qk = QK5_0;
|
|
512
512
|
|
|
513
513
|
assert(k % qk == 0);
|
|
@@ -551,11 +551,11 @@ void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict
|
|
|
551
551
|
}
|
|
552
552
|
}
|
|
553
553
|
|
|
554
|
-
void
|
|
555
|
-
|
|
554
|
+
void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k) {
|
|
555
|
+
wsp_quantize_row_q5_0_reference(x, y, k);
|
|
556
556
|
}
|
|
557
557
|
|
|
558
|
-
void
|
|
558
|
+
void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) {
|
|
559
559
|
const int qk = QK5_1;
|
|
560
560
|
|
|
561
561
|
assert(k % qk == 0);
|
|
@@ -599,12 +599,12 @@ void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict
|
|
|
599
599
|
}
|
|
600
600
|
}
|
|
601
601
|
|
|
602
|
-
void
|
|
603
|
-
|
|
602
|
+
void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k) {
|
|
603
|
+
wsp_quantize_row_q5_1_reference(x, y, k);
|
|
604
604
|
}
|
|
605
605
|
|
|
606
606
|
// reference implementation for deterministic creation of model files
|
|
607
|
-
void
|
|
607
|
+
void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) {
|
|
608
608
|
assert(k % QK8_0 == 0);
|
|
609
609
|
const int nb = k / QK8_0;
|
|
610
610
|
|
|
@@ -629,7 +629,7 @@ void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict
|
|
|
629
629
|
}
|
|
630
630
|
}
|
|
631
631
|
|
|
632
|
-
void
|
|
632
|
+
void wsp_quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
|
|
633
633
|
assert(QK8_0 == 32);
|
|
634
634
|
assert(k % QK8_0 == 0);
|
|
635
635
|
const int nb = k / QK8_0;
|
|
@@ -813,12 +813,12 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
|
|
|
813
813
|
#else
|
|
814
814
|
WSP_GGML_UNUSED(nb);
|
|
815
815
|
// scalar
|
|
816
|
-
|
|
816
|
+
wsp_quantize_row_q8_0_reference(x, y, k);
|
|
817
817
|
#endif
|
|
818
818
|
}
|
|
819
819
|
|
|
820
820
|
// reference implementation for deterministic creation of model files
|
|
821
|
-
void
|
|
821
|
+
void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
|
|
822
822
|
assert(QK8_1 == 32);
|
|
823
823
|
assert(k % QK8_1 == 0);
|
|
824
824
|
const int nb = k / QK8_1;
|
|
@@ -853,7 +853,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
|
|
|
853
853
|
}
|
|
854
854
|
}
|
|
855
855
|
|
|
856
|
-
void
|
|
856
|
+
void wsp_quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
857
857
|
assert(k % QK8_1 == 0);
|
|
858
858
|
const int nb = k / QK8_1;
|
|
859
859
|
|
|
@@ -1067,11 +1067,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
|
1067
1067
|
#else
|
|
1068
1068
|
WSP_GGML_UNUSED(nb);
|
|
1069
1069
|
// scalar
|
|
1070
|
-
|
|
1070
|
+
wsp_quantize_row_q8_1_reference(x, y, k);
|
|
1071
1071
|
#endif
|
|
1072
1072
|
}
|
|
1073
1073
|
|
|
1074
|
-
void
|
|
1074
|
+
void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
|
|
1075
1075
|
static const int qk = QK4_0;
|
|
1076
1076
|
|
|
1077
1077
|
assert(k % qk == 0);
|
|
@@ -1091,7 +1091,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
|
|
|
1091
1091
|
}
|
|
1092
1092
|
}
|
|
1093
1093
|
|
|
1094
|
-
void
|
|
1094
|
+
void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
|
|
1095
1095
|
static const int qk = QK4_1;
|
|
1096
1096
|
|
|
1097
1097
|
assert(k % qk == 0);
|
|
@@ -1112,7 +1112,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
|
|
|
1112
1112
|
}
|
|
1113
1113
|
}
|
|
1114
1114
|
|
|
1115
|
-
void
|
|
1115
|
+
void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
|
|
1116
1116
|
static const int qk = QK5_0;
|
|
1117
1117
|
|
|
1118
1118
|
assert(k % qk == 0);
|
|
@@ -1138,7 +1138,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int
|
|
|
1138
1138
|
}
|
|
1139
1139
|
}
|
|
1140
1140
|
|
|
1141
|
-
void
|
|
1141
|
+
void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
|
|
1142
1142
|
static const int qk = QK5_1;
|
|
1143
1143
|
|
|
1144
1144
|
assert(k % qk == 0);
|
|
@@ -1165,7 +1165,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int
|
|
|
1165
1165
|
}
|
|
1166
1166
|
}
|
|
1167
1167
|
|
|
1168
|
-
void
|
|
1168
|
+
void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) {
|
|
1169
1169
|
static const int qk = QK8_0;
|
|
1170
1170
|
|
|
1171
1171
|
assert(k % qk == 0);
|
|
@@ -1368,7 +1368,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
|
|
|
1368
1368
|
float max = x[0];
|
|
1369
1369
|
float sum_w = weights[0];
|
|
1370
1370
|
float sum_x = sum_w * x[0];
|
|
1371
|
+
#ifdef HAVE_BUGGY_APPLE_LINKER
|
|
1372
|
+
// use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
|
|
1373
|
+
for (volatile int i = 1; i < n; ++i) {
|
|
1374
|
+
#else
|
|
1371
1375
|
for (int i = 1; i < n; ++i) {
|
|
1376
|
+
#endif
|
|
1372
1377
|
if (x[i] < min) min = x[i];
|
|
1373
1378
|
if (x[i] > max) max = x[i];
|
|
1374
1379
|
float w = weights[i];
|
|
@@ -1450,7 +1455,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
|
|
|
1450
1455
|
|
|
1451
1456
|
//========================- 2-bit (de)-quantization
|
|
1452
1457
|
|
|
1453
|
-
void
|
|
1458
|
+
void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) {
|
|
1454
1459
|
assert(k % QK_K == 0);
|
|
1455
1460
|
const int nb = k / QK_K;
|
|
1456
1461
|
|
|
@@ -1527,7 +1532,7 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
|
|
|
1527
1532
|
}
|
|
1528
1533
|
}
|
|
1529
1534
|
|
|
1530
|
-
void
|
|
1535
|
+
void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) {
|
|
1531
1536
|
assert(k % QK_K == 0);
|
|
1532
1537
|
const int nb = k / QK_K;
|
|
1533
1538
|
|
|
@@ -1573,23 +1578,23 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int
|
|
|
1573
1578
|
}
|
|
1574
1579
|
}
|
|
1575
1580
|
|
|
1576
|
-
void
|
|
1577
|
-
|
|
1581
|
+
void wsp_quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
|
|
1582
|
+
wsp_quantize_row_q2_K_reference(x, vy, k);
|
|
1578
1583
|
}
|
|
1579
1584
|
|
|
1580
|
-
size_t
|
|
1585
|
+
size_t wsp_ggml_wsp_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
|
1581
1586
|
(void)hist; // TODO: collect histograms
|
|
1582
1587
|
|
|
1583
1588
|
for (int j = 0; j < n; j += k) {
|
|
1584
1589
|
block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
|
|
1585
|
-
|
|
1590
|
+
wsp_quantize_row_q2_K_reference(src + j, y, k);
|
|
1586
1591
|
}
|
|
1587
1592
|
return (n/QK_K*sizeof(block_q2_K));
|
|
1588
1593
|
}
|
|
1589
1594
|
|
|
1590
1595
|
//========================= 3-bit (de)-quantization
|
|
1591
1596
|
|
|
1592
|
-
void
|
|
1597
|
+
void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) {
|
|
1593
1598
|
assert(k % QK_K == 0);
|
|
1594
1599
|
const int nb = k / QK_K;
|
|
1595
1600
|
|
|
@@ -1703,7 +1708,7 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict
|
|
|
1703
1708
|
}
|
|
1704
1709
|
|
|
1705
1710
|
#if QK_K == 256
|
|
1706
|
-
void
|
|
1711
|
+
void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
|
|
1707
1712
|
assert(k % QK_K == 0);
|
|
1708
1713
|
const int nb = k / QK_K;
|
|
1709
1714
|
|
|
@@ -1753,7 +1758,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
|
|
|
1753
1758
|
}
|
|
1754
1759
|
}
|
|
1755
1760
|
#else
|
|
1756
|
-
void
|
|
1761
|
+
void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
|
|
1757
1762
|
assert(k % QK_K == 0);
|
|
1758
1763
|
assert(QK_K == 64);
|
|
1759
1764
|
const int nb = k / QK_K;
|
|
@@ -1786,23 +1791,23 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
|
|
|
1786
1791
|
}
|
|
1787
1792
|
#endif
|
|
1788
1793
|
|
|
1789
|
-
void
|
|
1790
|
-
|
|
1794
|
+
void wsp_quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
|
|
1795
|
+
wsp_quantize_row_q3_K_reference(x, vy, k);
|
|
1791
1796
|
}
|
|
1792
1797
|
|
|
1793
|
-
size_t
|
|
1798
|
+
size_t wsp_ggml_wsp_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
|
1794
1799
|
(void)hist; // TODO: collect histograms
|
|
1795
1800
|
|
|
1796
1801
|
for (int j = 0; j < n; j += k) {
|
|
1797
1802
|
block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
|
|
1798
|
-
|
|
1803
|
+
wsp_quantize_row_q3_K_reference(src + j, y, k);
|
|
1799
1804
|
}
|
|
1800
1805
|
return (n/QK_K*sizeof(block_q3_K));
|
|
1801
1806
|
}
|
|
1802
1807
|
|
|
1803
1808
|
// ====================== 4-bit (de)-quantization
|
|
1804
1809
|
|
|
1805
|
-
void
|
|
1810
|
+
void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) {
|
|
1806
1811
|
assert(k % QK_K == 0);
|
|
1807
1812
|
const int nb = k / QK_K;
|
|
1808
1813
|
|
|
@@ -1909,7 +1914,7 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
|
|
|
1909
1914
|
}
|
|
1910
1915
|
}
|
|
1911
1916
|
|
|
1912
|
-
void
|
|
1917
|
+
void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) {
|
|
1913
1918
|
assert(k % QK_K == 0);
|
|
1914
1919
|
const int nb = k / QK_K;
|
|
1915
1920
|
|
|
@@ -1948,26 +1953,26 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int
|
|
|
1948
1953
|
}
|
|
1949
1954
|
}
|
|
1950
1955
|
|
|
1951
|
-
void
|
|
1956
|
+
void wsp_quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
|
|
1952
1957
|
assert(k % QK_K == 0);
|
|
1953
1958
|
block_q4_K * restrict y = vy;
|
|
1954
|
-
|
|
1959
|
+
wsp_quantize_row_q4_K_reference(x, y, k);
|
|
1955
1960
|
}
|
|
1956
1961
|
|
|
1957
|
-
size_t
|
|
1962
|
+
size_t wsp_ggml_wsp_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
|
1958
1963
|
assert(k % QK_K == 0);
|
|
1959
1964
|
(void)hist; // TODO: collect histograms
|
|
1960
1965
|
|
|
1961
1966
|
for (int j = 0; j < n; j += k) {
|
|
1962
1967
|
block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
|
|
1963
|
-
|
|
1968
|
+
wsp_quantize_row_q4_K_reference(src + j, y, k);
|
|
1964
1969
|
}
|
|
1965
1970
|
return (n/QK_K*sizeof(block_q4_K));
|
|
1966
1971
|
}
|
|
1967
1972
|
|
|
1968
1973
|
// ====================== 5-bit (de)-quantization
|
|
1969
1974
|
|
|
1970
|
-
void
|
|
1975
|
+
void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) {
|
|
1971
1976
|
assert(k % QK_K == 0);
|
|
1972
1977
|
const int nb = k / QK_K;
|
|
1973
1978
|
|
|
@@ -2109,7 +2114,7 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
|
|
|
2109
2114
|
}
|
|
2110
2115
|
}
|
|
2111
2116
|
|
|
2112
|
-
void
|
|
2117
|
+
void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) {
|
|
2113
2118
|
assert(k % QK_K == 0);
|
|
2114
2119
|
const int nb = k / QK_K;
|
|
2115
2120
|
|
|
@@ -2154,26 +2159,26 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int
|
|
|
2154
2159
|
}
|
|
2155
2160
|
}
|
|
2156
2161
|
|
|
2157
|
-
void
|
|
2162
|
+
void wsp_quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
|
|
2158
2163
|
assert(k % QK_K == 0);
|
|
2159
2164
|
block_q5_K * restrict y = vy;
|
|
2160
|
-
|
|
2165
|
+
wsp_quantize_row_q5_K_reference(x, y, k);
|
|
2161
2166
|
}
|
|
2162
2167
|
|
|
2163
|
-
size_t
|
|
2168
|
+
size_t wsp_ggml_wsp_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
|
2164
2169
|
assert(k % QK_K == 0);
|
|
2165
2170
|
(void)hist; // TODO: collect histograms
|
|
2166
2171
|
|
|
2167
2172
|
for (int j = 0; j < n; j += k) {
|
|
2168
2173
|
block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
|
|
2169
|
-
|
|
2174
|
+
wsp_quantize_row_q5_K_reference(src + j, y, k);
|
|
2170
2175
|
}
|
|
2171
2176
|
return (n/QK_K*sizeof(block_q5_K));
|
|
2172
2177
|
}
|
|
2173
2178
|
|
|
2174
2179
|
// ====================== 6-bit (de)-quantization
|
|
2175
2180
|
|
|
2176
|
-
void
|
|
2181
|
+
void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) {
|
|
2177
2182
|
assert(k % QK_K == 0);
|
|
2178
2183
|
const int nb = k / QK_K;
|
|
2179
2184
|
|
|
@@ -2255,7 +2260,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
|
|
|
2255
2260
|
}
|
|
2256
2261
|
}
|
|
2257
2262
|
|
|
2258
|
-
void
|
|
2263
|
+
void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) {
|
|
2259
2264
|
assert(k % QK_K == 0);
|
|
2260
2265
|
const int nb = k / QK_K;
|
|
2261
2266
|
|
|
@@ -2302,26 +2307,26 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int
|
|
|
2302
2307
|
}
|
|
2303
2308
|
}
|
|
2304
2309
|
|
|
2305
|
-
void
|
|
2310
|
+
void wsp_quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
|
|
2306
2311
|
assert(k % QK_K == 0);
|
|
2307
2312
|
block_q6_K * restrict y = vy;
|
|
2308
|
-
|
|
2313
|
+
wsp_quantize_row_q6_K_reference(x, y, k);
|
|
2309
2314
|
}
|
|
2310
2315
|
|
|
2311
|
-
size_t
|
|
2316
|
+
size_t wsp_ggml_wsp_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
|
|
2312
2317
|
assert(k % QK_K == 0);
|
|
2313
2318
|
(void)hist; // TODO: collect histograms
|
|
2314
2319
|
|
|
2315
2320
|
for (int j = 0; j < n; j += k) {
|
|
2316
2321
|
block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
|
|
2317
|
-
|
|
2322
|
+
wsp_quantize_row_q6_K_reference(src + j, y, k);
|
|
2318
2323
|
}
|
|
2319
2324
|
return (n/QK_K*sizeof(block_q6_K));
|
|
2320
2325
|
}
|
|
2321
2326
|
|
|
2322
2327
|
//===================================== Q8_K ==============================================
|
|
2323
2328
|
|
|
2324
|
-
void
|
|
2329
|
+
void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) {
|
|
2325
2330
|
assert(k % QK_K == 0);
|
|
2326
2331
|
const int nb = k / QK_K;
|
|
2327
2332
|
|
|
@@ -2358,7 +2363,7 @@ void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict
|
|
|
2358
2363
|
}
|
|
2359
2364
|
}
|
|
2360
2365
|
|
|
2361
|
-
void
|
|
2366
|
+
void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) {
|
|
2362
2367
|
assert(k % QK_K == 0);
|
|
2363
2368
|
const int nb = k / QK_K;
|
|
2364
2369
|
|
|
@@ -2369,8 +2374,8 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int
|
|
|
2369
2374
|
}
|
|
2370
2375
|
}
|
|
2371
2376
|
|
|
2372
|
-
void
|
|
2373
|
-
|
|
2377
|
+
void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k) {
|
|
2378
|
+
wsp_quantize_row_q8_K_reference(x, y, k);
|
|
2374
2379
|
}
|
|
2375
2380
|
|
|
2376
2381
|
//===================================== Dot products =================================
|
package/cpp/ggml-quants.h
CHANGED
|
@@ -167,48 +167,48 @@ static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_
|
|
|
167
167
|
|
|
168
168
|
|
|
169
169
|
// Quantization
|
|
170
|
-
void
|
|
171
|
-
void
|
|
172
|
-
void
|
|
173
|
-
void
|
|
174
|
-
void
|
|
175
|
-
void
|
|
176
|
-
|
|
177
|
-
void
|
|
178
|
-
void
|
|
179
|
-
void
|
|
180
|
-
void
|
|
181
|
-
void
|
|
182
|
-
void
|
|
183
|
-
|
|
184
|
-
void
|
|
185
|
-
void
|
|
186
|
-
void
|
|
187
|
-
void
|
|
188
|
-
void
|
|
189
|
-
void
|
|
190
|
-
|
|
191
|
-
void
|
|
192
|
-
void
|
|
193
|
-
void
|
|
194
|
-
void
|
|
195
|
-
void
|
|
196
|
-
void
|
|
170
|
+
void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
|
|
171
|
+
void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
|
|
172
|
+
void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
|
|
173
|
+
void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
|
|
174
|
+
void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
|
|
175
|
+
void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
|
|
176
|
+
|
|
177
|
+
void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
|
|
178
|
+
void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
|
|
179
|
+
void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
|
|
180
|
+
void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
|
|
181
|
+
void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
|
|
182
|
+
void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
|
|
183
|
+
|
|
184
|
+
void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
|
|
185
|
+
void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
|
|
186
|
+
void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
|
|
187
|
+
void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
|
|
188
|
+
void wsp_quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
|
|
189
|
+
void wsp_quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
|
|
190
|
+
|
|
191
|
+
void wsp_quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
|
|
192
|
+
void wsp_quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
|
|
193
|
+
void wsp_quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
|
|
194
|
+
void wsp_quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
|
|
195
|
+
void wsp_quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
|
|
196
|
+
void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
|
|
197
197
|
|
|
198
198
|
// Dequantization
|
|
199
|
-
void
|
|
200
|
-
void
|
|
201
|
-
void
|
|
202
|
-
void
|
|
203
|
-
void
|
|
204
|
-
//void
|
|
205
|
-
|
|
206
|
-
void
|
|
207
|
-
void
|
|
208
|
-
void
|
|
209
|
-
void
|
|
210
|
-
void
|
|
211
|
-
void
|
|
199
|
+
void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
|
|
200
|
+
void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
|
|
201
|
+
void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
|
|
202
|
+
void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
|
|
203
|
+
void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
|
|
204
|
+
//void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
|
|
205
|
+
|
|
206
|
+
void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
|
|
207
|
+
void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
|
|
208
|
+
void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
|
|
209
|
+
void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
|
|
210
|
+
void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
|
|
211
|
+
void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
|
|
212
212
|
|
|
213
213
|
// Dot product
|
|
214
214
|
void wsp_ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|