npm - whisper.rn - Versions diffs - 0.4.0-rc.4 → 0.4.0-rc.6 - Mend

whisper.rn 0.4.0-rc.4 → 0.4.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/README.md +6 -6
package/android/build.gradle +4 -0
package/android/src/main/CMakeLists.txt +5 -0
package/android/src/main/java/com/rnwhisper/AudioUtils.java +0 -80
package/android/src/main/java/com/rnwhisper/WhisperContext.java +57 -134
package/android/src/main/jni-utils.h +76 -0
package/android/src/main/jni.cpp +188 -112
package/cpp/README.md +1 -1
package/cpp/coreml/whisper-encoder-impl.h +1 -1
package/cpp/coreml/whisper-encoder.h +4 -0
package/cpp/coreml/whisper-encoder.mm +4 -2
package/cpp/ggml-alloc.c +55 -19
package/cpp/ggml-alloc.h +8 -1
package/cpp/ggml-backend-impl.h +46 -21
package/cpp/ggml-backend.c +563 -156
package/cpp/ggml-backend.h +62 -17
package/cpp/ggml-impl.h +1 -1
package/cpp/ggml-metal-whisper.metal +2444 -359
package/cpp/ggml-metal.h +7 -1
package/cpp/ggml-metal.m +1105 -197
package/cpp/ggml-quants.c +66 -61
package/cpp/ggml-quants.h +40 -40
package/cpp/ggml.c +1040 -1590
package/cpp/ggml.h +109 -30
package/cpp/rn-audioutils.cpp +68 -0
package/cpp/rn-audioutils.h +14 -0
package/cpp/rn-whisper-log.h +11 -0
package/cpp/rn-whisper.cpp +143 -59
package/cpp/rn-whisper.h +48 -15
package/cpp/whisper.cpp +1635 -928
package/cpp/whisper.h +55 -10
package/ios/RNWhisper.mm +7 -7
package/ios/RNWhisperAudioUtils.h +0 -2
package/ios/RNWhisperAudioUtils.m +0 -56
package/ios/RNWhisperContext.h +3 -11
package/ios/RNWhisperContext.mm +68 -137
package/lib/commonjs/index.js.map +1 -1
package/lib/commonjs/version.json +1 -1
package/lib/module/index.js.map +1 -1
package/lib/module/version.json +1 -1
package/lib/typescript/index.d.ts +5 -0
package/lib/typescript/index.d.ts.map +1 -1
package/package.json +6 -5
package/src/index.ts +5 -0
package/src/version.json +1 -1
package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19

package/cpp/ggml-quants.c CHANGED

@@ -19,7 +19,7 @@
 #ifdef __wasm_simd128__
 #include <wasm_simd128.h>
 #else
-#ifdef __POWER9_VECTOR__
+#if defined(__POWER9_VECTOR__) || defined(__powerpc64__)
 #include <altivec.h>
 #undef bool
 #define bool _Bool
@@ -425,7 +425,7 @@ static const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4
 #endif
 // reference implementation for deterministic creation of model files
-void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
+void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
     static const int qk = QK4_0;
     assert(k % qk == 0);
@@ -462,11 +462,11 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict
     }
 }
-void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
-    quantize_row_q4_0_reference(x, y, k);
+void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
+    wsp_quantize_row_q4_0_reference(x, y, k);
 }
-void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) {
+void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) {
     const int qk = QK4_1;
     assert(k % qk == 0);
@@ -503,11 +503,11 @@ void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict
     }
 }
-void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
-    quantize_row_q4_1_reference(x, y, k);
+void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
+    wsp_quantize_row_q4_1_reference(x, y, k);
 }
-void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) {
+void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) {
     static const int qk = QK5_0;
     assert(k % qk == 0);
@@ -551,11 +551,11 @@ void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict
     }
 }
-void quantize_row_q5_0(const float * restrict x, void * restrict y, int k) {
-    quantize_row_q5_0_reference(x, y, k);
+void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k) {
+    wsp_quantize_row_q5_0_reference(x, y, k);
 }
-void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) {
+void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) {
     const int qk = QK5_1;
     assert(k % qk == 0);
@@ -599,12 +599,12 @@ void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict
     }
 }
-void quantize_row_q5_1(const float * restrict x, void * restrict y, int k) {
-    quantize_row_q5_1_reference(x, y, k);
+void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k) {
+    wsp_quantize_row_q5_1_reference(x, y, k);
 }
 // reference implementation for deterministic creation of model files
-void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) {
+void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) {
     assert(k % QK8_0 == 0);
     const int nb = k / QK8_0;
@@ -629,7 +629,7 @@ void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict
     }
 }
-void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
+void wsp_quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
     assert(QK8_0 == 32);
     assert(k % QK8_0 == 0);
     const int nb = k / QK8_0;
@@ -813,12 +813,12 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
 #else
     WSP_GGML_UNUSED(nb);
     // scalar
-    quantize_row_q8_0_reference(x, y, k);
+    wsp_quantize_row_q8_0_reference(x, y, k);
 #endif
 }
 // reference implementation for deterministic creation of model files
-void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
+void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
     assert(QK8_1 == 32);
     assert(k % QK8_1 == 0);
     const int nb = k / QK8_1;
@@ -853,7 +853,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
     }
 }
-void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
+void wsp_quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
     assert(k % QK8_1 == 0);
     const int nb = k / QK8_1;
@@ -1067,11 +1067,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
 #else
     WSP_GGML_UNUSED(nb);
     // scalar
-    quantize_row_q8_1_reference(x, y, k);
+    wsp_quantize_row_q8_1_reference(x, y, k);
 #endif
 }
-void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
     static const int qk = QK4_0;
     assert(k % qk == 0);
@@ -1091,7 +1091,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
     }
 }
-void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
     static const int qk = QK4_1;
     assert(k % qk == 0);
@@ -1112,7 +1112,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
     }
 }
-void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
     static const int qk = QK5_0;
     assert(k % qk == 0);
@@ -1138,7 +1138,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int
     }
 }
-void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
     static const int qk = QK5_1;
     assert(k % qk == 0);
@@ -1165,7 +1165,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int
     }
 }
-void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) {
     static const int qk = QK8_0;
     assert(k % qk == 0);
@@ -1368,7 +1368,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
     float max = x[0];
     float sum_w = weights[0];
     float sum_x = sum_w * x[0];
+#ifdef HAVE_BUGGY_APPLE_LINKER
+    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
+    for (volatile int i = 1; i < n; ++i) {
+#else
     for (int i = 1; i < n; ++i) {
+#endif
         if (x[i] < min) min = x[i];
         if (x[i] > max) max = x[i];
         float w = weights[i];
@@ -1450,7 +1455,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
 //========================- 2-bit (de)-quantization
-void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) {
+void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -1527,7 +1532,7 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
     }
 }
-void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -1573,23 +1578,23 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int
     }
 }
-void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
-    quantize_row_q2_K_reference(x, vy, k);
+void wsp_quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
+    wsp_quantize_row_q2_K_reference(x, vy, k);
 }
-size_t wsp_ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
+size_t wsp_ggml_wsp_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
     (void)hist; // TODO: collect histograms
     for (int j = 0; j < n; j += k) {
         block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
-        quantize_row_q2_K_reference(src + j, y, k);
+        wsp_quantize_row_q2_K_reference(src + j, y, k);
     }
     return (n/QK_K*sizeof(block_q2_K));
 }
 //========================= 3-bit (de)-quantization
-void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) {
+void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -1703,7 +1708,7 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict
 }
 #if QK_K == 256
-void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -1753,7 +1758,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
     }
 }
 #else
-void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     assert(QK_K == 64);
     const int nb = k / QK_K;
@@ -1786,23 +1791,23 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
 }
 #endif
-void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
-    quantize_row_q3_K_reference(x, vy, k);
+void wsp_quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
+    wsp_quantize_row_q3_K_reference(x, vy, k);
 }
-size_t wsp_ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
+size_t wsp_ggml_wsp_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
     (void)hist; // TODO: collect histograms
     for (int j = 0; j < n; j += k) {
         block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
-        quantize_row_q3_K_reference(src + j, y, k);
+        wsp_quantize_row_q3_K_reference(src + j, y, k);
     }
     return (n/QK_K*sizeof(block_q3_K));
 }
 // ====================== 4-bit (de)-quantization
-void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) {
+void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -1909,7 +1914,7 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
     }
 }
-void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -1948,26 +1953,26 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int
     }
 }
-void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
+void wsp_quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
     assert(k % QK_K == 0);
     block_q4_K * restrict y = vy;
-    quantize_row_q4_K_reference(x, y, k);
+    wsp_quantize_row_q4_K_reference(x, y, k);
 }
-size_t wsp_ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
+size_t wsp_ggml_wsp_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
     assert(k % QK_K == 0);
     (void)hist; // TODO: collect histograms
     for (int j = 0; j < n; j += k) {
         block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
-        quantize_row_q4_K_reference(src + j, y, k);
+        wsp_quantize_row_q4_K_reference(src + j, y, k);
     }
     return (n/QK_K*sizeof(block_q4_K));
 }
 // ====================== 5-bit (de)-quantization
-void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) {
+void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -2109,7 +2114,7 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
     }
 }
-void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -2154,26 +2159,26 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int
     }
 }
-void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
+void wsp_quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
     assert(k % QK_K == 0);
     block_q5_K * restrict y = vy;
-    quantize_row_q5_K_reference(x, y, k);
+    wsp_quantize_row_q5_K_reference(x, y, k);
 }
-size_t wsp_ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
+size_t wsp_ggml_wsp_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
     assert(k % QK_K == 0);
     (void)hist; // TODO: collect histograms
     for (int j = 0; j < n; j += k) {
         block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
-        quantize_row_q5_K_reference(src + j, y, k);
+        wsp_quantize_row_q5_K_reference(src + j, y, k);
     }
     return (n/QK_K*sizeof(block_q5_K));
 }
 // ====================== 6-bit (de)-quantization
-void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) {
+void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -2255,7 +2260,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
     }
 }
-void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -2302,26 +2307,26 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int
     }
 }
-void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
+void wsp_quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
     assert(k % QK_K == 0);
     block_q6_K * restrict y = vy;
-    quantize_row_q6_K_reference(x, y, k);
+    wsp_quantize_row_q6_K_reference(x, y, k);
 }
-size_t wsp_ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
+size_t wsp_ggml_wsp_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
     assert(k % QK_K == 0);
     (void)hist; // TODO: collect histograms
     for (int j = 0; j < n; j += k) {
         block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
-        quantize_row_q6_K_reference(src + j, y, k);
+        wsp_quantize_row_q6_K_reference(src + j, y, k);
     }
     return (n/QK_K*sizeof(block_q6_K));
 }
 //===================================== Q8_K ==============================================
-void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) {
+void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -2358,7 +2363,7 @@ void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict
     }
 }
-void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) {
+void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
     const int nb = k / QK_K;
@@ -2369,8 +2374,8 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int
     }
 }
-void quantize_row_q8_K(const float * restrict x, void * restrict y, int k) {
-    quantize_row_q8_K_reference(x, y, k);
+void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k) {
+    wsp_quantize_row_q8_K_reference(x, y, k);
 }
 //===================================== Dot ptoducts =================================
@@ -3109,7 +3114,7 @@ void wsp_ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * re
     size_t vl = __riscv_vsetvl_e8m1(qk/2);
-    // These tempory registers are for masking and shift operations
+    // These temporary registers are for masking and shift operations
     vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
     vuint32m2_t vt_2 = __riscv_vsll_vv_u32m2(__riscv_vmv_v_x_u32m2(1, vl), vt_1, vl);
@@ -4752,7 +4757,7 @@ void wsp_ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * re
             vl = 16;
-            // retreive lane to multiply with scale
+            // retrieve lane to multiply with scale
             vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
             vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
             vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);

package/cpp/ggml-quants.h CHANGED

@@ -167,48 +167,48 @@ static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_
 // Quantization
-void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
-void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
-void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
-void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
-void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
-void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
-void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
-void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
-void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
-void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
-void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
-void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
-void quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
-void quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
-void quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
-void quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
-void quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
-void quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
-void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
-void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
-void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
-void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
-void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
-void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
+void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
+void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
+void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
+void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
+void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
+void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
+void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
+void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
+void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
+void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
+void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
+void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
+void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
 // Dequantization
-void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
-void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
-void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
-void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
-void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
-//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
-void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
-void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
-void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
-void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
-void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
-void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
+//void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
+void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
 // Dot product
 void wsp_ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);