whisper.rn 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/README.md +1 -1
  2. package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
  3. package/android/src/main/jni.cpp +13 -0
  4. package/cpp/ggml-alloc.c +78 -26
  5. package/cpp/ggml-alloc.h +9 -0
  6. package/cpp/ggml-backend-impl.h +1 -1
  7. package/cpp/ggml-backend-reg.cpp +19 -3
  8. package/cpp/ggml-backend.cpp +72 -20
  9. package/cpp/ggml-backend.h +2 -1
  10. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  11. package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
  12. package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
  13. package/cpp/ggml-cpu/arch-fallback.h +50 -2
  14. package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
  15. package/cpp/ggml-cpu/ggml-cpu.c +139 -58
  16. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  17. package/cpp/ggml-cpu/ops.cpp +170 -18
  18. package/cpp/ggml-cpu/ops.h +1 -0
  19. package/cpp/ggml-cpu/repack.cpp +531 -5
  20. package/cpp/ggml-cpu/repack.h +14 -0
  21. package/cpp/ggml-cpu/simd-mappings.h +16 -18
  22. package/cpp/ggml-cpu/vec.cpp +41 -1
  23. package/cpp/ggml-cpu/vec.h +241 -138
  24. package/cpp/ggml-cpu.h +1 -0
  25. package/cpp/ggml-impl.h +0 -4
  26. package/cpp/ggml-metal/ggml-metal-context.m +26 -16
  27. package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
  28. package/cpp/ggml-metal/ggml-metal-device.h +87 -65
  29. package/cpp/ggml-metal/ggml-metal-device.m +263 -104
  30. package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
  31. package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
  32. package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
  33. package/cpp/ggml-metal/ggml-metal.cpp +6 -5
  34. package/cpp/ggml-metal/ggml-metal.metal +404 -34
  35. package/cpp/ggml.c +110 -31
  36. package/cpp/ggml.h +51 -12
  37. package/cpp/jsi/RNWhisperJSI.cpp +1 -0
  38. package/cpp/whisper.cpp +17 -4
  39. package/ios/CMakeLists.txt +21 -1
  40. package/ios/RNWhisperContext.mm +5 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  58. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  59. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  66. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  68. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  74. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  75. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  76. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  77. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  78. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  79. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  80. package/lib/commonjs/jest-mock.js +2 -0
  81. package/lib/commonjs/jest-mock.js.map +1 -1
  82. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
  83. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  84. package/lib/commonjs/version.json +1 -1
  85. package/lib/module/NativeRNWhisper.js.map +1 -1
  86. package/lib/module/jest-mock.js +2 -0
  87. package/lib/module/jest-mock.js.map +1 -1
  88. package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
  89. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  90. package/lib/module/version.json +1 -1
  91. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  92. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  93. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
  94. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  95. package/lib/typescript/realtime-transcription/types.d.ts +7 -0
  96. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  97. package/package.json +1 -1
  98. package/src/NativeRNWhisper.ts +1 -0
  99. package/src/jest-mock.ts +2 -0
  100. package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
  101. package/src/realtime-transcription/types.ts +9 -0
  102. package/src/version.json +1 -1
package/cpp/ggml-cpu/arch/x86/repack.cpp
@@ -646,7 +646,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * WSP_GGML_RESTRICT s, siz
  __m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
  int64_t xstart = 0;
  int anr = nr - nr%16; // Used to align nr with boundary of 16
- #ifdef __AVX512F__
+ #if defined(__AVX512BW__) && defined(__AVX512DQ__)
  int anc = nc - nc%16; // Used to align nc with boundary of 16
  // Mask to mask out nibbles from packed bytes expanded to 512 bit length
  const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
@@ -1041,7 +1041,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * WSP_GGML_RESTRICT s, siz
  xstart = anc/8;
  y = 0;
  }
- #endif // __AVX512F__
+ #endif // __AVX512BW__ && __AVX512DQ__

  // Take group of four block_q8_0x4 structures at each pass of the loop and perform dot product operation

@@ -1989,7 +1989,7 @@ void wsp_ggml_gemm_q4_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
  __m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
  int64_t xstart = 0;
  int anr = nr - nr % 16;; // Used to align nr with boundary of 16
- #ifdef __AVX512F__
+ #if defined(__AVX512BW__) && defined(__AVX512DQ__)
  int anc = nc - nc % 16; // Used to align nc with boundary of 16
  // Mask to mask out nibbles from packed bytes expanded to 512 bit length
  const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
@@ -2727,7 +2727,7 @@ void wsp_ggml_gemm_q4_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
  xstart = anc/8;
  y = 0;
  }
- #endif //AVX512F
+ #endif // __AVX512BW__ && __AVX512DQ__

  // Take group of four block_q8_Kx4 structures at each pass of the loop and perform dot product operation
  for (; y < anr / 4; y += 4) {
@@ -3467,7 +3467,7 @@ void wsp_ggml_gemm_q2_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
  __m256i scalesmask2 = _mm256_castsi128_si256(scalesmask2_sse);
  scalesmask2 = _mm256_permute2f128_si256(scalesmask2, scalesmask2, 0);

- #ifdef __AVX512F__
+ #if defined(__AVX512BW__) && defined(__AVX512DQ__)

  int anc = nc - nc % 16; // Used to align nc with boundary of 16

@@ -4947,7 +4947,7 @@ void wsp_ggml_gemm_q2_K_8x8_q8_K(int n, float * WSP_GGML_RESTRICT s, size_t bs,
  y = 0;
  }

- #endif //AVX512F
+ #endif // __AVX512BW__ && __AVX512DQ__

  // Take group of four block_q8_Kx4 structures at each pass of the loop and perform dot product operation
  for (; y < anr / 4; y += 4) {
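The six hunks above tighten the guard around the 512-bit GEMM paths from a bare __AVX512F__ check to __AVX512BW__ && __AVX512DQ__, presumably because those kernels rely on instructions from the BW/DQ subsets that plain AVX-512F does not guarantee. For illustration only, a minimal standalone C sketch of the same compile-time dispatch (the function and strings are hypothetical, not whisper.rn code):

#include <stdio.h>

/* Sketch: select the wide kernel only when the AVX-512 subsets it needs exist. */
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
static const char * gemm_path(void) { return "16-column AVX-512 (BW+DQ) path"; }
#else
static const char * gemm_path(void) { return "8-column AVX2 path"; }
#endif

int main(void) {
    printf("selected GEMM path: %s\n", gemm_path());
    return 0;
}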
package/cpp/ggml-cpu/arch-fallback.h
@@ -33,39 +33,52 @@
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
  // repack.cpp
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
- #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
- #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #elif defined(__POWERPC__) || defined(__powerpc__)
  // ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
  // quants.c
@@ -76,21 +89,28 @@
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #elif defined(__loongarch64)
  // quants.c
  #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
@@ -101,21 +121,28 @@
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #elif defined(__riscv)
  // quants.c
  #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
@@ -134,19 +161,26 @@
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #elif defined(__s390x__)
  // quants.c
  #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
@@ -163,21 +197,28 @@
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #elif defined(__wasm__)
  // quants.c
  #define wsp_ggml_vec_dot_q4_1_q8_1_generic wsp_ggml_vec_dot_q4_1_q8_1
@@ -196,19 +237,26 @@
  // repack.cpp
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
+ #define wsp_ggml_wsp_quantize_mat_q8_K_4x4_generic wsp_ggml_wsp_quantize_mat_q8_K_4x4
  #define wsp_ggml_wsp_quantize_mat_q8_K_4x8_generic wsp_ggml_wsp_quantize_mat_q8_K_4x8
  #define wsp_ggml_gemv_q4_0_4x4_q8_0_generic wsp_ggml_gemv_q4_0_4x4_q8_0
  #define wsp_ggml_gemv_q4_0_4x8_q8_0_generic wsp_ggml_gemv_q4_0_4x8_q8_0
  #define wsp_ggml_gemv_q4_0_8x8_q8_0_generic wsp_ggml_gemv_q4_0_8x8_q8_0
+ #define wsp_ggml_gemv_q4_K_8x4_q8_K_generic wsp_ggml_gemv_q4_K_8x4_q8_K
  #define wsp_ggml_gemv_q4_K_8x8_q8_K_generic wsp_ggml_gemv_q4_K_8x8_q8_K
  #define wsp_ggml_gemv_q2_K_8x8_q8_K_generic wsp_ggml_gemv_q2_K_8x8_q8_K
  #define wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic wsp_ggml_gemv_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemv_iq4_nl_8x8_q8_0_generic wsp_ggml_gemv_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemv_q8_0_4x4_q8_0_generic wsp_ggml_gemv_q8_0_4x4_q8_0
+ #define wsp_ggml_gemv_q8_0_4x8_q8_0_generic wsp_ggml_gemv_q8_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_4x4_q8_0_generic wsp_ggml_gemm_q4_0_4x4_q8_0
  #define wsp_ggml_gemm_q4_0_4x8_q8_0_generic wsp_ggml_gemm_q4_0_4x8_q8_0
  #define wsp_ggml_gemm_q4_0_8x8_q8_0_generic wsp_ggml_gemm_q4_0_8x8_q8_0
+ #define wsp_ggml_gemm_q4_K_8x4_q8_K_generic wsp_ggml_gemm_q4_K_8x4_q8_K
  #define wsp_ggml_gemm_q4_K_8x8_q8_K_generic wsp_ggml_gemm_q4_K_8x8_q8_K
  #define wsp_ggml_gemm_q2_K_8x8_q8_K_generic wsp_ggml_gemm_q2_K_8x8_q8_K
  #define wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic wsp_ggml_gemm_iq4_nl_4x4_q8_0
  #define wsp_ggml_gemm_iq4_nl_8x8_q8_0_generic wsp_ggml_gemm_iq4_nl_8x8_q8_0
+ #define wsp_ggml_gemm_q8_0_4x4_q8_0_generic wsp_ggml_gemm_q8_0_4x4_q8_0
+ #define wsp_ggml_gemm_q8_0_4x8_q8_0_generic wsp_ggml_gemm_q8_0_4x8_q8_0
  #endif
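arch-fallback.h routes every repack kernel that has no hand-written implementation for the target architecture to the portable _generic version via #define aliases; the new q8_K 4x4 quantize, q4_K 8x4, and q8_0 4x4/4x8 entries above extend that table for the kernels added in this release. For illustration only, a minimal sketch of the aliasing pattern with hypothetical names (not the package's symbols):

#include <stdio.h>

/* Sketch: the portable kernel is written under a *_generic name; on targets
 * without a hand-optimized variant a #define renames it to the public symbol,
 * so callers always resolve to some implementation. */
#if defined(HAVE_FAST_GEMM)
/* a hand-written kernel provides the public symbol directly */
static void gemm_q8(int n) { printf("optimized gemm, n=%d\n", n); }
#else
/* no optimized kernel: the generic body below now defines gemm_q8() */
#define gemm_q8_generic gemm_q8
#endif

static void gemm_q8_generic(int n) { printf("portable reference gemm, n=%d\n", n); }

int main(void) {
    gemm_q8(64);  /* links either way */
    return 0;
}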
package/cpp/ggml-cpu/ggml-cpu-impl.h
@@ -328,7 +328,7 @@ inline static int32x4_t wsp_ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t

  #if defined(_MSC_VER) || defined(__MINGW32__)
  #include <intrin.h>
- #elif defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
+ #elif defined(__SSE__) || defined(__SSE3__) || defined(__SSSE3__) || defined(__AVX__) || defined(__F16C__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX512BF16__)
  #include <immintrin.h>
  #endif


package/cpp/ggml-cpu/ggml-cpu.c
@@ -81,6 +81,11 @@ struct wsp_ggml_arm_arch_features_type {
  } wsp_ggml_arm_arch_features = { 0 };
  #endif

+ #if defined(__riscv)
+ struct wsp_ggml_riscv_arch_features_type {
+ int rvv_vlen;
+ } wsp_ggml_riscv_arch_features = { 0 };
+ #endif

  #if defined(_WIN32)

@@ -187,6 +192,9 @@ typedef void * thread_ret_t;

  typedef pthread_t wsp_ggml_thread_t;

+ #define WSP_GGML_THREADPOOL_N_THREADS_MASK (0xffffU)
+ #define WSP_GGML_THREADPOOL_N_THREADS_BITS (16)
+
  #if defined(__APPLE__)
  #include <unistd.h>
  #include <mach/mach.h>
@@ -449,7 +457,7 @@ struct wsp_ggml_threadpool {
  struct wsp_ggml_cplan * cplan;

  // synchronization primitives
- atomic_int n_graph; // incremented when there is work to be done (i.e each graph)
+ atomic_int n_graph; // updated when there is work to be done (i.e each graph) holds graph and active thread counts.
  atomic_int WSP_GGML_CACHE_ALIGN n_barrier;
  atomic_int WSP_GGML_CACHE_ALIGN n_barrier_passed;
  atomic_int WSP_GGML_CACHE_ALIGN current_chunk; // currently processing chunk during Mat_Mul, shared between all the threads.
@@ -457,12 +465,10 @@ struct wsp_ggml_threadpool {
  // these are atomic as an annotation for thread-sanitizer
  atomic_bool stop; // Used for stopping the threadpool altogether
  atomic_bool pause; // Used for pausing the threadpool or individual threads
- atomic_int abort; // Used for aborting processing of a graph
+ atomic_int abort; // Used for aborting processing of a graph

  struct wsp_ggml_compute_state * workers; // per thread state
- int n_threads_max; // number of threads in the pool
- atomic_int n_threads_cur; // number of threads used in the current graph
-
+ int n_threads; // Number of threads in the pool
  int32_t prio; // Scheduling priority
  uint32_t poll; // Polling level (0 - no polling)

@@ -490,6 +496,15 @@ static inline void wsp_ggml_thread_cpu_relax(void) {
  static inline void wsp_ggml_thread_cpu_relax(void) {
  _mm_pause();
  }
+ #elif defined(__riscv)
+ static inline void wsp_ggml_thread_cpu_relax(void) {
+ #ifdef __riscv_zihintpause
+ __asm__ __volatile__ ("pause");
+ #else
+ /* Encoding of the pause instruction */
+ __asm__ __volatile__ (".4byte 0x100000F");
+ #endif
+ }
  #else
  static inline void wsp_ggml_thread_cpu_relax(void) {;}
  #endif
@@ -530,7 +545,7 @@ struct wsp_ggml_state {
  static struct wsp_ggml_state g_state = {0};

  void wsp_ggml_barrier(struct wsp_ggml_threadpool * tp) {
- int n_threads = atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed);
+ int n_threads = atomic_load_explicit(&tp->n_graph, memory_order_relaxed) & WSP_GGML_THREADPOOL_N_THREADS_MASK;
  if (n_threads == 1) {
  return;
  }
@@ -547,7 +562,7 @@ void wsp_ggml_barrier(struct wsp_ggml_threadpool * tp) {
  // last thread
  atomic_store_explicit(&tp->n_barrier, 0, memory_order_relaxed);

- // exit barrier (fill seq-cst fence)
+ // exit barrier (full seq-cst fence)
  atomic_fetch_add_explicit(&tp->n_barrier_passed, 1, memory_order_seq_cst);
  return;
  }
@@ -683,24 +698,25 @@ bool wsp_ggml_is_numa(void) {
  }

  #if defined(__ARM_ARCH)
-
- #if defined(__linux__) && defined(__aarch64__)
- #include <sys/auxv.h>
- #endif
-
- static void wsp_ggml_init_arm_arch_features(void) {
  #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
- #if defined(__linux__)
- wsp_ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
+ #include <arm_sve.h>
+ static void wsp_ggml_init_arm_arch_features(void) {
+ wsp_ggml_arm_arch_features.sve_cnt = svcntb();
+ }
  #else
- // TODO: add support of SVE for non-linux systems
- #error "TODO: SVE is not supported on this platform. To use SVE, sve_cnt needs to be initialized here."
- #endif
+ static void wsp_ggml_init_arm_arch_features(void) {}
  #endif
- }
-
  #endif // __ARM_ARCH

+ #if defined(__riscv) && defined(__riscv_v_intrinsic)
+ #include <riscv_vector.h>
+ static void wsp_ggml_init_riscv_arch_features(void) {
+ wsp_ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb();
+ }
+ #else
+ static void wsp_ggml_init_riscv_arch_features(void) {}
+ #endif
+
  struct wsp_ggml_tensor * wsp_ggml_new_i32(struct wsp_ggml_context * ctx, int32_t value) {
  WSP_GGML_ASSERT(!wsp_ggml_get_no_alloc(ctx));

@@ -1927,6 +1943,10 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
  {
  wsp_ggml_compute_forward_argsort(params, tensor);
  } break;
+ case WSP_GGML_OP_TOP_K:
+ {
+ wsp_ggml_compute_forward_top_k(params, tensor);
+ } break;
  case WSP_GGML_OP_LEAKY_RELU:
  {
  wsp_ggml_compute_forward_leaky_relu(params, tensor);
@@ -2311,6 +2331,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
  case WSP_GGML_OP_ARANGE:
  case WSP_GGML_OP_TIMESTEP_EMBEDDING:
  case WSP_GGML_OP_ARGSORT:
+ case WSP_GGML_OP_TOP_K:
  case WSP_GGML_OP_FLASH_ATTN_EXT:
  case WSP_GGML_OP_FLASH_ATTN_BACK:
  case WSP_GGML_OP_SSM_CONV:
@@ -2622,7 +2643,7 @@ static void wsp_ggml_thread_cpumask_next(const bool * global_mask, bool * local_
  void wsp_ggml_threadpool_free(struct wsp_ggml_threadpool* threadpool) {
  if (!threadpool) return;

- const int n_threads = threadpool->n_threads_max;
+ const int n_threads = threadpool->n_threads;

  #ifndef WSP_GGML_USE_OPENMP
  struct wsp_ggml_compute_state* workers = threadpool->workers;
@@ -2698,9 +2719,14 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
  //WSP_GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
  }
  if (n_threads <= 0) {
- n_threads = threadpool ? threadpool->n_threads_max : WSP_GGML_DEFAULT_N_THREADS;
+ n_threads = threadpool ? threadpool->n_threads : WSP_GGML_DEFAULT_N_THREADS;
  }

+ #if defined(__EMSCRIPTEN__) && !defined(__EMSCRIPTEN_PTHREADS__)
+ // Emscripten without pthreads support can only use a single thread
+ n_threads = 1;
+ #endif
+
  size_t work_size = 0;

  struct wsp_ggml_cplan cplan;
@@ -2834,6 +2860,10 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
  cur += sizeof(wsp_ggml_fp16_t)*ne00*ne01*ne02*ne03;
  cur += sizeof(wsp_ggml_fp16_t)*ne10*ne11*ne12;
  } break;
+ case WSP_GGML_OP_TOP_K:
+ {
+ cur += sizeof(int32_t)*node->src[0]->ne[0]*n_tasks;
+ } break;
  case WSP_GGML_OP_FLASH_ATTN_EXT:
  {
  const int64_t ne10 = node->src[1]->ne[0]; // DK
@@ -2897,12 +2927,14 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {

  struct wsp_ggml_compute_params params = {
  /*.ith =*/ state->ith,
- /*.nth =*/ atomic_load_explicit(&tp->n_threads_cur, memory_order_relaxed),
+ /*.nth =*/ atomic_load_explicit(&tp->n_graph, memory_order_relaxed) & WSP_GGML_THREADPOOL_N_THREADS_MASK,
  /*.wsize =*/ cplan->work_size,
  /*.wdata =*/ cplan->work_data,
  /*.threadpool=*/ tp,
  };

+ WSP_GGML_PRINT_DEBUG("thread #%d compute-start cplan %p last-graph %d \n", state->ith, cplan, state->last_graph);
+
  for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
  struct wsp_ggml_tensor * node = cgraph->nodes[node_n];

@@ -2924,6 +2956,8 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {
  }
  }

+ WSP_GGML_PRINT_DEBUG("thread #%d compute-done cplan %p last-graph %d \n", state->ith, cplan, state->last_graph);
+
  wsp_ggml_barrier(state->threadpool);

  return 0;
@@ -2931,27 +2965,23 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {

  #ifndef WSP_GGML_USE_OPENMP

- // check if thread is active
- static inline bool wsp_ggml_graph_compute_thread_active(struct wsp_ggml_compute_state * state) {
- struct wsp_ggml_threadpool * threadpool = state->threadpool;
- int n_threads = atomic_load_explicit(&threadpool->n_threads_cur, memory_order_relaxed);
- return (state->ith < n_threads);
- }
-
  // check if thread is ready to proceed (exit from polling or sleeping)
+ // returns true if loops should exit, sets state->pending to indicate new work
  static inline bool wsp_ggml_graph_compute_thread_ready(struct wsp_ggml_compute_state * state) {
  struct wsp_ggml_threadpool * threadpool = state->threadpool;

  if (state->pending || threadpool->stop || threadpool->pause) { return true; }

  // check for new graph/work
- int new_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed);
- if (new_graph != state->last_graph) {
- state->pending = wsp_ggml_graph_compute_thread_active(state);
- state->last_graph = new_graph;
+ int n_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed);
+ int n_threads = n_graph & WSP_GGML_THREADPOOL_N_THREADS_MASK;
+ if (n_graph != state->last_graph) {
+ state->pending = (state->ith < n_threads);
+ state->last_graph = n_graph;
+ return true;
  }

- return state->pending;
+ return false;
  }

  // sync thread state after polling
@@ -2968,11 +2998,6 @@ static inline void wsp_ggml_graph_compute_thread_sync(struct wsp_ggml_compute_st
  static inline bool wsp_ggml_graph_compute_poll_for_work(struct wsp_ggml_compute_state * state) {
  struct wsp_ggml_threadpool * threadpool = state->threadpool;

- // Skip polling for unused threads
- if (!wsp_ggml_graph_compute_thread_active(state)) {
- return state->pending;
- }
-
  // This seems to make 0 ... 100 a decent range for polling level across modern processors.
  // Perhaps, we can adjust it dynamically based on load and things.
  const uint64_t n_rounds = 1024UL * 128 * threadpool->poll;
@@ -3034,7 +3059,6 @@ static thread_ret_t wsp_ggml_graph_compute_secondary_thread(void* data) {
  wsp_ggml_graph_compute_check_for_work(state);
  if (state->pending) {
  state->pending = false;
-
  wsp_ggml_graph_compute_thread(state);
  }
  }
@@ -3049,14 +3073,15 @@ static void wsp_ggml_graph_compute_kickoff(struct wsp_ggml_threadpo

  wsp_ggml_mutex_lock(&threadpool->mutex);

- WSP_GGML_PRINT_DEBUG("threadpool: n_threads_cur %d n_threads %d\n", threadpool->n_threads_cur, n_threads);
+ // Update the number of active threads and the graph count
+ int n_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed) >> WSP_GGML_THREADPOOL_N_THREADS_BITS;
+ n_graph = ((n_graph + 1) << WSP_GGML_THREADPOOL_N_THREADS_BITS) | (n_threads & WSP_GGML_THREADPOOL_N_THREADS_MASK);

- // Update the number of active threads
- atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
+ WSP_GGML_PRINT_DEBUG("compute-kickoff: n_threads %d n_graph %d\n", n_threads, n_graph);

  // Indicate the graph is ready to be processed
  // We need the full seq-cst fence here because of the polling threads (used in thread_sync)
- atomic_fetch_add_explicit(&threadpool->n_graph, 1, memory_order_seq_cst);
+ atomic_store_explicit(&threadpool->n_graph, n_graph, memory_order_seq_cst)

  if (threadpool->pause) {
  // Update main thread prio and affinity to match the threadpool settings
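The threadpool hunks above fold the old n_threads_cur counter into the single atomic n_graph word: the low 16 bits (WSP_GGML_THREADPOOL_N_THREADS_MASK) carry the number of threads active for the current graph, and the upper bits carry a graph counter that kickoff bumps so polling or sleeping workers notice new work. For illustration only, a small standalone C sketch of that encoding (pack_next is a hypothetical helper, not a package function):

#include <stdio.h>

#define N_THREADS_MASK (0xffffU)
#define N_THREADS_BITS (16)

/* Mirror of the packing done in the kickoff hunk above: bump the graph counter
 * in the high bits, store the active thread count in the low 16 bits. */
static int pack_next(int prev, int n_threads) {
    int graph_id = (prev >> N_THREADS_BITS) + 1;
    return (graph_id << N_THREADS_BITS) | (n_threads & N_THREADS_MASK);
}

int main(void) {
    int n_graph = 0;
    n_graph = pack_next(n_graph, 8);   /* first graph, 8 active threads  */
    n_graph = pack_next(n_graph, 4);   /* second graph, 4 active threads */
    printf("graph id = %d, active threads = %d\n",
           n_graph >> N_THREADS_BITS, n_graph & N_THREADS_MASK);
    return 0;
}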
@@ -3094,8 +3119,7 @@ static struct wsp_ggml_threadpool * wsp_ggml_threadpool_new_impl(
  threadpool->pause = tpp->paused;
  threadpool->abort = -1;
  threadpool->workers = NULL;
- threadpool->n_threads_max = tpp->n_threads;
- threadpool->n_threads_cur = tpp->n_threads;
+ threadpool->n_threads = tpp->n_threads;
  threadpool->poll = tpp->poll;
  threadpool->prio = tpp->prio;
  threadpool->ec = WSP_GGML_STATUS_SUCCESS;
@@ -3190,7 +3214,7 @@ enum wsp_ggml_status wsp_ggml_graph_compute(struct wsp_ggml_cgraph * cgraph, str
  {
  // update the number of threads from the actual number of threads that we got from OpenMP
  n_threads = omp_get_num_threads();
- atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
+ atomic_store_explicit(&threadpool->n_graph, n_threads, memory_order_relaxed);
  }

  // Apply thread CPU mask and priority
@@ -3203,13 +3227,13 @@ enum wsp_ggml_status wsp_ggml_graph_compute(struct wsp_ggml_cgraph * cgraph, str
  wsp_ggml_graph_compute_thread(&threadpool->workers[ith]);
  }
  } else {
- atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
+ atomic_store_explicit(&threadpool->n_graph, 1, memory_order_relaxed);
  wsp_ggml_graph_compute_thread(&threadpool->workers[0]);
  }
  #else
- if (n_threads > threadpool->n_threads_max) {
- WSP_GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->n_threads_max);
- n_threads = threadpool->n_threads_max;
+ if (n_threads > threadpool->n_threads) {
+ WSP_GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->n_threads);
+ n_threads = threadpool->n_threads;
  }

  // Kick all threads to start the new graph
@@ -3296,13 +3320,33 @@ void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t * x, float * y, int64_t n)
  __m128 y_vec = _mm_cvtph_ps(x_vec);
  _mm_storeu_ps(y + i, y_vec);
  }
- #elif defined(__riscv_zvfh)
- for (int vl; i < n; i += vl) {
- vl = __riscv_vsetvl_e16m1(n - i);
- vfloat16m1_t vx = __riscv_vle16_v_f16m1((_Float16 *)&x[i], vl);
- vfloat32m2_t vy = __riscv_vfwcvt_f_f_v_f32m2(vx, vl);
- __riscv_vse32_v_f32m2(&y[i], vy, vl);
+
+ #elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfhmin)
+ // calculate step size
+ const int epr = __riscv_vsetvlmax_e16m2();
+ const int step = epr * 2;
+ const int np = (n & ~(step - 1));
+
+ // unroll by 2
+ for (; i < np; i += step) {
+ vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, epr);
+ vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, epr);
+ __riscv_vse32_v_f32m4(y + i, ay0, epr);
+
+ vfloat16m2_t ax1 = __riscv_vle16_v_f16m2((const _Float16*)x + i + epr, epr);
+ vfloat32m4_t ay1 = __riscv_vfwcvt_f_f_v_f32m4(ax1, epr);
+ __riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
  }
+
+ // leftovers
+ int vl;
+ for (i = np; i < n; i += vl) {
+ vl = __riscv_vsetvl_e16m2(n - i);
+ vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, vl);
+ vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, vl);
+ __riscv_vse32_v_f32m4(y + i, ay0, vl);
+ }
+
  #endif

  for (; i < n; ++i) {
@@ -3347,6 +3391,31 @@ void wsp_ggml_cpu_bf16_to_fp32(const wsp_ggml_bf16_t * x, float * y, int64_t n)
  (const __m128i *)(x + i))),
  16)));
  }
+ #elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfbfmin)
+ // calculate step size
+ const int epr = __riscv_vsetvlmax_e16m2();
+ const int step = epr * 2;
+ const int np = (n & ~(step - 1));
+
+ // unroll by 2
+ for (; i < np; i += step) {
+ vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16*)x + i, epr);
+ vfloat32m4_t ay0 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax0, epr);
+ __riscv_vse32_v_f32m4(y + i, ay0, epr);
+
+ vbfloat16m2_t ax1 = __riscv_vle16_v_bf16m2((const __bf16*)x + i + epr, epr);
+ vfloat32m4_t ay1 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax1, epr);
+ __riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
+ }
+
+ // leftovers
+ int vl;
+ for (i = np; i < n; i += vl) {
+ vl = __riscv_vsetvl_e16m2(n - i);
+ vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16*)x + i, vl);
+ vfloat32m4_t ay0 = __riscv_vfwcvtbf16_f_f_v_f32m4(ax0, vl);
+ __riscv_vse32_v_f32m4(y + i, ay0, vl);
+ }
  #endif
  for (; i < n; i++) {
  y[i] = WSP_GGML_BF16_TO_FP32(x[i]);
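The two RVV hunks above widen fp16 and bf16 input two vector registers per iteration and finish with a vl-limited tail loop; the scalar loop that follows handles whatever the active SIMD path did not cover. For bf16 the widening itself is trivial, since a bf16 value is just the upper 16 bits of an IEEE-754 binary32. A small standalone reference sketch (bf16_to_fp32 here is illustrative, not the package's helper):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scalar reference for the bf16 -> fp32 widening the vectorized loop performs:
 * shift the 16 stored bits into the high half of a 32-bit word and bit-copy. */
static float bf16_to_fp32(uint16_t h) {
    uint32_t bits = (uint32_t)h << 16;
    float f;
    memcpy(&f, &bits, sizeof(f));
    return f;
}

int main(void) {
    uint16_t one_bf16 = 0x3F80;                /* bf16 encoding of 1.0f */
    printf("%f\n", bf16_to_fp32(one_bf16));    /* prints 1.000000 */
    return 0;
}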
@@ -3449,6 +3518,14 @@ int wsp_ggml_cpu_has_riscv_v(void) {
  #endif
  }

+ int wsp_ggml_cpu_get_rvv_vlen(void) {
+ #if defined(__riscv) && defined(__riscv_v_intrinsic)
+ return wsp_ggml_riscv_arch_features.rvv_vlen;
+ #else
+ return 0;
+ #endif
+ }
+
  int wsp_ggml_cpu_has_f16c(void) {
  #if defined(__F16C__)
  return 1;
@@ -3615,6 +3692,10 @@ void wsp_ggml_cpu_init(void) {
  wsp_ggml_init_arm_arch_features();
  #endif

+ #if defined(__riscv)
+ wsp_ggml_init_riscv_arch_features();
+ #endif
+
  is_first_call = false;
  }