whisper.rn 0.5.1 → 0.5.3
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published in the public registry.
- package/android/src/main/jni.cpp +12 -3
- package/cpp/ggml-alloc.c +49 -18
- package/cpp/ggml-backend-impl.h +0 -3
- package/cpp/ggml-backend-reg.cpp +8 -0
- package/cpp/ggml-backend.cpp +0 -2
- package/cpp/ggml-backend.h +2 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +428 -26
- package/cpp/ggml-cpu/ggml-cpu-impl.h +4 -2
- package/cpp/ggml-cpu/ggml-cpu.c +67 -24
- package/cpp/ggml-cpu/ops.cpp +489 -364
- package/cpp/ggml-cpu/ops.h +4 -4
- package/cpp/ggml-cpu/repack.cpp +143 -29
- package/cpp/ggml-cpu/simd-mappings.h +25 -25
- package/cpp/ggml-cpu/unary-ops.cpp +151 -0
- package/cpp/ggml-cpu/unary-ops.h +7 -0
- package/cpp/ggml-cpu/vec.cpp +83 -0
- package/cpp/ggml-cpu/vec.h +20 -8
- package/cpp/ggml-impl.h +67 -2
- package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
- package/cpp/ggml-metal/ggml-metal-context.m +5 -6
- package/cpp/ggml-metal/ggml-metal-device.cpp +300 -14
- package/cpp/ggml-metal/ggml-metal-device.h +26 -1
- package/cpp/ggml-metal/ggml-metal-device.m +243 -28
- package/cpp/ggml-metal/ggml-metal-impl.h +177 -9
- package/cpp/ggml-metal/ggml-metal-ops.cpp +843 -157
- package/cpp/ggml-metal/ggml-metal-ops.h +8 -0
- package/cpp/ggml-metal/ggml-metal.cpp +8 -3
- package/cpp/ggml-metal/ggml-metal.metal +12436 -0
- package/cpp/ggml.c +317 -4
- package/cpp/ggml.h +139 -0
- package/cpp/jsi/RNWhisperJSI.cpp +7 -2
- package/cpp/rn-whisper.h +1 -0
- package/cpp/whisper.cpp +8 -2
- package/ios/RNWhisperContext.mm +3 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +2 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +2 -0
- package/src/version.json +1 -1
- package/whisper-rn.podspec +1 -1
- package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
package/cpp/ggml-cpu/ggml-cpu.c
CHANGED
@@ -689,8 +689,13 @@ bool wsp_ggml_is_numa(void) {
 #endif

 static void wsp_ggml_init_arm_arch_features(void) {
-#if defined(
+#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
+#if defined(__linux__)
     wsp_ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
+#else
+// TODO: add support of SVE for non-linux systems
+#error "TODO: SVE is not supported on this platform. To use SVE, sve_cnt needs to be initialized here."
+#endif
 #endif
 }

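Note: the SVE vector-length probe above still relies on the Linux-only prctl(PR_SVE_GET_VL) interface and deliberately fails the build elsewhere. For reference only (this is not what the release does), a non-Linux build with SVE enabled could obtain the same value from the ACLE intrinsic svcntb():

    // Reference sketch only -- the diff above emits #error instead. With SVE
    // enabled at compile time, ACLE's svcntb() returns the SVE vector length
    // in bytes at run time, which is the quantity sve_cnt holds.
    #include <arm_sve.h>

    static inline int sve_vl_bytes(void) {
        return (int) svcntb();
    }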
@@ -1608,13 +1613,8 @@ static void wsp_ggml_compute_forward_mul_mat_id(
         chunk_size = 64;
     }

-#if defined(__aarch64__)
-    // disable for ARM
-    const bool disable_chunking = true;
-#else
     // disable for NUMA
     const bool disable_chunking = wsp_ggml_is_numa();
-#endif // defined(__aarch64__)

     int64_t nchunk0 = (nr0 + chunk_size - 1) / chunk_size;
     int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
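Note: with the ARM carve-out gone, the ceil-division below now also sizes the work chunks on aarch64 (unless NUMA disables chunking). A standalone illustration of that arithmetic, with made-up sizes:

    // Standalone illustration of the chunk-count arithmetic above (values made up).
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const int64_t nr0 = 1000, nr1 = 17, chunk_size = 64;
        const int64_t nchunk0 = (nr0 + chunk_size - 1) / chunk_size; // ceil(1000/64) = 16
        const int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size; // ceil(17/64)   = 1
        printf("%lld x %lld chunks\n", (long long) nchunk0, (long long) nchunk1);
        return 0;
    }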
@@ -1731,6 +1731,10 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
             {
                 wsp_ggml_compute_forward_sum_rows(params, tensor);
             } break;
+        case WSP_GGML_OP_CUMSUM:
+            {
+                wsp_ggml_compute_forward_cumsum(params, tensor);
+            } break;
         case WSP_GGML_OP_MEAN:
             {
                 wsp_ggml_compute_forward_mean(params, tensor);
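Note: the forward dispatch above assumes matching graph-builder entry points were added to ggml.h in this release (upstream ggml exposes ggml_cumsum; the wsp_ prefix follows this package's symbol renaming). A hypothetical usage sketch, to be checked against the shipped headers:

    // Hypothetical usage sketch for the new CUMSUM op on the CPU backend.
    // All wsp_*-prefixed calls are assumed to mirror upstream ggml's API
    // (ggml_init, ggml_new_tensor_1d, ggml_cumsum, ggml_graph_compute_with_ctx);
    // verify names and include paths against the package's ggml.h / ggml-cpu.h.
    #include "ggml.h"
    #include "ggml-cpu.h"

    static void cumsum_demo(void) {
        struct wsp_ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct wsp_ggml_context * ctx = wsp_ggml_init(ip);

        struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 4);
        ((float *) x->data)[0] = 1.0f;
        ((float *) x->data)[1] = 2.0f;
        ((float *) x->data)[2] = 3.0f;
        ((float *) x->data)[3] = 4.0f;

        struct wsp_ggml_tensor * y = wsp_ggml_cumsum(ctx, x); // expected: 1, 3, 6, 10

        struct wsp_ggml_cgraph * gf = wsp_ggml_new_graph(ctx);
        wsp_ggml_build_forward_expand(gf, y);
        wsp_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

        wsp_ggml_free(ctx);
    }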
@@ -1807,22 +1811,6 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
             {
                 wsp_ggml_compute_forward_cont(params, tensor);
             } break;
-        case WSP_GGML_OP_RESHAPE:
-            {
-                wsp_ggml_compute_forward_reshape(params, tensor);
-            } break;
-        case WSP_GGML_OP_VIEW:
-            {
-                wsp_ggml_compute_forward_view(params, tensor);
-            } break;
-        case WSP_GGML_OP_PERMUTE:
-            {
-                wsp_ggml_compute_forward_permute(params, tensor);
-            } break;
-        case WSP_GGML_OP_TRANSPOSE:
-            {
-                wsp_ggml_compute_forward_transpose(params, tensor);
-            } break;
         case WSP_GGML_OP_GET_ROWS:
             {
                 wsp_ggml_compute_forward_get_rows(params, tensor);
@@ -1943,6 +1931,14 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
             {
                 wsp_ggml_compute_forward_leaky_relu(params, tensor);
             } break;
+        case WSP_GGML_OP_TRI:
+            {
+                wsp_ggml_compute_forward_tri(params, tensor);
+            } break;
+        case WSP_GGML_OP_FILL:
+            {
+                wsp_ggml_compute_forward_fill(params, tensor);
+            } break;
         case WSP_GGML_OP_FLASH_ATTN_EXT:
             {
                 wsp_ggml_compute_forward_flash_attn_ext(params, tensor);
@@ -1998,6 +1994,10 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
             {
                 wsp_ggml_compute_forward_rwkv_wkv7(params, tensor);
             } break;
+        case WSP_GGML_OP_SOLVE_TRI:
+            {
+                wsp_ggml_compute_forward_solve_tri(params, tensor);
+            } break;
         case WSP_GGML_OP_MAP_CUSTOM1:
             {
                 wsp_ggml_compute_forward_map_custom1(params, tensor);
@@ -2042,6 +2042,22 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
             {
                 // nop
             } break;
+        case WSP_GGML_OP_RESHAPE:
+            {
+                // nop
+            } break;
+        case WSP_GGML_OP_PERMUTE:
+            {
+                // nop
+            } break;
+        case WSP_GGML_OP_VIEW:
+            {
+                // nop
+            } break;
+        case WSP_GGML_OP_TRANSPOSE:
+            {
+                // nop
+            } break;
         case WSP_GGML_OP_COUNT:
             {
                 WSP_GGML_ABORT("fatal error");
@@ -2140,6 +2156,9 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
         case WSP_GGML_OP_ADD_ID:
         case WSP_GGML_OP_ADD1:
         case WSP_GGML_OP_ACC:
+        case WSP_GGML_OP_CUMSUM:
+        case WSP_GGML_OP_TRI:
+        case WSP_GGML_OP_FILL:
             {
                 n_tasks = n_threads;
             } break;
@@ -2157,6 +2176,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
                 n_tasks = 1;
             } break;
         case WSP_GGML_OP_COUNT_EQUAL:
+        case WSP_GGML_OP_SOLVE_TRI:
             {
                 n_tasks = n_threads;
             } break;
@@ -2179,6 +2199,12 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
                 case WSP_GGML_UNARY_OP_HARDSWISH:
                 case WSP_GGML_UNARY_OP_HARDSIGMOID:
                 case WSP_GGML_UNARY_OP_EXP:
+                case WSP_GGML_UNARY_OP_SOFTPLUS:
+                case WSP_GGML_UNARY_OP_EXPM1:
+                case WSP_GGML_UNARY_OP_FLOOR:
+                case WSP_GGML_UNARY_OP_CEIL:
+                case WSP_GGML_UNARY_OP_ROUND:
+                case WSP_GGML_UNARY_OP_TRUNC:
                     {
                         n_tasks = 1;
                     } break;
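Note: the new single-threaded unary ops above are plain elementwise functions; their vectorized kernels land in ggml-cpu/unary-ops.cpp (also changed in this release). Scalar reference semantics, for orientation:

    // Scalar reference for what each new unary op computes elementwise
    // (the optimized kernels live in ggml-cpu/unary-ops.cpp):
    //   softplus(x) = log(1 + e^x), expm1(x) = e^x - 1, plus the usual
    //   floor/ceil/round/trunc rounding functions.
    #include <math.h>

    static float ref_softplus(float x) { return log1pf(expf(x)); }
    static float ref_expm1   (float x) { return expm1f(x); }
    static float ref_floor   (float x) { return floorf(x); }
    static float ref_ceil    (float x) { return ceilf(x); }
    static float ref_round   (float x) { return roundf(x); }
    static float ref_trunc   (float x) { return truncf(x); }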
@@ -2187,6 +2213,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
                 case WSP_GGML_UNARY_OP_GELU_ERF:
                 case WSP_GGML_UNARY_OP_GELU_QUICK:
                 case WSP_GGML_UNARY_OP_SILU:
+                case WSP_GGML_UNARY_OP_XIELU:
                     {
                         n_tasks = n_threads;
                     } break;
@@ -2879,6 +2906,11 @@ static thread_ret_t wsp_ggml_graph_compute_thread(void * data) {
     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
         struct wsp_ggml_tensor * node = cgraph->nodes[node_n];

+        if (wsp_ggml_op_is_empty(node->op)) {
+            // skip NOPs
+            continue;
+        }
+
         wsp_ggml_compute_forward(&params, node);

         if (state->ith == 0 && cplan->abort_callback &&
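Note: wsp_ggml_op_is_empty() is not part of this hunk; it ships in ggml-impl.h (also updated in this diff). A sketch of what it plausibly checks, mirroring upstream ggml's helper of the same name:

    // Plausible sketch of the helper used above, mirroring upstream ggml's
    // ggml_op_is_empty(): layout-only ops carry no computation and can be skipped.
    // Assumes ggml.h is included for enum wsp_ggml_op; the real definition is in ggml-impl.h.
    #include "ggml.h"
    #include <stdbool.h>

    static inline bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
        switch (op) {
            case WSP_GGML_OP_NONE:
            case WSP_GGML_OP_RESHAPE:
            case WSP_GGML_OP_VIEW:
            case WSP_GGML_OP_PERMUTE:
            case WSP_GGML_OP_TRANSPOSE:
                return true;
            default:
                return false;
        }
    }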
@@ -3264,6 +3296,13 @@ void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t * x, float * y, int64_t n)
         __m128 y_vec = _mm_cvtph_ps(x_vec);
         _mm_storeu_ps(y + i, y_vec);
     }
+#elif defined(__riscv_zvfh)
+    for (int vl; i < n; i += vl) {
+        vl = __riscv_vsetvl_e16m1(n - i);
+        vfloat16m1_t vx = __riscv_vle16_v_f16m1((_Float16 *)&x[i], vl);
+        vfloat32m2_t vy = __riscv_vfwcvt_f_f_v_f32m2(vx, vl);
+        __riscv_vse32_v_f32m2(&y[i], vy, vl);
+    }
 #endif

     for (; i < n; ++i) {
@@ -3557,13 +3596,17 @@ void wsp_ggml_cpu_init(void) {
 #ifdef WSP_GGML_USE_OPENMP
     //if (!getenv("OMP_WAIT_POLICY")) {
     //    // set the wait policy to active, so that OpenMP threads don't sleep
-    //
+    //    setenv("OMP_WAIT_POLICY", "active", 0)
     //}

     if (!getenv("KMP_BLOCKTIME")) {
         // set the time to wait before sleeping a thread
         // this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases
-
+#ifdef _WIN32
+        _putenv_s("KMP_BLOCKTIME", "200"); // 200ms
+#else
+        setenv("KMP_BLOCKTIME", "200", 0); // 200ms
+#endif
     }
 #endif
 }