whisper.rn 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
  2. package/android/src/main/jni.cpp +13 -0
  3. package/cpp/ggml-alloc.c +78 -26
  4. package/cpp/ggml-alloc.h +9 -0
  5. package/cpp/ggml-backend-impl.h +1 -1
  6. package/cpp/ggml-backend-reg.cpp +19 -3
  7. package/cpp/ggml-backend.cpp +72 -20
  8. package/cpp/ggml-backend.h +2 -1
  9. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  10. package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
  11. package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
  12. package/cpp/ggml-cpu/arch-fallback.h +50 -2
  13. package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
  14. package/cpp/ggml-cpu/ggml-cpu.c +139 -58
  15. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  16. package/cpp/ggml-cpu/ops.cpp +170 -18
  17. package/cpp/ggml-cpu/ops.h +1 -0
  18. package/cpp/ggml-cpu/repack.cpp +531 -5
  19. package/cpp/ggml-cpu/repack.h +14 -0
  20. package/cpp/ggml-cpu/simd-mappings.h +16 -18
  21. package/cpp/ggml-cpu/vec.cpp +41 -1
  22. package/cpp/ggml-cpu/vec.h +241 -138
  23. package/cpp/ggml-cpu.h +1 -0
  24. package/cpp/ggml-impl.h +0 -4
  25. package/cpp/ggml-metal/ggml-metal-context.m +26 -16
  26. package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
  27. package/cpp/ggml-metal/ggml-metal-device.h +87 -65
  28. package/cpp/ggml-metal/ggml-metal-device.m +263 -104
  29. package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
  30. package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
  31. package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
  32. package/cpp/ggml-metal/ggml-metal.cpp +6 -5
  33. package/cpp/ggml-metal/ggml-metal.metal +404 -34
  34. package/cpp/ggml.c +110 -31
  35. package/cpp/ggml.h +51 -12
  36. package/cpp/jsi/RNWhisperJSI.cpp +1 -0
  37. package/cpp/whisper.cpp +16 -3
  38. package/ios/CMakeLists.txt +21 -1
  39. package/ios/RNWhisperContext.mm +5 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  58. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  67. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  74. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  75. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  76. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  77. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  78. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  79. package/lib/commonjs/jest-mock.js +2 -0
  80. package/lib/commonjs/jest-mock.js.map +1 -1
  81. package/lib/commonjs/version.json +1 -1
  82. package/lib/module/NativeRNWhisper.js.map +1 -1
  83. package/lib/module/jest-mock.js +2 -0
  84. package/lib/module/jest-mock.js.map +1 -1
  85. package/lib/module/version.json +1 -1
  86. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  87. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  88. package/package.json +1 -1
  89. package/src/NativeRNWhisper.ts +1 -0
  90. package/src/jest-mock.ts +2 -0
  91. package/src/version.json +1 -1
@@ -583,6 +583,10 @@ static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_bac
583
583
  if (wsp_ggml_cpu_has_riscv_v()) {
584
584
  features.push_back({ "RISCV_V", "1" });
585
585
  }
586
+ if (wsp_ggml_cpu_get_rvv_vlen() > 0) {
587
+ static std::string rvv_vlen = std::to_string(wsp_ggml_cpu_get_rvv_vlen());
588
+ features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
589
+ }
586
590
  if (wsp_ggml_cpu_has_vsx()) {
587
591
  features.push_back({ "VSX", "1" });
588
592
  }
@@ -6383,7 +6383,7 @@ static void wsp_ggml_compute_forward_im2col_3d_f16(
6383
6383
  const int64_t iih = ioh*s1 + ikh*d1 - p1;
6384
6384
  const int64_t iid = iod*s2 + ikd*d2 - p2;
6385
6385
 
6386
- if (iid < 0 || iid >= ID || iih < 0 || iih >= IH || iiw < 0 || iiw >= IW || iid < 0 || iid >= ID) {
6386
+ if (iid < 0 || iid >= ID || iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
6387
6387
  dst_data[iic*KD_KH_KW + ikd * KH_KW + ikh*KW + ikw] = 0;
6388
6388
  } else {
6389
6389
  const float * const s = (const float *) ((const char *)src_data + iid*nb12 + iih*nb11 + iiw*nb10); // [ID, IH, IW]
@@ -6554,8 +6554,13 @@ static void wsp_ggml_call_mul_mat(wsp_ggml_type type, const wsp_ggml_compute_par
6554
6554
  wsp_ggml_compute_forward_mul_mat(params, &dst);
6555
6555
  }
6556
6556
 
6557
+ static inline int64_t wsp_ggml_wrap_around(int64_t coord, int64_t size) {
6558
+ return (coord + size) % size; // adding size avoids negative number weirdness
6559
+ }
6560
+
6557
6561
  // wsp_ggml_compute_forward_conv_2d
6558
6562
 
6563
+
6559
6564
  static void wsp_ggml_compute_forward_conv_2d_impl(const wsp_ggml_compute_params * params,
6560
6565
  const wsp_ggml_tensor * kernel, // [KW, KH, IC, OC]
6561
6566
  const wsp_ggml_tensor * src, // [W, H, C, N]
@@ -7420,6 +7425,65 @@ static void wsp_ggml_compute_forward_upscale_f32(
7420
7425
  }
7421
7426
  }
7422
7427
  }
7428
+ } else if (mode == WSP_GGML_SCALE_MODE_BILINEAR && (mode_flags & WSP_GGML_SCALE_FLAG_ANTIALIAS)) {
7429
+ // Similar to F.interpolate(..., mode="bilinear", align_corners=False, antialias=True)
7430
+ // https://github.com/pytorch/pytorch/blob/8871ff29b743948d1225389d5b7068f37b22750b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
7431
+ auto triangle_filter = [](float x) -> float {
7432
+ return std::max(1.0f - fabsf(x), 0.0f);
7433
+ };
7434
+
7435
+ // support and invscale, minimum 1 pixel for bilinear
7436
+ const float support1 = std::max(1.0f, 1.0f / sf1);
7437
+ const float invscale1 = 1.0f / support1;
7438
+ const float support0 = std::max(1.0f, 1.0f / sf0);
7439
+ const float invscale0 = 1.0f / support0;
7440
+
7441
+ for (int64_t i3 = 0; i3 < ne3; i3++) {
7442
+ const int64_t i03 = i3 / sf3;
7443
+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
7444
+ const int64_t i02 = i2 / sf2;
7445
+ for (int64_t i1 = 0; i1 < ne1; i1++) {
7446
+ const float y = ((float) i1 + pixel_offset) / sf1;
7447
+ for (int64_t i0 = 0; i0 < ne0; i0++) {
7448
+ const float x = ((float) i0 + pixel_offset) / sf0;
7449
+
7450
+ // the range of source pixels that contribute
7451
+ const int64_t x_min = std::max<int64_t>(x - support0 + pixel_offset, 0);
7452
+ const int64_t x_max = std::min<int64_t>(x + support0 + pixel_offset, ne00);
7453
+ const int64_t y_min = std::max<int64_t>(y - support1 + pixel_offset, 0);
7454
+ const int64_t y_max = std::min<int64_t>(y + support1 + pixel_offset, ne01);
7455
+
7456
+ // bilinear filter with antialiasing
7457
+ float val = 0.0f;
7458
+ float total_weight = 0.0f;
7459
+
7460
+ for (int64_t sy = y_min; sy < y_max; sy++) {
7461
+ const float weight_y = triangle_filter((sy - y + pixel_offset) * invscale1);
7462
+
7463
+ for (int64_t sx = x_min; sx < x_max; sx++) {
7464
+ const float weight_x = triangle_filter((sx - x + pixel_offset) * invscale0);
7465
+ const float weight = weight_x * weight_y;
7466
+
7467
+ if (weight <= 0.0f) {
7468
+ continue;
7469
+ }
7470
+
7471
+ const float pixel = *(const float *)((const char *)src0->data + sx*nb00 + sy*nb01 + i02*nb02 + i03*nb03);
7472
+ val += pixel * weight;
7473
+ total_weight += weight;
7474
+ }
7475
+ }
7476
+
7477
+ if (total_weight > 0.0f) {
7478
+ val /= total_weight;
7479
+ }
7480
+
7481
+ float * dst_ptr = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
7482
+ *dst_ptr = val;
7483
+ }
7484
+ }
7485
+ }
7486
+ }
7423
7487
  } else if (mode == WSP_GGML_SCALE_MODE_BILINEAR) {
7424
7488
  for (int64_t i3 = 0; i3 < ne3; i3++) {
7425
7489
  const int64_t i03 = i3 / sf3;
@@ -7532,6 +7596,7 @@ void wsp_ggml_compute_forward_upscale(
7532
7596
 
7533
7597
  // wsp_ggml_compute_forward_pad
7534
7598
 
7599
+ template<bool circular_t>
7535
7600
  static void wsp_ggml_compute_forward_pad_f32(
7536
7601
  const wsp_ggml_compute_params * params,
7537
7602
  wsp_ggml_tensor * dst) {
@@ -7556,23 +7621,40 @@ static void wsp_ggml_compute_forward_pad_f32(
7556
7621
  const int32_t lp3 = wsp_ggml_get_op_params_i32(dst, 6);
7557
7622
  const int32_t rp3 = wsp_ggml_get_op_params_i32(dst, 7);
7558
7623
 
7559
-
7560
7624
  // TODO: optimize
7561
7625
 
7562
7626
  for (int64_t i2 = 0; i2 < ne2; ++i2) {
7563
7627
  for (int64_t i1 = ith; i1 < ne1; i1 += nth) {
7564
7628
  for (int64_t i0 = 0; i0 < ne0; ++i0) {
7565
7629
  for (int64_t i3 = 0; i3 < ne3; ++i3) {
7566
- const int64_t dst_idx = i3*(ne0*ne1*ne2) + i2*(ne0*ne1) + i1*ne0 + i0;
7567
- if ((i0 >= lp0 && i0 < ne0 - rp0) \
7568
- && (i1 >= lp1 && i1 < ne1 - rp1) \
7569
- && (i2 >= lp2 && i2 < ne2 - rp2) \
7570
- && (i3 >= lp3 && i3 < ne3 - rp3)) {
7571
- const int64_t src_idx = (i3 - lp3)*nb03 + (i2 - lp2)*nb02 + (i1 - lp1)*nb01 + (i0 - lp0)*nb00;
7630
+ // circular means wrap around on a torus, so x and y loop around
7631
+ if constexpr (circular_t) {
7632
+ const int64_t dst_idx = i3*(ne0*ne1*ne2) + i2*(ne0*ne1) + i1*ne0 + i0;
7633
+ const int64_t src_i0 = wsp_ggml_wrap_around(i0 - lp0, ne00);
7634
+ const int64_t src_i1 = wsp_ggml_wrap_around(i1 - lp1, ne01);
7635
+ const int64_t src_i2 = wsp_ggml_wrap_around(i2 - lp2, ne02);
7636
+ const int64_t src_i3 = wsp_ggml_wrap_around(i3 - lp3, ne03);
7637
+
7638
+ const int64_t src_idx =
7639
+ src_i3*nb03 +
7640
+ src_i2*nb02 +
7641
+ src_i1*nb01 +
7642
+ src_i0*nb00;
7643
+
7572
7644
  const float * src_ptr = (const float *)((char *) src0->data + src_idx);
7573
7645
  dst_ptr[dst_idx] = *src_ptr;
7574
7646
  } else {
7575
- dst_ptr[dst_idx] = 0;
7647
+ const int64_t dst_idx = i3*(ne0*ne1*ne2) + i2*(ne0*ne1) + i1*ne0 + i0;
7648
+ if ((i0 >= lp0 && i0 < ne0 - rp0) \
7649
+ && (i1 >= lp1 && i1 < ne1 - rp1) \
7650
+ && (i2 >= lp2 && i2 < ne2 - rp2) \
7651
+ && (i3 >= lp3 && i3 < ne3 - rp3)) {
7652
+ const int64_t src_idx = (i3 - lp3)*nb03 + (i2 - lp2)*nb02 + (i1 - lp1)*nb01 + (i0 - lp0)*nb00;
7653
+ const float * src_ptr = (const float *)((char *) src0->data + src_idx);
7654
+ dst_ptr[dst_idx] = *src_ptr;
7655
+ } else {
7656
+ dst_ptr[dst_idx] = 0;
7657
+ }
7576
7658
  }
7577
7659
  }
7578
7660
  }
@@ -7580,16 +7662,20 @@ static void wsp_ggml_compute_forward_pad_f32(
7580
7662
  }
7581
7663
  }
7582
7664
 
7665
+
7583
7666
  void wsp_ggml_compute_forward_pad(
7584
7667
  const wsp_ggml_compute_params * params,
7585
7668
  wsp_ggml_tensor * dst) {
7586
-
7587
7669
  const wsp_ggml_tensor * src0 = dst->src[0];
7588
-
7670
+ const bool circular = (bool) wsp_ggml_get_op_params_i32(dst, 8);
7589
7671
  switch (src0->type) {
7590
7672
  case WSP_GGML_TYPE_F32:
7591
7673
  {
7592
- wsp_ggml_compute_forward_pad_f32(params, dst);
7674
+ if (circular) {
7675
+ wsp_ggml_compute_forward_pad_f32<true>(params, dst);
7676
+ } else {
7677
+ wsp_ggml_compute_forward_pad_f32<false>(params, dst);
7678
+ }
7593
7679
  } break;
7594
7680
  default:
7595
7681
  {
@@ -7794,7 +7880,7 @@ void wsp_ggml_compute_forward_timestep_embedding(
7794
7880
  // wsp_ggml_compute_forward_argsort
7795
7881
 
7796
7882
  template<enum wsp_ggml_sort_order order>
7797
- struct argsort_cmp {
7883
+ struct cmp_argsort {
7798
7884
  const float * data;
7799
7885
  bool operator()(int32_t a, int32_t b) const {
7800
7886
  if constexpr (order == WSP_GGML_SORT_ORDER_ASC) {
@@ -7833,11 +7919,11 @@ static void wsp_ggml_compute_forward_argsort_f32(
7833
7919
 
7834
7920
  switch (order) {
7835
7921
  case WSP_GGML_SORT_ORDER_ASC:
7836
- std::sort(dst_data, dst_data + ne0, argsort_cmp<WSP_GGML_SORT_ORDER_ASC>{src_data});
7922
+ std::sort(dst_data, dst_data + ne0, cmp_argsort<WSP_GGML_SORT_ORDER_ASC>{src_data});
7837
7923
  break;
7838
7924
 
7839
7925
  case WSP_GGML_SORT_ORDER_DESC:
7840
- std::sort(dst_data, dst_data + ne0, argsort_cmp<WSP_GGML_SORT_ORDER_DESC>{src_data});
7926
+ std::sort(dst_data, dst_data + ne0, cmp_argsort<WSP_GGML_SORT_ORDER_DESC>{src_data});
7841
7927
  break;
7842
7928
 
7843
7929
  default:
@@ -7864,6 +7950,72 @@ void wsp_ggml_compute_forward_argsort(
7864
7950
  }
7865
7951
  }
7866
7952
 
7953
+ // wsp_ggml_compute_forward_top_k
7954
+
7955
+ struct cmp_top_k {
7956
+ const float * data;
7957
+ bool operator()(int32_t a, int32_t b) const {
7958
+ return data[a] > data[b];
7959
+ }
7960
+ };
7961
+
7962
+ static void wsp_ggml_compute_forward_top_k_f32(
7963
+ const wsp_ggml_compute_params * params,
7964
+ wsp_ggml_tensor * dst) {
7965
+
7966
+ const wsp_ggml_tensor * src0 = dst->src[0];
7967
+
7968
+ WSP_GGML_TENSOR_UNARY_OP_LOCALS
7969
+
7970
+ WSP_GGML_ASSERT(nb0 == sizeof(float));
7971
+
7972
+ const int ith = params->ith;
7973
+ const int nth = params->nth;
7974
+
7975
+ const int64_t nr = wsp_ggml_nrows(src0);
7976
+
7977
+ const int top_k = ne0;
7978
+
7979
+ int32_t * tmp = (int32_t *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith;
7980
+
7981
+ for (int64_t i = ith; i < nr; i += nth) {
7982
+ const float * src_data = (float *)((char *) src0->data + i*nb01);
7983
+
7984
+ for (int64_t j = 0; j < ne00; j++) {
7985
+ tmp[j] = j;
7986
+ }
7987
+
7988
+ std::partial_sort(tmp, tmp + top_k, tmp + ne00, cmp_top_k{src_data});
7989
+
7990
+ int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
7991
+
7992
+ std::copy(tmp, tmp + top_k, dst_data);
7993
+
7994
+ // emphasize that the order is not important
7995
+ if (top_k > 1) {
7996
+ std::swap(dst_data[0], dst_data[1]);
7997
+ }
7998
+ }
7999
+ }
8000
+
8001
+ void wsp_ggml_compute_forward_top_k(
8002
+ const wsp_ggml_compute_params * params,
8003
+ wsp_ggml_tensor * dst) {
8004
+
8005
+ const wsp_ggml_tensor * src0 = dst->src[0];
8006
+
8007
+ switch (src0->type) {
8008
+ case WSP_GGML_TYPE_F32:
8009
+ {
8010
+ wsp_ggml_compute_forward_top_k_f32(params, dst);
8011
+ } break;
8012
+ default:
8013
+ {
8014
+ WSP_GGML_ABORT("fatal error");
8015
+ }
8016
+ }
8017
+ }
8018
+
7867
8019
  // wsp_ggml_compute_forward_flash_attn_ext
7868
8020
 
7869
8021
  static void wsp_ggml_compute_forward_flash_attn_ext_f16_one_chunk(
@@ -9696,13 +9848,13 @@ static void wsp_ggml_compute_forward_solve_tri_f32(const struct wsp_ggml_compute
9696
9848
  for (int64_t i00 = 0; i00 < n; ++i00) {
9697
9849
  float sum = 0.0f;
9698
9850
  for (int64_t t = 0; t < i00; ++t) {
9699
- sum += A_batch[i00 * n + t] * X_batch[i01 * n + t];
9851
+ sum += A_batch[i00 * n + t] * X_batch[t * k + i01];
9700
9852
  }
9701
9853
 
9702
9854
  const float diag = A_batch[i00 * n + i00];
9703
- WSP_GGML_ASSERT(diag != 0.0f && "Zero diagonal in triangular matrix");
9855
+ assert(diag != 0.0f && "Zero diagonal in triangular matrix");
9704
9856
 
9705
- X_batch[i01 * n + i00] = (B_batch[i00 * k + i01] - sum) / diag;
9857
+ X_batch[i00 * k + i01] = (B_batch[i00 * k + i01] - sum) / diag;
9706
9858
  }
9707
9859
  }
9708
9860
  }
@@ -81,6 +81,7 @@ void wsp_ggml_compute_forward_roll(const struct wsp_ggml_compute_params * params
81
81
  void wsp_ggml_compute_forward_arange(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
82
82
  void wsp_ggml_compute_forward_timestep_embedding(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
83
83
  void wsp_ggml_compute_forward_argsort(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
84
+ void wsp_ggml_compute_forward_top_k(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
84
85
  void wsp_ggml_compute_forward_leaky_relu(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
85
86
  void wsp_ggml_compute_forward_tri(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
86
87
  void wsp_ggml_compute_forward_fill(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);