npm - whisper.rn - Versions diffs - 0.5.0 → 0.5.2 - Mend

whisper.rn 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/cpp/ggml.c CHANGED Viewed

@@ -982,7 +982,9 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "CONV_TRANSPOSE_1D",
     "IM2COL",
     "IM2COL_BACK",
+    "IM2COL_3D",
     "CONV_2D",
+    "CONV_3D",
     "CONV_2D_DW",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
@@ -1025,7 +1027,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "GLU",
 };
-static_assert(WSP_GGML_OP_COUNT == 88, "WSP_GGML_OP_COUNT != 88");
+static_assert(WSP_GGML_OP_COUNT == 90, "WSP_GGML_OP_COUNT != 90");
 static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "none",
@@ -1084,7 +1086,9 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "conv_transpose_1d(x)",
     "im2col(x)",
     "im2col_back(x)",
+    "im2col_3d(x)",
     "conv_2d(x)",
+    "conv_3d(x)",
     "conv_2d_dw(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
@@ -1127,7 +1131,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "glu(x)",
 };
-static_assert(WSP_GGML_OP_COUNT == 88, "WSP_GGML_OP_COUNT != 88");
+static_assert(WSP_GGML_OP_COUNT == 90, "WSP_GGML_OP_COUNT != 90");
 static_assert(WSP_GGML_OP_POOL_COUNT == 2, "WSP_GGML_OP_POOL_COUNT != 2");
@@ -1147,10 +1151,14 @@ static const char * WSP_GGML_UNARY_OP_NAME[WSP_GGML_UNARY_OP_COUNT] = {
     "HARDSIGMOID",
     "EXP",
     "GELU_ERF",
+    "XIELU",
+    "FLOOR",
+    "CEIL",
+    "ROUND",
+    "TRUNC",
 };
-static_assert(WSP_GGML_UNARY_OP_COUNT == 15, "WSP_GGML_UNARY_OP_COUNT != 15");
+static_assert(WSP_GGML_UNARY_OP_COUNT == 20, "WSP_GGML_UNARY_OP_COUNT != 20");
 static const char * WSP_GGML_GLU_OP_NAME[WSP_GGML_GLU_OP_COUNT] = {
     "REGLU",
@@ -2656,6 +2664,29 @@ struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
     return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_SILU);
 }
+/* wsp_ggml_xielu
+ *
+ * Builds a WSP_GGML_OP_UNARY graph node tagged WSP_GGML_UNARY_OP_XIELU with `a` as src[0].
+ * The result tensor is obtained from wsp_ggml_dup_tensor(ctx, a).
+ *
+ * Op-param layout written here:
+ *   [0] i32  WSP_GGML_UNARY_OP_XIELU
+ *   [1] f32  beta + wsp_ggml_softplus(alpha_n)
+ *   [2] f32  wsp_ggml_softplus(alpha_p)
+ *   [3] f32  beta
+ *   [4] f32  eps
+ *
+ * alpha_n and alpha_p are therefore stored already transformed, not as the raw arguments.
+ */
+struct wsp_ggml_tensor * wsp_ggml_xielu(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float alpha_n,
+        float alpha_p,
+        float beta,
+        float eps) {
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+    wsp_ggml_set_op_params_i32(result, 0, (int32_t) WSP_GGML_UNARY_OP_XIELU); // slot 0 carries the unary-op id
+    wsp_ggml_set_op_params_f32(result, 1, beta + wsp_ggml_softplus(alpha_n)); // softplus is evaluated now, at graph-build time
+    wsp_ggml_set_op_params_f32(result, 2, wsp_ggml_softplus(alpha_p));
+    wsp_ggml_set_op_params_f32(result, 3, beta);
+    wsp_ggml_set_op_params_f32(result, 4, eps);
+    result->op     = WSP_GGML_OP_UNARY;
+    result->src[0] = a;
+    return result;
+}
 // wsp_ggml_silu_back
 struct wsp_ggml_tensor * wsp_ggml_silu_back(
@@ -2730,6 +2761,62 @@ static struct wsp_ggml_tensor * wsp_ggml_glu_impl(
     return result;
 }
+/* wsp_ggml_floor
+ *
+ * wsp_ggml_floor and wsp_ggml_floor_inplace both request WSP_GGML_UNARY_OP_FLOOR on `a`;
+ * the first goes through wsp_ggml_unary, the second through wsp_ggml_unary_inplace.
+ * Whatever those helpers return is returned unchanged.
+ */
+struct wsp_ggml_tensor * wsp_ggml_floor(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
+}
+struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
+}
+/* wsp_ggml_ceil
+ *
+ * wsp_ggml_ceil and wsp_ggml_ceil_inplace both request WSP_GGML_UNARY_OP_CEIL on `a`;
+ * the first goes through wsp_ggml_unary, the second through wsp_ggml_unary_inplace.
+ * Whatever those helpers return is returned unchanged.
+ */
+struct wsp_ggml_tensor * wsp_ggml_ceil(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_CEIL);
+}
+struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_CEIL);
+}
+/* wsp_ggml_round
+ *
+ * wsp_ggml_round and wsp_ggml_round_inplace both request WSP_GGML_UNARY_OP_ROUND on `a`;
+ * the first goes through wsp_ggml_unary, the second through wsp_ggml_unary_inplace.
+ * Whatever those helpers return is returned unchanged.
+ * NOTE(review): the tie-breaking rule of the ROUND kernel is not visible in this file section.
+ */
+struct wsp_ggml_tensor * wsp_ggml_round(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_ROUND);
+}
+struct wsp_ggml_tensor * wsp_ggml_round_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_ROUND);
+}
+/* wsp_ggml_trunc
+ *
+ * wsp_ggml_trunc and wsp_ggml_trunc_inplace both request WSP_GGML_UNARY_OP_TRUNC on `a`;
+ * the first goes through wsp_ggml_unary, the second through wsp_ggml_unary_inplace.
+ * Whatever those helpers return is returned unchanged.
+ */
+struct wsp_ggml_tensor * wsp_ggml_trunc(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
+}
+struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
+}
 struct wsp_ggml_tensor * wsp_ggml_glu(
         struct wsp_ggml_context * ctx,
         struct wsp_ggml_tensor  * a,
@@ -3627,6 +3714,7 @@ struct wsp_ggml_tensor * wsp_ggml_get_rows(
         struct wsp_ggml_tensor  * a,
         struct wsp_ggml_tensor  * b) {
     WSP_GGML_ASSERT(a->ne[2] == b->ne[1]);
+    WSP_GGML_ASSERT(a->ne[3] == b->ne[2]);
     WSP_GGML_ASSERT(b->ne[3] == 1);
     WSP_GGML_ASSERT(b->type == WSP_GGML_TYPE_I32);
@@ -3680,7 +3768,7 @@ struct wsp_ggml_tensor * wsp_ggml_set_rows(
     WSP_GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
     WSP_GGML_ASSERT(c->ne[3] == 1);
     WSP_GGML_ASSERT(b->type == WSP_GGML_TYPE_F32);
-    WSP_GGML_ASSERT(c->type == WSP_GGML_TYPE_I64);
+    WSP_GGML_ASSERT(c->type == WSP_GGML_TYPE_I64 || c->type == WSP_GGML_TYPE_I32);
     WSP_GGML_ASSERT(wsp_ggml_is_contiguous_rows(a));
     WSP_GGML_ASSERT(wsp_ggml_is_contiguous_rows(b));
@@ -3690,6 +3778,7 @@ struct wsp_ggml_tensor * wsp_ggml_set_rows(
     result->op     = WSP_GGML_OP_SET_ROWS;
     result->src[0] = b;
     result->src[1] = c;
+    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
     return result;
 }
@@ -3831,6 +3920,15 @@ struct wsp_ggml_tensor * wsp_ggml_soft_max_ext(
     return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
 }
+struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace( // counterpart of wsp_ggml_soft_max_ext; forwards everything to wsp_ggml_soft_max_impl
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * mask,
+        float                 scale,
+        float                 max_bias) {
+    return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true); // last argument is `true` where wsp_ggml_soft_max_ext passes `false` (presumably the impl's in-place switch - confirm in wsp_ggml_soft_max_impl)
+}
 void wsp_ggml_soft_max_add_sinks(
         struct wsp_ggml_tensor * a,
         struct wsp_ggml_tensor * sinks) {
@@ -3930,7 +4028,7 @@ static struct wsp_ggml_tensor * wsp_ggml_rope_impl(
     memcpy(params +  8, &attn_factor,  sizeof(float));
     memcpy(params +  9, &beta_fast,    sizeof(float));
     memcpy(params + 10, &beta_slow,    sizeof(float));
-    if (mrope_used) {
+    if (mrope_used && sections) {
         memcpy(params + 11, sections,  sizeof(int32_t) * WSP_GGML_MROPE_SECTIONS);
     } else {
         memset(params + 11, 0,         sizeof(int32_t) * WSP_GGML_MROPE_SECTIONS);
@@ -4367,6 +4465,91 @@ struct wsp_ggml_tensor * wsp_ggml_conv_2d(
     return result;
 }
+// a: [OC*IC, KD, KH, KW]   (supplies the K* extents; listed as ne[3], ne[2], ne[1], ne[0])
+// b: [N*IC, ID, IH, IW]    (supplies the I* extents; listed as ne[3], ne[2], ne[1], ne[0])
+/* result: [N*OD, OH, OW, IC * KD * KH * KW]
+ *
+ * Creates a WSP_GGML_OP_IM2COL_3D node with src[0] = a and src[1] = b.
+ *
+ * The output extents OD/OH/OW come from wsp_ggml_calc_conv_output_size applied per axis
+ * (depth uses s2/p2/d2, height s1/p1/d1, width s0/p0/d0) and each is asserted to be > 0.
+ * The result tensor has type dst_type and ne = { KW*KH*KD*IC, OW, OH, OD*N },
+ * where N = b->ne[3] / IC.
+ *
+ * Op params stored as int32: s0, s1, s2, p0, p1, p2, d0, d1, d2, IC (in that order).
+ *
+ * NOTE(review): IC is used as a divisor with no visible check for zero or for
+ * b->ne[3] / a->ne[3] being exact multiples of it; integer division truncates silently.
+ */
+struct wsp_ggml_tensor * wsp_ggml_im2col_3d(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * b,
+        int64_t               IC,
+        int                   s0, // stride width
+        int                   s1, // stride height
+        int                   s2, // stride depth
+        int                   p0, // padding width
+        int                   p1, // padding height
+        int                   p2, // padding depth
+        int                   d0, // dilation width
+        int                   d1, // dilation height
+        int                   d2, // dilation depth
+        enum wsp_ggml_type        dst_type) {
+    const int64_t N = b->ne[3] / IC;
+    const int64_t ID = b->ne[2];
+    const int64_t IH = b->ne[1];
+    const int64_t IW = b->ne[0];
+    const int64_t OC = a->ne[3] / IC;
+    UNUSED(OC); // computed only to mirror the shape comment above; not used in this function
+    const int64_t KD = a->ne[2];
+    const int64_t KH = a->ne[1];
+    const int64_t KW = a->ne[0];
+    const int64_t OD = wsp_ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
+    const int64_t OH = wsp_ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
+    const int64_t OW = wsp_ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
+    WSP_GGML_ASSERT((OD > 0)  && "b too small compared to a");
+    WSP_GGML_ASSERT((OH > 0)  && "b too small compared to a");
+    WSP_GGML_ASSERT((OW > 0)  && "b too small compared to a");
+    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor(ctx, dst_type, 4, ne);
+    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC}; // IC is narrowed from int64_t to int32_t here
+    wsp_ggml_set_op_params(result, params, sizeof(params));
+    result->op     = WSP_GGML_OP_IM2COL_3D;
+    result->src[0] = a;
+    result->src[1] = b;
+    return result;
+}
+// a: [OC*IC, KD, KH, KW]
+// b: [N*IC, ID, IH, IW]
+/* result: [N*OC, OD, OH, OW]
+ *
+ * Builds the convolution as a chain of graph nodes rather than a single op:
+ *   1. wsp_ggml_im2col_3d(a, b, ...) with a->type as the destination type;
+ *   2. wsp_ggml_mul_mat of the im2col output (flattened to 2D) with `a` (flattened to 2D);
+ *   3. reshape_4d -> permute(0, 1, 3, 2) -> cont -> reshape_4d to reach the result layout.
+ *
+ * Shapes in the comments are listed as ne[3], ne[2], ne[1], ne[0], matching how the
+ * final reshape passes (ne[1], ne[2], OD, OC * N) of the im2col tensor as ne0..ne3.
+ *
+ * NOTE(review): IC is used as a divisor here and inside wsp_ggml_im2col_3d with no
+ * visible check for zero.
+ */
+struct wsp_ggml_tensor * wsp_ggml_conv_3d(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * b,
+        int64_t               IC,
+        int                   s0, // stride width
+        int                   s1, // stride height
+        int                   s2, // stride depth
+        int                   p0, // padding width
+        int                   p1, // padding height
+        int                   p2, // padding depth
+        int                   d0, // dilation width
+        int                   d1, // dilation height
+        int                   d2  // dilation depth
+        ) {
+    struct wsp_ggml_tensor * im2col = wsp_ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
+    int64_t OC = a->ne[3] / IC;
+    int64_t N = b->ne[3] / IC;
+    struct wsp_ggml_tensor * result =
+        wsp_ggml_mul_mat(ctx,
+                wsp_ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
+                wsp_ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
+    int64_t OD = im2col->ne[3] / N; // im2col->ne[3] was built as OD*N, so this recovers OD
+    result = wsp_ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
+    result = wsp_ggml_cont(ctx, wsp_ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
+    result = wsp_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
+    return result;
+}
 // wsp_ggml_conv_2d_sk_p0
 struct wsp_ggml_tensor * wsp_ggml_conv_2d_sk_p0(
@@ -4488,6 +4671,56 @@ struct wsp_ggml_tensor * wsp_ggml_conv_2d_direct(
     return result;
 }
+/* wsp_ggml_conv_3d_direct
+ *
+ * Creates a single WSP_GGML_OP_CONV_3D node with src[0] = a and src[1] = b.
+ *
+ * Preconditions asserted here: a->ne[3] == c * oc and b->ne[3] == c * n.
+ *
+ * The result is always WSP_GGML_TYPE_F32 with
+ *   ne[0..2] = wsp_ggml_calc_conv_output_size(b->ne[i], a->ne[i], s_i, p_i, d_i)
+ *   ne[3]    = oc * n
+ *
+ * Op params (i32), in slot order: s0, s1, s2, p0, p1, p2, d0, d1, d2, c, n, oc.
+ *
+ * s/p/d presumably mean stride/padding/dilation for axes 0/1/2, mirroring the commented
+ * parameters of wsp_ggml_conv_3d; c/n/oc presumably are input channels, batch size and
+ * output channels - TODO confirm against the header.
+ *
+ * NOTE(review): unlike wsp_ggml_im2col_3d, the computed output extents are not asserted
+ * to be positive before the tensor is created.
+ */
+struct wsp_ggml_tensor * wsp_ggml_conv_3d_direct(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * b,
+        int                   s0,
+        int                   s1,
+        int                   s2,
+        int                   p0,
+        int                   p1,
+        int                   p2,
+        int                   d0,
+        int                   d1,
+        int                   d2,
+        int                   c,
+        int                   n,
+        int                   oc) {
+    WSP_GGML_ASSERT(a->ne[3] == (int64_t) c * oc); // widened to int64_t before multiplying
+    WSP_GGML_ASSERT(b->ne[3] == (int64_t) c * n);
+    int64_t ne[4];
+    ne[0] = wsp_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+    ne[1] = wsp_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
+    ne[2] = wsp_ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
+    ne[3] = (int64_t) oc * n;
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor(ctx, WSP_GGML_TYPE_F32, 4, ne);
+    wsp_ggml_set_op_params_i32(result, 0,  s0);
+    wsp_ggml_set_op_params_i32(result, 1,  s1);
+    wsp_ggml_set_op_params_i32(result, 2,  s2);
+    wsp_ggml_set_op_params_i32(result, 3,  p0);
+    wsp_ggml_set_op_params_i32(result, 4,  p1);
+    wsp_ggml_set_op_params_i32(result, 5,  p2);
+    wsp_ggml_set_op_params_i32(result, 6,  d0);
+    wsp_ggml_set_op_params_i32(result, 7,  d1);
+    wsp_ggml_set_op_params_i32(result, 8,  d2);
+    wsp_ggml_set_op_params_i32(result, 9,  c);
+    wsp_ggml_set_op_params_i32(result, 10, n);
+    wsp_ggml_set_op_params_i32(result, 11, oc);
+    result->op = WSP_GGML_OP_CONV_3D;
+    result->src[0] = a;
+    result->src[1] = b;
+    return result;
+}
 // wsp_ggml_conv_transpose_2d_p0
 static int64_t wsp_ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
@@ -4666,11 +4899,36 @@ struct wsp_ggml_tensor * wsp_ggml_pad(
         int                   p1,
         int                   p2,
         int                   p3) {
+    return wsp_ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
+}
+struct wsp_ggml_tensor * wsp_ggml_pad_ext(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            int                  lp0,
+            int                  rp0,
+            int                  lp1,
+            int                  rp1,
+            int                  lp2,
+            int                  rp2,
+            int                  lp3,
+            int                  rp3
+            ) {
     struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, a->type,
-            a->ne[0] + p0,
-            a->ne[1] + p1,
-            a->ne[2] + p2,
-            a->ne[3] + p3);
+            a->ne[0] + lp0 + rp0,
+            a->ne[1] + lp1 + rp1,
+            a->ne[2] + lp2 + rp2,
+            a->ne[3] + lp3 + rp3);
+    wsp_ggml_set_op_params_i32(result, 0, lp0);
+    wsp_ggml_set_op_params_i32(result, 1, rp0);
+    wsp_ggml_set_op_params_i32(result, 2, lp1);
+    wsp_ggml_set_op_params_i32(result, 3, rp1);
+    wsp_ggml_set_op_params_i32(result, 4, lp2);
+    wsp_ggml_set_op_params_i32(result, 5, rp2);
+    wsp_ggml_set_op_params_i32(result, 6, lp3);
+    wsp_ggml_set_op_params_i32(result, 7, rp3);
     result->op     = WSP_GGML_OP_PAD;
     result->src[0] = a;
@@ -4766,12 +5024,8 @@ struct wsp_ggml_tensor * wsp_ggml_timestep_embedding(
         struct wsp_ggml_tensor  * timesteps,
         int                   dim,
         int                   max_period) {
-    int actual_dim = dim;
-    if (dim % 2 != 0) {
-        actual_dim = dim + 1;
-    }
-    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, dim, timesteps->ne[0]);
     wsp_ggml_set_op_params_i32(result, 0, dim);
     wsp_ggml_set_op_params_i32(result, 1, max_period);
@@ -6718,6 +6972,78 @@ void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph) {
     WSP_GGML_LOG_INFO("========================================\n");
 }
+static int wsp_ggml_node_list_find_tensor(const struct wsp_ggml_cgraph * cgraph, // returns the position i in idxs[0..count) with cgraph->nodes[idxs[i]] == tensor, or -1
+                                      const int *                idxs,  // node indices into cgraph->nodes
+                                      int                        count, // number of entries in idxs
+                                      const struct wsp_ggml_tensor * tensor) { // tensor to look for; compared by pointer
+    WSP_GGML_ASSERT(cgraph && idxs);
+    for (int i = 0; i < count; ++i) {
+        const int node_idx = idxs[i];
+        if (node_idx >= cgraph->n_nodes) { // an out-of-range index ends the whole search with -1; later entries are not examined. NOTE(review): negative indices are not rejected
+            return -1;
+        }
+        if (cgraph->nodes[node_idx] == tensor) {
+            return i; // position within idxs, not the graph node index
+        }
+    }
+    return -1;
+}
+bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph, // returns true only if every node listed in node_idxs passes all checks below
+                                const int *                node_idxs,   // indices into cgraph->nodes; entry i is matched against ops[i]
+                                int                        count,       // number of entries in node_idxs and in ops
+                                const enum wsp_ggml_op *       ops,     // expected op of each listed node
+                                const int *                outputs,     // graph node indices that are exempt from the flag / use-count / view checks
+                                int                        num_outputs) { // number of entries in outputs; asserted > 0
+    WSP_GGML_ASSERT(outputs && num_outputs > 0);
+    for (int i = 0; i < count; ++i) {
+        if (node_idxs[i] >= cgraph->n_nodes) { // NOTE(review): only the upper bound is checked; a negative index would still be dereferenced below
+            return false;
+        }
+        const struct wsp_ggml_tensor * node = cgraph->nodes[node_idxs[i]];
+        if (node->op != ops[i]) {
+            return false;
+        }
+        if (wsp_ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) { // nodes listed in outputs skip the remaining checks
+            continue;
+        }
+        if (node->flags & WSP_GGML_TENSOR_FLAG_OUTPUT) { // a node not listed in outputs but carrying the OUTPUT flag blocks fusion
+            return false;
+        }
+        int subgraph_uses = 0; // number of src slots of later listed nodes that point at this node
+        for (int j = i + 1; j < count; ++j) {
+            const struct wsp_ggml_tensor * other_node = cgraph->nodes[node_idxs[j]]; // NOTE(review): node_idxs[j] has not been range-checked yet here; the bound check above has only covered entries up to i
+            for (int src_idx = 0; src_idx < WSP_GGML_MAX_SRC; src_idx++) {
+                if (other_node->src[src_idx] == node) {
+                    subgraph_uses++;
+                }
+            }
+        }
+        if (subgraph_uses != wsp_ggml_node_get_use_count(cgraph, node_idxs[i])) { // presumably a mismatch means the node is also consumed outside the listed nodes - confirm against wsp_ggml_node_get_use_count
+            return false;
+        }
+        // if node is a view, check if the view_src and all its parent view_srcs are within the subgraph
+        struct wsp_ggml_tensor * view_src = node->view_src;
+        while (view_src) {
+            if (wsp_ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
+                return false;
+            }
+            view_src = view_src->view_src; // walk up the chain of view sources until it ends
+        }
+    }
+    return true;
+}
 // check if node is part of the graph
 static bool wsp_ggml_graph_find(const struct wsp_ggml_cgraph * cgraph, const struct wsp_ggml_tensor * node) {
     if (cgraph == NULL) {