whisper.rn 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/android/src/main/jni.cpp +12 -3
  2. package/cpp/ggml-alloc.c +49 -18
  3. package/cpp/ggml-backend-impl.h +0 -3
  4. package/cpp/ggml-backend-reg.cpp +8 -0
  5. package/cpp/ggml-backend.cpp +0 -2
  6. package/cpp/ggml-backend.h +2 -0
  7. package/cpp/ggml-cpu/amx/amx.cpp +1 -0
  8. package/cpp/ggml-cpu/arch/arm/quants.c +428 -26
  9. package/cpp/ggml-cpu/ggml-cpu-impl.h +4 -2
  10. package/cpp/ggml-cpu/ggml-cpu.c +67 -24
  11. package/cpp/ggml-cpu/ops.cpp +489 -364
  12. package/cpp/ggml-cpu/ops.h +4 -4
  13. package/cpp/ggml-cpu/repack.cpp +143 -29
  14. package/cpp/ggml-cpu/simd-mappings.h +25 -25
  15. package/cpp/ggml-cpu/unary-ops.cpp +151 -0
  16. package/cpp/ggml-cpu/unary-ops.h +7 -0
  17. package/cpp/ggml-cpu/vec.cpp +83 -0
  18. package/cpp/ggml-cpu/vec.h +20 -8
  19. package/cpp/ggml-impl.h +67 -2
  20. package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
  21. package/cpp/ggml-metal/ggml-metal-context.m +5 -6
  22. package/cpp/ggml-metal/ggml-metal-device.cpp +300 -14
  23. package/cpp/ggml-metal/ggml-metal-device.h +26 -1
  24. package/cpp/ggml-metal/ggml-metal-device.m +243 -28
  25. package/cpp/ggml-metal/ggml-metal-impl.h +177 -9
  26. package/cpp/ggml-metal/ggml-metal-ops.cpp +843 -157
  27. package/cpp/ggml-metal/ggml-metal-ops.h +8 -0
  28. package/cpp/ggml-metal/ggml-metal.cpp +8 -3
  29. package/cpp/ggml-metal/ggml-metal.metal +12436 -0
  30. package/cpp/ggml.c +317 -4
  31. package/cpp/ggml.h +139 -0
  32. package/cpp/jsi/RNWhisperJSI.cpp +7 -2
  33. package/cpp/rn-whisper.h +1 -0
  34. package/cpp/whisper.cpp +8 -2
  35. package/ios/RNWhisperContext.mm +3 -1
  36. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  37. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  38. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  39. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  45. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  47. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  53. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  54. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  55. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
  62. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  70. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  71. package/lib/commonjs/version.json +1 -1
  72. package/lib/module/NativeRNWhisper.js.map +1 -1
  73. package/lib/module/version.json +1 -1
  74. package/lib/typescript/NativeRNWhisper.d.ts +2 -0
  75. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  76. package/package.json +1 -1
  77. package/src/NativeRNWhisper.ts +2 -0
  78. package/src/version.json +1 -1
  79. package/whisper-rn.podspec +1 -1
  80. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  81. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  82. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  83. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  84. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  85. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
package/cpp/ggml.c CHANGED
@@ -943,6 +943,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "COS",
     "SUM",
     "SUM_ROWS",
+    "CUMSUM",
     "MEAN",
     "ARGMAX",
     "COUNT_EQUAL",
@@ -998,6 +999,8 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
+    "TRI",
+    "FILL",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -1010,6 +1013,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "RWKV_WKV6",
     "GATED_LINEAR_ATTN",
     "RWKV_WKV7",
+    "SOLVE_TRI",
 
     "UNARY",
 
@@ -1027,7 +1031,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(WSP_GGML_OP_COUNT == 90, "WSP_GGML_OP_COUNT != 90");
+static_assert(WSP_GGML_OP_COUNT == 94, "WSP_GGML_OP_COUNT != 94");
 
 static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "none",
@@ -1047,6 +1051,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "cos(x)",
     "Σx",
     "Σx_k",
+    "cumsum(x)",
     "Σx/n",
     "argmax(x)",
     "count_equal(x)",
@@ -1102,6 +1107,8 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
+    "tri(x)",
+    "fill(x, c)",
 
     "flash_attn_ext(x)",
     "flash_attn_back(x)",
@@ -1114,6 +1121,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "rwkv_wkv6(k, v, r, tf, td, s)",
     "gated_linear_attn(k, v, q, gate, s)",
     "rwkv_wkv7(r, w, k, v, a, b, s)",
+    "A X = B, A triangular, solve X",
 
     "unary(x)",
 
@@ -1131,7 +1139,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(WSP_GGML_OP_COUNT == 90, "WSP_GGML_OP_COUNT != 90");
+static_assert(WSP_GGML_OP_COUNT == 94, "WSP_GGML_OP_COUNT != 94");
 
 static_assert(WSP_GGML_OP_POOL_COUNT == 2, "WSP_GGML_OP_POOL_COUNT != 2");
 
@@ -1150,11 +1158,17 @@ static const char * WSP_GGML_UNARY_OP_NAME[WSP_GGML_UNARY_OP_COUNT] = {
     "HARDSWISH",
     "HARDSIGMOID",
     "EXP",
+    "EXPM1",
+    "SOFTPLUS",
     "GELU_ERF",
+    "XIELU",
+    "FLOOR",
+    "CEIL",
+    "ROUND",
+    "TRUNC",
 };
 
-static_assert(WSP_GGML_UNARY_OP_COUNT == 15, "WSP_GGML_UNARY_OP_COUNT != 15");
-
+static_assert(WSP_GGML_UNARY_OP_COUNT == 22, "WSP_GGML_UNARY_OP_COUNT != 22");
 
 static const char * WSP_GGML_GLU_OP_NAME[WSP_GGML_GLU_OP_COUNT] = {
     "REGLU",
@@ -2262,6 +2276,30 @@ struct wsp_ggml_tensor * wsp_ggml_log_inplace(
     return wsp_ggml_log_impl(ctx, a, true);
 }
 
+struct wsp_ggml_tensor * wsp_ggml_expm1(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_EXPM1);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_expm1_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_EXPM1);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_softplus(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_SOFTPLUS);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_softplus_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_SOFTPLUS);
+}
+
 // wsp_ggml_sin
 
 static struct wsp_ggml_tensor * wsp_ggml_sin_impl(
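Note: the four wrappers above only tag a node with the new unary ops; the kernels themselves land in the CPU and Metal backends (unary-ops.cpp, ggml-metal-ops.cpp in the file list at the top). A minimal sketch of driving one of them through the ggml graph API follows — the wsp_-prefixed init/graph helpers are assumed to mirror upstream ggml, and this is illustrative code, not part of the package:

#include "ggml.h"

// Sketch: compute softplus(x) = log(1 + exp(x)) elementwise on a tiny tensor.
static void softplus_demo(void) {
    struct wsp_ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct wsp_ggml_context * ctx = wsp_ggml_init(params);

    struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 4);
    float * xd = (float *) x->data;
    xd[0] = -2.0f; xd[1] = -0.5f; xd[2] = 0.5f; xd[3] = 2.0f;

    struct wsp_ggml_tensor * y = wsp_ggml_softplus(ctx, x); // or wsp_ggml_expm1(ctx, x)

    struct wsp_ggml_cgraph * gf = wsp_ggml_new_graph(ctx);
    wsp_ggml_build_forward_expand(gf, y);
    wsp_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

    // y->data now holds softplus of each input element
    wsp_ggml_free(ctx);
}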
@@ -2345,6 +2383,21 @@ struct wsp_ggml_tensor * wsp_ggml_sum_rows(
     return result;
 }
 
+// wsp_ggml_cumsum
+
+struct wsp_ggml_tensor * wsp_ggml_cumsum(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+
+    result->op     = WSP_GGML_OP_CUMSUM;
+    result->src[0] = a;
+
+    return result;
+}
+
 // wsp_ggml_mean
 
 struct wsp_ggml_tensor * wsp_ggml_mean(
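wsp_ggml_cumsum clones the input's shape and only accepts F32, per the assert above. A fragment in the same style as the softplus sketch; the expected output assumes upstream ggml's semantics of an inclusive prefix sum along the first dimension:

// {1, 2, 3, 4} -> {1, 3, 6, 10}
static struct wsp_ggml_tensor * cumsum_demo(struct wsp_ggml_context * ctx) {
    struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 4);
    float * xd = (float *) x->data;
    xd[0] = 1.0f; xd[1] = 2.0f; xd[2] = 3.0f; xd[3] = 4.0f;

    return wsp_ggml_cumsum(ctx, x); // build + compute as in the sketch above
}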
@@ -2660,6 +2713,29 @@ struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
     return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_SILU);
 }
 
+// wsp_ggml_xielu
+
+struct wsp_ggml_tensor * wsp_ggml_xielu(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float alpha_n,
+        float alpha_p,
+        float beta,
+        float eps) {
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+
+    wsp_ggml_set_op_params_i32(result, 0, (int32_t) WSP_GGML_UNARY_OP_XIELU);
+    wsp_ggml_set_op_params_f32(result, 1, beta + wsp_ggml_compute_softplus_f32(alpha_n));
+    wsp_ggml_set_op_params_f32(result, 2, wsp_ggml_compute_softplus_f32(alpha_p));
+    wsp_ggml_set_op_params_f32(result, 3, beta);
+    wsp_ggml_set_op_params_f32(result, 4, eps);
+
+    result->op     = WSP_GGML_OP_UNARY;
+    result->src[0] = a;
+
+    return result;
+}
+
 // wsp_ggml_silu_back
 
 struct wsp_ggml_tensor * wsp_ggml_silu_back(
@@ -2734,6 +2810,62 @@ static struct wsp_ggml_tensor * wsp_ggml_glu_impl(
     return result;
 }
 
+// wsp_ggml_floor
+
+struct wsp_ggml_tensor * wsp_ggml_floor(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
+}
+
+// wsp_ggml_ceil
+
+struct wsp_ggml_tensor * wsp_ggml_ceil(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_CEIL);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_CEIL);
+}
+
+// wsp_ggml_round
+
+struct wsp_ggml_tensor * wsp_ggml_round(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_ROUND);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_round_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_ROUND);
+}
+
+// wsp_ggml_trunc
+
+struct wsp_ggml_tensor * wsp_ggml_trunc(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
+}
+
 struct wsp_ggml_tensor * wsp_ggml_glu(
         struct wsp_ggml_context * ctx,
         struct wsp_ggml_tensor  * a,
@@ -3837,6 +3969,15 @@ struct wsp_ggml_tensor * wsp_ggml_soft_max_ext(
     return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
 }
 
+struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * mask,
+        float scale,
+        float max_bias) {
+    return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
+}
+
 void wsp_ggml_soft_max_add_sinks(
         struct wsp_ggml_tensor * a,
         struct wsp_ggml_tensor * sinks) {
@@ -4944,6 +5085,61 @@ struct wsp_ggml_tensor * wsp_ggml_timestep_embedding(
     return result;
 }
 
+// wsp_ggml_tri
+
+struct wsp_ggml_tensor * wsp_ggml_tri(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        enum wsp_ggml_tri_type    type) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(a));
+    WSP_GGML_ASSERT(a->ne[0] == a->ne[1]);
+
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+
+    wsp_ggml_set_op_params_i32(result, 0, type);
+
+    result->op     = WSP_GGML_OP_TRI;
+    result->src[0] = a;
+
+    return result;
+}
+
+// wsp_ggml_fill
+
+static struct wsp_ggml_tensor * wsp_ggml_fill_impl(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float c,
+        bool inplace) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(a));
+
+    struct wsp_ggml_tensor * result = inplace ? wsp_ggml_view_tensor(ctx, a) : wsp_ggml_dup_tensor(ctx, a);
+
+    wsp_ggml_set_op_params_f32(result, 0, c);
+
+    result->op     = WSP_GGML_OP_FILL;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct wsp_ggml_tensor * wsp_ggml_fill(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float c) {
+    return wsp_ggml_fill_impl(ctx, a, c, false);
+}
+
+struct wsp_ggml_tensor * wsp_ggml_fill_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float c) {
+    return wsp_ggml_fill_impl(ctx, a, c, true);
+}
+
 // wsp_ggml_argsort
 
 struct wsp_ggml_tensor * wsp_ggml_argsort(
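wsp_ggml_tri requires a contiguous, square F32 matrix and wsp_ggml_fill a contiguous F32 tensor, per the asserts above. A fragment combining the two; reading WSP_GGML_TRI_TYPE_LOWER_DIAG as "keep the lower triangle including the diagonal" follows the enum naming in ggml.h and is an assumption here:

// Build a lower-triangular matrix of ones (e.g. the shape of a causal mask).
static struct wsp_ggml_tensor * tril_ones(struct wsp_ggml_context * ctx) {
    struct wsp_ggml_tensor * m = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, 8, 8);

    // same shape as m, every element set to 1.0f
    struct wsp_ggml_tensor * ones = wsp_ggml_fill(ctx, m, 1.0f);

    // zero everything above the diagonal
    return wsp_ggml_tri(ctx, ones, WSP_GGML_TRI_TYPE_LOWER_DIAG);
}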
@@ -5798,6 +5994,41 @@ struct wsp_ggml_tensor * wsp_ggml_opt_step_sgd(
     return result;
 }
 
+// solve_tri
+
+struct wsp_ggml_tensor * wsp_ggml_solve_tri(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * b,
+        bool left,
+        bool lower,
+        bool uni) {
+    WSP_GGML_ASSERT(a->type == WSP_GGML_TYPE_F32);
+    WSP_GGML_ASSERT(b->type == WSP_GGML_TYPE_F32);
+
+    // A must be square and lower diagonal
+    WSP_GGML_ASSERT(a->ne[0] == a->ne[1]);
+    // B must have same outer dimension as A
+    WSP_GGML_ASSERT(a->ne[1] == b->ne[1]);
+
+    // batch dimensions must be equal
+    WSP_GGML_ASSERT(a->ne[2] == b->ne[2]);
+    WSP_GGML_ASSERT(a->ne[3] == b->ne[3]);
+
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(a));
+    WSP_GGML_ASSERT(wsp_ggml_is_contiguous(b));
+
+    WSP_GGML_ASSERT(lower && left && !uni); // TODO: support other variants
+
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
+
+    result->op     = WSP_GGML_OP_SOLVE_TRI;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 struct wsp_ggml_hash_set wsp_ggml_hash_set_new(size_t size) {
@@ -6370,6 +6601,16 @@ static void wsp_ggml_compute_backward(
                     wsp_ggml_add_or_set(ctx, cgraph, isrc0, wsp_ggml_mul(ctx, tensor, grad));
                 }
             } break;
+            case WSP_GGML_UNARY_OP_EXPM1: {
+                if (src0_needs_grads) {
+                    wsp_ggml_add_or_set(ctx, cgraph, isrc0, wsp_ggml_mul(ctx, grad, wsp_ggml_exp(ctx, src0)));
+                }
+            } break;
+            case WSP_GGML_UNARY_OP_SOFTPLUS: {
+                if (src0_needs_grads) {
+                    wsp_ggml_add_or_set(ctx, cgraph, isrc0, wsp_ggml_mul(ctx, grad, wsp_ggml_sigmoid(ctx, src0)));
+                }
+            } break;
             default: {
                 fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
                     __func__, wsp_ggml_unary_op_name(wsp_ggml_get_unary_op(tensor)));
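The two backward cases added above are the chain rule applied to the stored input src0, using the elementwise derivatives

    d/dx expm1(x)    = d/dx (e^x - 1)    = e^x
    d/dx softplus(x) = d/dx log(1 + e^x) = e^x / (1 + e^x) = sigmoid(x)

which is why the incoming gradient is multiplied by wsp_ggml_exp(ctx, src0) and wsp_ggml_sigmoid(ctx, src0) respectively.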
@@ -6880,6 +7121,78 @@ void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph) {
     WSP_GGML_LOG_INFO("========================================\n");
 }
 
+static int wsp_ggml_node_list_find_tensor(const struct wsp_ggml_cgraph * cgraph,
+                                          const int * idxs,
+                                          int count,
+                                          const struct wsp_ggml_tensor * tensor) {
+    WSP_GGML_ASSERT(cgraph && idxs);
+    for (int i = 0; i < count; ++i) {
+        const int node_idx = idxs[i];
+
+        if (node_idx >= cgraph->n_nodes) {
+            return -1;
+        }
+        if (cgraph->nodes[node_idx] == tensor) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                    const int * node_idxs,
+                                    int count,
+                                    const enum wsp_ggml_op * ops,
+                                    const int * outputs,
+                                    int num_outputs) {
+    WSP_GGML_ASSERT(outputs && num_outputs > 0);
+
+    for (int i = 0; i < count; ++i) {
+        if (node_idxs[i] >= cgraph->n_nodes) {
+            return false;
+        }
+
+        const struct wsp_ggml_tensor * node = cgraph->nodes[node_idxs[i]];
+
+        if (node->op != ops[i]) {
+            return false;
+        }
+
+        if (wsp_ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
+            continue;
+        }
+
+        if (node->flags & WSP_GGML_TENSOR_FLAG_OUTPUT) {
+            return false;
+        }
+
+        int subgraph_uses = 0;
+        for (int j = i + 1; j < count; ++j) {
+            const struct wsp_ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
+            for (int src_idx = 0; src_idx < WSP_GGML_MAX_SRC; src_idx++) {
+                if (other_node->src[src_idx] == node) {
+                    subgraph_uses++;
+                }
+            }
+        }
+
+        if (subgraph_uses != wsp_ggml_node_get_use_count(cgraph, node_idxs[i])) {
+            return false;
+        }
+
+        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
+        struct wsp_ggml_tensor * view_src = node->view_src;
+        while (view_src) {
+            if (wsp_ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
+                return false;
+            }
+            view_src = view_src->view_src;
+        }
+    }
+
+    return true;
+}
+
 // check if node is part of the graph
 static bool wsp_ggml_graph_find(const struct wsp_ggml_cgraph * cgraph, const struct wsp_ggml_tensor * node) {
     if (cgraph == NULL) {
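wsp_ggml_can_fuse_subgraph_ext answers whether a candidate chain of nodes matches an expected op sequence without any intermediate value escaping the subgraph, other than the declared outputs (which, per wsp_ggml_node_list_find_tensor above, are also node indices). A hedged sketch of a call site — the indices and the op pair are hypothetical:

// Can nodes {i, i+1} (a MUL_MAT feeding an ADD) be fused, with the ADD's
// tensor as the only value visible outside the pair?
static bool can_fuse_mul_mat_add(const struct wsp_ggml_cgraph * cgraph, int i) {
    const int              idxs[2] = { i, i + 1 };
    const enum wsp_ggml_op ops[2]  = { WSP_GGML_OP_MUL_MAT, WSP_GGML_OP_ADD };
    const int              outs[1] = { i + 1 };

    return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, 2, ops, outs, 1);
}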
package/cpp/ggml.h CHANGED
@@ -237,9 +237,12 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
 
+// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
+#define WSP_GGML_ROPE_TYPE_NORMAL 0
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
+#define WSP_GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
 
 #define WSP_GGML_MROPE_SECTIONS 4
 
@@ -472,6 +475,7 @@ extern "C" {
         WSP_GGML_OP_COS,
         WSP_GGML_OP_SUM,
         WSP_GGML_OP_SUM_ROWS,
+        WSP_GGML_OP_CUMSUM,
         WSP_GGML_OP_MEAN,
         WSP_GGML_OP_ARGMAX,
         WSP_GGML_OP_COUNT_EQUAL,
@@ -527,6 +531,8 @@ extern "C" {
         WSP_GGML_OP_TIMESTEP_EMBEDDING,
         WSP_GGML_OP_ARGSORT,
         WSP_GGML_OP_LEAKY_RELU,
+        WSP_GGML_OP_TRI,
+        WSP_GGML_OP_FILL,
 
         WSP_GGML_OP_FLASH_ATTN_EXT,
         WSP_GGML_OP_FLASH_ATTN_BACK,
@@ -539,6 +545,7 @@ extern "C" {
         WSP_GGML_OP_RWKV_WKV6,
         WSP_GGML_OP_GATED_LINEAR_ATTN,
         WSP_GGML_OP_RWKV_WKV7,
+        WSP_GGML_OP_SOLVE_TRI,
 
         WSP_GGML_OP_UNARY,
 
@@ -573,7 +580,14 @@ extern "C" {
         WSP_GGML_UNARY_OP_HARDSWISH,
         WSP_GGML_UNARY_OP_HARDSIGMOID,
         WSP_GGML_UNARY_OP_EXP,
+        WSP_GGML_UNARY_OP_EXPM1,
+        WSP_GGML_UNARY_OP_SOFTPLUS,
         WSP_GGML_UNARY_OP_GELU_ERF,
+        WSP_GGML_UNARY_OP_XIELU,
+        WSP_GGML_UNARY_OP_FLOOR,
+        WSP_GGML_UNARY_OP_CEIL,
+        WSP_GGML_UNARY_OP_ROUND,
+        WSP_GGML_UNARY_OP_TRUNC,
 
         WSP_GGML_UNARY_OP_COUNT,
     };
@@ -612,6 +626,13 @@ extern "C" {
         WSP_GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
+    enum wsp_ggml_tri_type {
+        WSP_GGML_TRI_TYPE_UPPER_DIAG = 0,
+        WSP_GGML_TRI_TYPE_UPPER      = 1,
+        WSP_GGML_TRI_TYPE_LOWER_DIAG = 2,
+        WSP_GGML_TRI_TYPE_LOWER      = 3
+    };
+
     struct wsp_ggml_init_params {
         // memory pool
         size_t mem_size; // bytes
@@ -949,6 +970,22 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sin(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
@@ -975,6 +1012,10 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cumsum(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
     // mean along rows
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
             struct wsp_ggml_context * ctx,
@@ -1148,6 +1189,58 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    /**
+     * Truncates the fractional part of each element in the tensor (towards zero).
+     * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
+     * Similar to std::trunc in C/C++.
+     */
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+
+
+    // xIELU activation function
+    // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
+    // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
+    // that constrain the positive and negative source alpha values respectively
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float alpha_n,
+            float alpha_p,
+            float beta,
+            float eps);
+
     // gated linear unit ops
     // A: n columns, r rows,
     // result is n / 2 columns, r rows,
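Spelled out, the formula in the xIELU doc comment above is

    y = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * [x > 0]
    c_a(a) = softplus(a),  c_b(a, b) = softplus(a) + b

and the constructor in the ggml.c hunk earlier pre-computes the constrained coefficients on the host — op param 1 holds beta + softplus(alpha_n) and op param 2 holds softplus(alpha_p), via wsp_ggml_compute_softplus_f32 — so the backend kernels never evaluate softplus themselves.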
@@ -1615,6 +1708,13 @@ extern "C" {
             float scale,
             float max_bias);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * mask,
+            float scale,
+            float max_bias);
+
     WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
             struct wsp_ggml_tensor * a,
             struct wsp_ggml_tensor * sinks);
@@ -2041,6 +2141,7 @@ extern "C" {
     enum wsp_ggml_scale_mode {
         WSP_GGML_SCALE_MODE_NEAREST  = 0,
         WSP_GGML_SCALE_MODE_BILINEAR = 1,
+        WSP_GGML_SCALE_MODE_BICUBIC  = 2,
 
         WSP_GGML_SCALE_MODE_COUNT
     };
@@ -2119,6 +2220,23 @@ extern "C" {
             int shift2,
             int shift3);
 
+    // Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
+    // zeroes everywhere outside the masked area
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tri(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            enum wsp_ggml_tri_type    type);
+
+    // Fill tensor a with constant c
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float c);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float c);
 
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
@@ -2288,6 +2406,27 @@ extern "C" {
             struct wsp_ggml_tensor  * b,
             struct wsp_ggml_tensor  * state);
 
+    /* Solves a specific equation of the form Ax=B, where A is a triangular matrix
+     * without zeroes on the diagonal (i.e. invertible).
+     * B can have any number of columns, but must have the same number of rows as A
+     * If A is [n, n] and B is [n, m], then the result will be [n, m] as well
+     * Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
+     * where n > 100 sparingly, pre-chunk if necessary.
+     *
+     * If left = false, solves xA=B instead
+     * If lower = false, assumes upper triangular instead
+     * If uni = true, assumes diagonal of A to be all ones (will override actual values)
+     *
+     * TODO: currently only lower, right, non-unitriangular variant is implemented
+     */
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_solve_tri(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
+            bool left,
+            bool lower,
+            bool uni);
+
     // custom operators
 
     typedef void (*wsp_ggml_custom1_op_t)(struct wsp_ggml_tensor * dst , const struct wsp_ggml_tensor * a, int ith, int nth, void * userdata);
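A usage sketch for wsp_ggml_solve_tri, restricted to the one variant the assert in the ggml.c hunk currently accepts (left && lower && !uni); the shapes assume ggml's [ne0 = columns, ne1 = rows] layout, and the whole fragment is illustrative rather than code from the package:

// Solve A X = B for X, with A lower triangular and a non-zero diagonal.
static struct wsp_ggml_tensor * solve_lower(struct wsp_ggml_context * ctx) {
    const int n = 16; // A is n x n
    const int m = 4;  // number of right-hand sides (columns of B and X)

    struct wsp_ggml_tensor * A = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, n, n);
    struct wsp_ggml_tensor * B = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, m, n);
    // ... fill A (lower triangle and diagonal) and B ...

    // X has the same shape as B
    return wsp_ggml_solve_tri(ctx, A, B, /*left =*/ true, /*lower =*/ true, /*uni =*/ false);
}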