npm - whisper.rn - Versions diffs - 0.5.1 → 0.5.2 - Mend

whisper.rn 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

package/cpp/ggml-metal/ggml-metal-ops.h CHANGED Viewed

@@ -39,6 +39,8 @@ size_t wsp_ggml_metal_op_mul_mat_id_extra_ids(const struct wsp_ggml_tensor * op)
 // return true if we should use the FA vector kernel for this op
 bool wsp_ggml_metal_op_flash_attn_ext_use_vec(const struct wsp_ggml_tensor * op);
+size_t wsp_ggml_metal_op_flash_attn_ext_extra_pad(const struct wsp_ggml_tensor * op);
+size_t wsp_ggml_metal_op_flash_attn_ext_extra_blk(const struct wsp_ggml_tensor * op);
 size_t wsp_ggml_metal_op_flash_attn_ext_extra_tmp(const struct wsp_ggml_tensor * op);
 int wsp_ggml_metal_op_concat            (wsp_ggml_metal_op_t ctx, int idx);
@@ -48,6 +50,7 @@ int wsp_ggml_metal_op_scale             (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_clamp             (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_unary             (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_glu               (wsp_ggml_metal_op_t ctx, int idx);
+int wsp_ggml_metal_op_sum               (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_sum_rows          (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_get_rows          (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_set_rows          (wsp_ggml_metal_op_t ctx, int idx);
@@ -68,6 +71,7 @@ int wsp_ggml_metal_op_norm              (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_rope              (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_im2col            (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_conv_transpose_1d (wsp_ggml_metal_op_t ctx, int idx);
+int wsp_ggml_metal_op_conv_transpose_2d (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_upscale           (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_pad               (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_pad_reflect_1d    (wsp_ggml_metal_op_t ctx, int idx);
@@ -76,6 +80,8 @@ int wsp_ggml_metal_op_timestep_embedding(wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_argmax            (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_argsort           (wsp_ggml_metal_op_t ctx, int idx);
 int wsp_ggml_metal_op_leaky_relu        (wsp_ggml_metal_op_t ctx, int idx);
+int wsp_ggml_metal_op_opt_step_adamw    (wsp_ggml_metal_op_t ctx, int idx);
+int wsp_ggml_metal_op_opt_step_sgd      (wsp_ggml_metal_op_t ctx, int idx);
 #ifdef __cplusplus
 }

package/cpp/ggml-metal/ggml-metal.cpp CHANGED Viewed

@@ -193,9 +193,9 @@ static size_t wsp_ggml_backend_metal_buffer_type_get_alloc_size(wsp_ggml_backend
             } break;
         case WSP_GGML_OP_FLASH_ATTN_EXT:
             {
-                if (wsp_ggml_metal_op_flash_attn_ext_use_vec(tensor)) {
-                    res += wsp_ggml_metal_op_flash_attn_ext_extra_tmp(tensor);
-                }
+                res += wsp_ggml_metal_op_flash_attn_ext_extra_pad(tensor);
+                res += wsp_ggml_metal_op_flash_attn_ext_extra_blk(tensor);
+                res += wsp_ggml_metal_op_flash_attn_ext_extra_tmp(tensor);
             } break;
         default:
             break;

package/cpp/ggml-metal/ggml-whisper-sim.metallib CHANGED Viewed

Binary file

package/cpp/ggml-metal/ggml-whisper.metallib CHANGED Viewed

Binary file

package/cpp/ggml.c CHANGED Viewed

@@ -1151,10 +1151,14 @@ static const char * WSP_GGML_UNARY_OP_NAME[WSP_GGML_UNARY_OP_COUNT] = {
     "HARDSIGMOID",
     "EXP",
     "GELU_ERF",
+    "XIELU",
+    "FLOOR",
+    "CEIL",
+    "ROUND",
+    "TRUNC",
 };
-static_assert(WSP_GGML_UNARY_OP_COUNT == 15, "WSP_GGML_UNARY_OP_COUNT != 15");
+static_assert(WSP_GGML_UNARY_OP_COUNT == 20, "WSP_GGML_UNARY_OP_COUNT != 20");
 static const char * WSP_GGML_GLU_OP_NAME[WSP_GGML_GLU_OP_COUNT] = {
     "REGLU",
@@ -2660,6 +2664,29 @@ struct wsp_ggml_tensor * wsp_ggml_silu_inplace(
     return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_SILU);
 }
+// wsp_ggml_xielu
+struct wsp_ggml_tensor * wsp_ggml_xielu(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        float alpha_n,
+        float alpha_p,
+        float beta,
+        float eps) {
+    struct wsp_ggml_tensor * result = wsp_ggml_dup_tensor(ctx, a);
+    wsp_ggml_set_op_params_i32(result, 0, (int32_t) WSP_GGML_UNARY_OP_XIELU);
+    wsp_ggml_set_op_params_f32(result, 1, beta + wsp_ggml_softplus(alpha_n));
+    wsp_ggml_set_op_params_f32(result, 2, wsp_ggml_softplus(alpha_p));
+    wsp_ggml_set_op_params_f32(result, 3, beta);
+    wsp_ggml_set_op_params_f32(result, 4, eps);
+    result->op     = WSP_GGML_OP_UNARY;
+    result->src[0] = a;
+    return result;
+}
 // wsp_ggml_silu_back
 struct wsp_ggml_tensor * wsp_ggml_silu_back(
@@ -2734,6 +2761,62 @@ static struct wsp_ggml_tensor * wsp_ggml_glu_impl(
     return result;
 }
+// wsp_ggml_floor
+struct wsp_ggml_tensor * wsp_ggml_floor(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
+}
+struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_FLOOR);
+}
+// wsp_ggml_ceil
+struct wsp_ggml_tensor * wsp_ggml_ceil(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_CEIL);
+}
+struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_CEIL);
+}
+//wsp_ggml_round
+struct wsp_ggml_tensor * wsp_ggml_round(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_ROUND);
+}
+struct wsp_ggml_tensor * wsp_ggml_round_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_ROUND);
+}
+//wsp_ggml_trunc
+struct wsp_ggml_tensor * wsp_ggml_trunc(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
+}
+struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a) {
+    return wsp_ggml_unary_inplace(ctx, a, WSP_GGML_UNARY_OP_TRUNC);
+}
 struct wsp_ggml_tensor * wsp_ggml_glu(
         struct wsp_ggml_context * ctx,
         struct wsp_ggml_tensor  * a,
@@ -3837,6 +3920,15 @@ struct wsp_ggml_tensor * wsp_ggml_soft_max_ext(
     return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
 }
+struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        struct wsp_ggml_tensor  * mask,
+        float                 scale,
+        float                 max_bias) {
+    return wsp_ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
+}
 void wsp_ggml_soft_max_add_sinks(
         struct wsp_ggml_tensor * a,
         struct wsp_ggml_tensor * sinks) {
@@ -6880,6 +6972,78 @@ void wsp_ggml_graph_print(const struct wsp_ggml_cgraph * cgraph) {
     WSP_GGML_LOG_INFO("========================================\n");
 }
+static int wsp_ggml_node_list_find_tensor(const struct wsp_ggml_cgraph * cgraph,
+                                      const int *                idxs,
+                                      int                        count,
+                                      const struct wsp_ggml_tensor * tensor) {
+    WSP_GGML_ASSERT(cgraph && idxs);
+    for (int i = 0; i < count; ++i) {
+        const int node_idx = idxs[i];
+        if (node_idx >= cgraph->n_nodes) {
+            return -1;
+        }
+        if (cgraph->nodes[node_idx] == tensor) {
+            return i;
+        }
+    }
+    return -1;
+}
+bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                const int *                node_idxs,
+                                int                        count,
+                                const enum wsp_ggml_op *       ops,
+                                const int *                outputs,
+                                int                        num_outputs) {
+    WSP_GGML_ASSERT(outputs && num_outputs > 0);
+    for (int i = 0; i < count; ++i) {
+        if (node_idxs[i] >= cgraph->n_nodes) {
+            return false;
+        }
+        const struct wsp_ggml_tensor * node = cgraph->nodes[node_idxs[i]];
+        if (node->op != ops[i]) {
+            return false;
+        }
+        if (wsp_ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
+            continue;
+        }
+        if (node->flags & WSP_GGML_TENSOR_FLAG_OUTPUT) {
+            return false;
+        }
+        int subgraph_uses = 0;
+        for (int j = i + 1; j < count; ++j) {
+            const struct wsp_ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
+            for (int src_idx = 0; src_idx < WSP_GGML_MAX_SRC; src_idx++) {
+                if (other_node->src[src_idx] == node) {
+                    subgraph_uses++;
+                }
+            }
+        }
+        if (subgraph_uses != wsp_ggml_node_get_use_count(cgraph, node_idxs[i])) {
+            return false;
+        }
+        // if node is a view, check if the view_src and all its parent view_srcs are within the subgraph
+        struct wsp_ggml_tensor * view_src = node->view_src;
+        while (view_src) {
+            if (wsp_ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
+                return false;
+            }
+            view_src = view_src->view_src;
+        }
+    }
+    return true;
+}
 // check if node is part of the graph
 static bool wsp_ggml_graph_find(const struct wsp_ggml_cgraph * cgraph, const struct wsp_ggml_tensor * node) {
     if (cgraph == NULL) {

package/cpp/ggml.h CHANGED Viewed

@@ -237,6 +237,8 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
+// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
+#define WSP_GGML_ROPE_TYPE_NORMAL 0
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
@@ -574,6 +576,11 @@ extern "C" {
         WSP_GGML_UNARY_OP_HARDSIGMOID,
         WSP_GGML_UNARY_OP_EXP,
         WSP_GGML_UNARY_OP_GELU_ERF,
+        WSP_GGML_UNARY_OP_XIELU,
+        WSP_GGML_UNARY_OP_FLOOR,
+        WSP_GGML_UNARY_OP_CEIL,
+        WSP_GGML_UNARY_OP_ROUND,
+        WSP_GGML_UNARY_OP_TRUNC,
         WSP_GGML_UNARY_OP_COUNT,
     };
@@ -1148,6 +1155,58 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+     /**
+     * Truncates the fractional part of each element in the tensor (towards zero).
+     * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
+     * Similar to trunc() in C and std::trunc in C++.
+     */
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    // xIELU activation function
+    // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
+    // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
+    // that constrain the positive and negative source alpha values respectively
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float alpha_n,
+            float alpha_p,
+            float beta,
+            float eps);
     // gated linear unit ops
     // A: n columns, r rows,
     // result is n / 2 columns, r rows,
@@ -1615,6 +1674,13 @@ extern "C" {
             float                 scale,
             float                 max_bias);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * mask,
+            float                 scale,
+            float                 max_bias);
     WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
             struct wsp_ggml_tensor * a,
             struct wsp_ggml_tensor * sinks);

package/cpp/jsi/RNWhisperJSI.cpp CHANGED Viewed

@@ -269,11 +269,13 @@ struct CallbackInfo {
     std::shared_ptr<Function> onProgressCallback;
     std::shared_ptr<Function> onNewSegmentsCallback;
     int jobId;
+    int nProcessors;
 };
 CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
     CallbackInfo info;
     info.jobId = rand(); // Default fallback jobId
+    info.nProcessors = 1; // Default to 1 processor
     try {
         auto propNames = optionsObj.getPropertyNames(runtime);
@@ -288,6 +290,8 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
                 info.onNewSegmentsCallback = std::make_shared<Function>(propValue.getObject(runtime).getFunction(runtime));
             } else if (propName == "jobId" && propValue.isNumber()) {
                 info.jobId = (int)propValue.getNumber();
+            } else if (propName == "nProcessors" && propValue.isNumber()) {
+                info.nProcessors = (int)propValue.getNumber();
             }
         }
     } catch (...) {
@@ -551,12 +555,13 @@ void installJSIBindings(
                                 code = -2;
                             } else {
                                 try {
-                                    code = whisper_full(context, job->params, audioResult.data.data(), audioResult.count);
+                                    job->n_processors = callbackInfo.nProcessors;
+                                    code = whisper_full_parallel(context, job->params, audioResult.data.data(), audioResult.count, job->n_processors);
                                     if (job->is_aborted()) {
                                         code = -999;
                                     }
                                 } catch (...) {
-                                    logError("Exception during whisper_full transcription");
+                                    logError("Exception during whisper_full_parallel transcription");
                                     code = -3;
                                 }
                                 rnwhisper::job_remove(callbackInfo.jobId);

package/cpp/rn-whisper.h CHANGED Viewed

@@ -24,6 +24,7 @@ struct job {
     int job_id;
     bool aborted = false;
     whisper_full_params params;
+    int n_processors = 1;
     ~job();
     bool is_aborted();

package/cpp/whisper.cpp CHANGED Viewed

@@ -1296,7 +1296,7 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
     if (params.use_gpu) {
         for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
             wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
-            if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
+            if (wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev_cur) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
                 if (cnt == params.gpu_device) {
                     dev = dev_cur;
                 }
@@ -1365,7 +1365,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
         int cnt = 0;
         for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
             wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
-            if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
+            if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
                 if (cnt == params.gpu_device) {
                     auto * buft = wsp_ggml_backend_dev_buffer_type(dev);
                     if (buft) {
@@ -1403,6 +1403,7 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
     bool op_supported = true;
     if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
+        wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
         (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
         // GPU and default CPU backend support all operators
         op_supported = true;
@@ -4459,6 +4460,7 @@ static bool weight_buft_supported(const whisper_vad_hparams & hparams, wsp_ggml_
     bool op_supported = true;
     if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
+        wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
         (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
         // GPU and default CPU backend support all operators
         op_supported = true;

package/ios/RNWhisperContext.mm CHANGED Viewed

@@ -168,6 +168,7 @@ static void* retained_log_block = nullptr;
     self->recordState.sliceNSamples.push_back(0);
     self->recordState.job = rnwhisper::job_new(jobId, [self createParams:options jobId:jobId]);
+    self->recordState.job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
     self->recordState.job->set_realtime_params(
         {
             .use_vad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false,
@@ -471,6 +472,7 @@ struct rnwhisper_segments_callback_data {
         }
         rnwhisper::job* job = rnwhisper::job_new(jobId, params);
+        job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
         self->recordState.job = job;
         int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
         rnwhisper::job_remove(jobId);
@@ -572,7 +574,7 @@ struct rnwhisper_segments_callback_data {
   audioDataCount:(int)audioDataCount
 {
     whisper_reset_timings(self->ctx);
-    int code = whisper_full(self->ctx, job->params, audioData, audioDataCount);
+    int code = whisper_full_parallel(self->ctx, job->params, audioData, audioDataCount, job->n_processors);
     if (job && job->is_aborted()) code = -999;
     // if (code == 0) {
     //     whisper_print_timings(self->ctx);

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h CHANGED Viewed

@@ -209,9 +209,6 @@ extern "C" {
         void * context;
     };
-    // Internal backend registry API
-    WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
     // Add backend dynamic loading support to the backend
     // Initialize the backend

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h CHANGED Viewed

@@ -215,6 +215,8 @@ extern "C" {
     // Backend registry
     //
+    WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
     WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
     // Backend (reg) enumeration

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h CHANGED Viewed

@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
     }
 }
+static inline float wsp_ggml_softplus(float input) {
+    return (input > 20.0f) ? input : logf(1 + expf(input));
+}
 //
 // logging
 //
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
 #define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
 #define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
+static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
+    const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
+    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
+    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
+        return 0;
+    }
+    return cgraph->use_counts[hash_pos];
+}
 // return true if the node's results are only used by N other nodes
 // and can be fused into their calculations.
 static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
     const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
     // check the use count against how many we're replacing
-    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
-    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
+    if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
         return false;
     }
@@ -635,6 +647,36 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
     return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
 }
+WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                         const int *                node_idxs,
+                                         int                        count,
+                                         const enum wsp_ggml_op *       ops,
+                                         const int *                outputs,
+                                         int                        num_outputs);
+// Returns true if the subgraph formed by {node_idxs} can be fused
+// checks whether all nodes which are not part of outputs can be elided
+// by checking if their num_uses are confined to the subgraph
+static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                          int                        node_idx,
+                                          int                        count,
+                                          const enum wsp_ggml_op *       ops,
+                                          const int *                outputs,
+                                          int                        num_outputs) {
+    WSP_GGML_ASSERT(count < 32);
+    if (node_idx + count > cgraph->n_nodes) {
+        return false;
+    }
+    int idxs[32];
+    for (int i = 0; i < count; ++i) {
+        idxs[i] = node_idx + i;
+    }
+    return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
+}
 #ifdef __cplusplus
 }
 #endif
@@ -648,6 +690,13 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
     return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
 }
+inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph *          cgraph,
+                                   int                                 start_idx,
+                                   std::initializer_list<enum wsp_ggml_op> ops,
+                                   std::initializer_list<int>          outputs = {}) {
+    return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
+}
 // expose GGUF internals for test code
 WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
 WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h CHANGED Viewed

@@ -237,6 +237,8 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
+// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
+#define WSP_GGML_ROPE_TYPE_NORMAL 0
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
@@ -574,6 +576,11 @@ extern "C" {
         WSP_GGML_UNARY_OP_HARDSIGMOID,
         WSP_GGML_UNARY_OP_EXP,
         WSP_GGML_UNARY_OP_GELU_ERF,
+        WSP_GGML_UNARY_OP_XIELU,
+        WSP_GGML_UNARY_OP_FLOOR,
+        WSP_GGML_UNARY_OP_CEIL,
+        WSP_GGML_UNARY_OP_ROUND,
+        WSP_GGML_UNARY_OP_TRUNC,
         WSP_GGML_UNARY_OP_COUNT,
     };
@@ -1148,6 +1155,58 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+     /**
+     * Truncates the fractional part of each element in the tensor (towards zero).
+     * For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
+     * Similar to trunc() in C and std::trunc in C++.
+     */
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+    // xIELU activation function
+    // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
+    // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
+    // that constrain the positive and negative source alpha values respectively
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float alpha_n,
+            float alpha_p,
+            float beta,
+            float eps);
     // gated linear unit ops
     // A: n columns, r rows,
     // result is n / 2 columns, r rows,
@@ -1615,6 +1674,13 @@ extern "C" {
             float                 scale,
             float                 max_bias);
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * mask,
+            float                 scale,
+            float                 max_bias);
     WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
             struct wsp_ggml_tensor * a,
             struct wsp_ggml_tensor * sinks);

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h CHANGED Viewed

@@ -24,6 +24,7 @@ struct job {
     int job_id;
     bool aborted = false;
     whisper_full_params params;
+    int n_processors = 1;
     ~job();
     bool is_aborted();

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib CHANGED Viewed

Binary file

package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper CHANGED Viewed

Binary file