whisper.rn 0.5.1 → 0.5.3
This diff compares the contents of the two package versions as published to their public registry. It is provided for informational purposes only.
- package/android/src/main/jni.cpp +12 -3
- package/cpp/ggml-alloc.c +49 -18
- package/cpp/ggml-backend-impl.h +0 -3
- package/cpp/ggml-backend-reg.cpp +8 -0
- package/cpp/ggml-backend.cpp +0 -2
- package/cpp/ggml-backend.h +2 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +428 -26
- package/cpp/ggml-cpu/ggml-cpu-impl.h +4 -2
- package/cpp/ggml-cpu/ggml-cpu.c +67 -24
- package/cpp/ggml-cpu/ops.cpp +489 -364
- package/cpp/ggml-cpu/ops.h +4 -4
- package/cpp/ggml-cpu/repack.cpp +143 -29
- package/cpp/ggml-cpu/simd-mappings.h +25 -25
- package/cpp/ggml-cpu/unary-ops.cpp +151 -0
- package/cpp/ggml-cpu/unary-ops.h +7 -0
- package/cpp/ggml-cpu/vec.cpp +83 -0
- package/cpp/ggml-cpu/vec.h +20 -8
- package/cpp/ggml-impl.h +67 -2
- package/cpp/ggml-metal/ggml-metal-common.cpp +2 -2
- package/cpp/ggml-metal/ggml-metal-context.m +5 -6
- package/cpp/ggml-metal/ggml-metal-device.cpp +300 -14
- package/cpp/ggml-metal/ggml-metal-device.h +26 -1
- package/cpp/ggml-metal/ggml-metal-device.m +243 -28
- package/cpp/ggml-metal/ggml-metal-impl.h +177 -9
- package/cpp/ggml-metal/ggml-metal-ops.cpp +843 -157
- package/cpp/ggml-metal/ggml-metal-ops.h +8 -0
- package/cpp/ggml-metal/ggml-metal.cpp +8 -3
- package/cpp/ggml-metal/ggml-metal.metal +12436 -0
- package/cpp/ggml.c +317 -4
- package/cpp/ggml.h +139 -0
- package/cpp/jsi/RNWhisperJSI.cpp +7 -2
- package/cpp/rn-whisper.h +1 -0
- package/cpp/whisper.cpp +8 -2
- package/ios/RNWhisperContext.mm +3 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +0 -3
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +67 -2
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +139 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +12436 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +2 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +2 -0
- package/src/version.json +1 -1
- package/whisper-rn.podspec +1 -1
- package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
package/cpp/jsi/RNWhisperJSI.cpp
CHANGED

@@ -269,11 +269,13 @@ struct CallbackInfo {
     std::shared_ptr<Function> onProgressCallback;
     std::shared_ptr<Function> onNewSegmentsCallback;
     int jobId;
+    int nProcessors;
 };
 
 CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
     CallbackInfo info;
     info.jobId = rand(); // Default fallback jobId
+    info.nProcessors = 1; // Default to 1 processor
 
     try {
         auto propNames = optionsObj.getPropertyNames(runtime);
@@ -288,6 +290,8 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
                 info.onNewSegmentsCallback = std::make_shared<Function>(propValue.getObject(runtime).getFunction(runtime));
             } else if (propName == "jobId" && propValue.isNumber()) {
                 info.jobId = (int)propValue.getNumber();
+            } else if (propName == "nProcessors" && propValue.isNumber()) {
+                info.nProcessors = (int)propValue.getNumber();
             }
         }
     } catch (...) {
@@ -551,12 +555,13 @@ void installJSIBindings(
             code = -2;
         } else {
             try {
-
+                job->n_processors = callbackInfo.nProcessors;
+                code = whisper_full_parallel(context, job->params, audioResult.data.data(), audioResult.count, job->n_processors);
                 if (job->is_aborted()) {
                     code = -999;
                 }
             } catch (...) {
-                logError("Exception during
+                logError("Exception during whisper_full_parallel transcription");
                 code = -3;
             }
             rnwhisper::job_remove(callbackInfo.jobId);
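Note: the net effect of this file's hunks is that the JSI transcription path now calls whisper_full_parallel() instead of whisper_full(), with n_processors taken from the new nProcessors option. A minimal standalone sketch of that call, outside the JSI plumbing (the wrapper function and the greedy-sampling choice are illustrative, not code from the package):

    #include "whisper.h"
    #include <vector>

    // whisper_full_parallel() splits the audio into n_processors chunks and
    // decodes them in parallel, so n_processors = 1 (the default above)
    // behaves like the old whisper_full() path.
    int transcribe(whisper_context * ctx, const std::vector<float> & pcm, int n_processors) {
        whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
        return whisper_full_parallel(ctx, params, pcm.data(), (int)pcm.size(), n_processors);
    }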
package/cpp/rn-whisper.h
CHANGED
package/cpp/whisper.cpp
CHANGED

@@ -1296,7 +1296,11 @@ static wsp_ggml_backend_t whisper_backend_init_gpu(const whisper_context_params
     if (params.use_gpu) {
         for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
             wsp_ggml_backend_dev_t dev_cur = wsp_ggml_backend_dev_get(i);
-
+            enum wsp_ggml_backend_dev_type dev_type = wsp_ggml_backend_dev_type(dev_cur);
+            const char * dev_name = wsp_ggml_backend_dev_name(dev_cur);
+            WHISPER_LOG_INFO("%s: device %zu: %s (type: %d)\n", __func__, i, dev_name, dev_type);
+            if (dev_type == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || dev_type == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
+                WHISPER_LOG_INFO("%s: found GPU device %zu: %s (type: %d, cnt: %d)\n", __func__, i, dev_name, dev_type, cnt);
                 if (cnt == params.gpu_device) {
                     dev = dev_cur;
                 }
@@ -1365,7 +1369,7 @@ static buft_list_t make_buft_list(whisper_context_params & params) {
     int cnt = 0;
     for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
         wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
-        if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU) {
+        if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
             if (cnt == params.gpu_device) {
                 auto * buft = wsp_ggml_backend_dev_buffer_type(dev);
                 if (buft) {
@@ -1403,6 +1407,7 @@ static bool weight_buft_supported(const whisper_hparams & hparams, wsp_ggml_tens
     bool op_supported = true;
 
     if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
+        wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
         (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
         // GPU and default CPU backend support all operators
         op_supported = true;
@@ -4459,6 +4464,7 @@ static bool weight_buft_supported(const whisper_vad_hparams & hparams, wsp_ggml_
     bool op_supported = true;
 
     if (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_GPU ||
+        wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU ||
         (wsp_ggml_backend_dev_type(dev) == WSP_GGML_BACKEND_DEVICE_TYPE_CPU && buft == wsp_ggml_backend_cpu_buffer_type())) {
         // GPU and default CPU backend support all operators
         op_supported = true;
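Note: all four hunks make the same change: integrated GPU devices (WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) are now accepted wherever only discrete GPUs (WSP_GGML_BACKEND_DEVICE_TYPE_GPU) were before. A hedged sketch of the selection logic in isolation (the helper is illustrative; in the real code the cnt bookkeeping lives inside whisper_backend_init_gpu()):

    #include "ggml-backend.h"

    // Pick the gpu_device-th GPU, counting discrete and integrated GPUs
    // alike; return NULL so the caller can fall back to the CPU backend.
    static wsp_ggml_backend_dev_t pick_gpu_device(int gpu_device) {
        int cnt = 0;
        for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
            wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
            enum wsp_ggml_backend_dev_type t = wsp_ggml_backend_dev_type(dev);
            if (t == WSP_GGML_BACKEND_DEVICE_TYPE_GPU || t == WSP_GGML_BACKEND_DEVICE_TYPE_IGPU) {
                if (cnt++ == gpu_device) {
                    return dev;
                }
            }
        }
        return NULL;
    }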
package/ios/RNWhisperContext.mm
CHANGED

@@ -168,6 +168,7 @@ static void* retained_log_block = nullptr;
     self->recordState.sliceNSamples.push_back(0);
 
     self->recordState.job = rnwhisper::job_new(jobId, [self createParams:options jobId:jobId]);
+    self->recordState.job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
     self->recordState.job->set_realtime_params(
         {
             .use_vad = options[@"useVad"] != nil ? [options[@"useVad"] boolValue] : false,
@@ -471,6 +472,7 @@ struct rnwhisper_segments_callback_data {
     }
 
     rnwhisper::job* job = rnwhisper::job_new(jobId, params);
+    job->n_processors = options[@"nProcessors"] != nil ? [options[@"nProcessors"] intValue] : 1;
     self->recordState.job = job;
     int code = [self fullTranscribe:job audioData:audioData audioDataCount:audioDataCount];
     rnwhisper::job_remove(jobId);
@@ -572,7 +574,7 @@ struct rnwhisper_segments_callback_data {
     audioDataCount:(int)audioDataCount
 {
     whisper_reset_timings(self->ctx);
-    int code =
+    int code = whisper_full_parallel(self->ctx, job->params, audioData, audioDataCount, job->n_processors);
     if (job && job->is_aborted()) code = -999;
     // if (code == 0) {
     //     whisper_print_timings(self->ctx);
@@ -209,9 +209,6 @@ extern "C" {
|
|
|
209
209
|
void * context;
|
|
210
210
|
};
|
|
211
211
|
|
|
212
|
-
// Internal backend registry API
|
|
213
|
-
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
214
|
-
|
|
215
212
|
// Add backend dynamic loading support to the backend
|
|
216
213
|
|
|
217
214
|
// Initialize the backend
|
|
@@ -215,6 +215,8 @@ extern "C" {
|
|
|
215
215
|
// Backend registry
|
|
216
216
|
//
|
|
217
217
|
|
|
218
|
+
WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
|
|
219
|
+
|
|
218
220
|
WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
|
|
219
221
|
|
|
220
222
|
// Backend (reg) enumeration
|
|
package/cpp/ggml-impl.h
CHANGED

@@ -102,6 +102,9 @@ static bool wsp_ggml_op_is_empty(enum wsp_ggml_op op) {
     }
 }
 
+static inline float wsp_ggml_compute_softplus_f32(float input) {
+    return (input > 20.0f) ? input : logf(1 + expf(input));
+}
 //
 // logging
 //
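Note: wsp_ggml_compute_softplus_f32() is the standard overflow-safe softplus, softplus(x) = ln(1 + e^x). The early return for x > 20 is exact to float32 precision: rewriting softplus(x) = x + ln(1 + e^(-x)) ≈ x + e^(-x) shows the correction term is at most e^(-20) ≈ 2e-9, far below the float32 rounding step near 20 (about 2e-6), while computing expf(x) directly would overflow for x beyond roughly 88.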
@@ -562,14 +565,23 @@ static inline wsp_ggml_bf16_t wsp_ggml_compute_fp32_to_bf16(float s) {
 #define WSP_GGML_FP32_TO_BF16(x) wsp_ggml_compute_fp32_to_bf16(x)
 #define WSP_GGML_BF16_TO_FP32(x) wsp_ggml_compute_bf16_to_fp32(x)
 
+static inline int32_t wsp_ggml_node_get_use_count(const struct wsp_ggml_cgraph * cgraph, int node_idx) {
+    const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
+
+    size_t hash_pos = wsp_ggml_hash_find(&cgraph->visited_hash_set, node);
+    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
+        return 0;
+    }
+    return cgraph->use_counts[hash_pos];
+}
+
 // return true if the node's results are only used by N other nodes
 // and can be fused into their calculations.
 static inline bool wsp_ggml_node_has_n_uses(const struct wsp_ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
     const struct wsp_ggml_tensor * node = cgraph->nodes[node_idx];
 
     // check the use count against how many we're replacing
-
-    if (!wsp_ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) {
+    if (wsp_ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
         return false;
     }
@@ -635,11 +647,42 @@ static inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int
     return wsp_ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
 }
 
+WSP_GGML_API bool wsp_ggml_can_fuse_subgraph_ext(const struct wsp_ggml_cgraph * cgraph,
+                                                 const int * node_idxs,
+                                                 int count,
+                                                 const enum wsp_ggml_op * ops,
+                                                 const int * outputs,
+                                                 int num_outputs);
+
+// Returns true if the subgraph formed by {node_idxs} can be fused
+// checks whethers all nodes which are not part of outputs can be elided
+// by checking if their num_uses are confined to the subgraph
+static inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                              int node_idx,
+                                              int count,
+                                              const enum wsp_ggml_op * ops,
+                                              const int * outputs,
+                                              int num_outputs) {
+    WSP_GGML_ASSERT(count < 32);
+    if (node_idx + count > cgraph->n_nodes) {
+        return false;
+    }
+
+    int idxs[32];
+
+    for (int i = 0; i < count; ++i) {
+        idxs[i] = node_idx + i;
+    }
+
+    return wsp_ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
+}
+
 #ifdef __cplusplus
 }
 #endif
 
 #ifdef __cplusplus
+#include <array>
 #include <initializer_list>
 #include <vector>
@@ -648,6 +691,28 @@ inline bool wsp_ggml_can_fuse(const struct wsp_ggml_cgraph * cgraph, int node_id
     return wsp_ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
 }
 
+inline bool wsp_ggml_can_fuse_subgraph(const struct wsp_ggml_cgraph * cgraph,
+                                       int start_idx,
+                                       std::initializer_list<enum wsp_ggml_op> ops,
+                                       std::initializer_list<int> outputs = {}) {
+    return wsp_ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
+}
+
+// Return true if the edges in the graph match expectations.
+inline bool wsp_ggml_check_edges(const struct wsp_ggml_cgraph * cgraph,
+                                 int start_idx,
+                                 std::initializer_list<std::array<int, 3>> edges) {
+    for (const auto & edge : edges) {
+        int dst_node = edge[0];
+        int src_idx  = edge[1];
+        int src_node = edge[2];
+        if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) {
+            return false;
+        }
+    }
+    return true;
+}
+
 // expose GGUF internals for test code
 WSP_GGML_API size_t wsp_gguf_type_size(enum wsp_gguf_type type);
 WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file_impl(FILE * file, struct wsp_gguf_init_params params);
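Note: a hedged sketch of how a backend graph pass might combine the two new C++ helpers (the SUM_ROWS-then-SCALE pair is illustrative, not a fusion this package necessarily performs). Each edge triple {dst_node, src_idx, src_node} is relative to the start index and here asserts that node i+1 reads node i+0 through its src[0] slot:

    // C++ only: both helpers above are declared under #ifdef __cplusplus.
    static bool can_fuse_sum_rows_scale(const struct wsp_ggml_cgraph * cgraph, int i) {
        return wsp_ggml_can_fuse_subgraph(cgraph, i,
                   { WSP_GGML_OP_SUM_ROWS, WSP_GGML_OP_SCALE },
                   { 1 })   // only node i+1 (the SCALE) may be used outside the pair
            && wsp_ggml_check_edges(cgraph, i, { {1, 0, 0} });
    }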
package/cpp/ggml.h
CHANGED

@@ -237,9 +237,12 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
 
+// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
+#define WSP_GGML_ROPE_TYPE_NORMAL 0
 #define WSP_GGML_ROPE_TYPE_NEOX   2
 #define WSP_GGML_ROPE_TYPE_MROPE  8
 #define WSP_GGML_ROPE_TYPE_VISION 24
+#define WSP_GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
 
 #define WSP_GGML_MROPE_SECTIONS 4
 
@@ -472,6 +475,7 @@ extern "C" {
|
|
|
472
475
|
WSP_GGML_OP_COS,
|
|
473
476
|
WSP_GGML_OP_SUM,
|
|
474
477
|
WSP_GGML_OP_SUM_ROWS,
|
|
478
|
+
WSP_GGML_OP_CUMSUM,
|
|
475
479
|
WSP_GGML_OP_MEAN,
|
|
476
480
|
WSP_GGML_OP_ARGMAX,
|
|
477
481
|
WSP_GGML_OP_COUNT_EQUAL,
|
|
@@ -527,6 +531,8 @@ extern "C" {
|
|
|
527
531
|
WSP_GGML_OP_TIMESTEP_EMBEDDING,
|
|
528
532
|
WSP_GGML_OP_ARGSORT,
|
|
529
533
|
WSP_GGML_OP_LEAKY_RELU,
|
|
534
|
+
WSP_GGML_OP_TRI,
|
|
535
|
+
WSP_GGML_OP_FILL,
|
|
530
536
|
|
|
531
537
|
WSP_GGML_OP_FLASH_ATTN_EXT,
|
|
532
538
|
WSP_GGML_OP_FLASH_ATTN_BACK,
|
|
@@ -539,6 +545,7 @@ extern "C" {
|
|
|
539
545
|
WSP_GGML_OP_RWKV_WKV6,
|
|
540
546
|
WSP_GGML_OP_GATED_LINEAR_ATTN,
|
|
541
547
|
WSP_GGML_OP_RWKV_WKV7,
|
|
548
|
+
WSP_GGML_OP_SOLVE_TRI,
|
|
542
549
|
|
|
543
550
|
WSP_GGML_OP_UNARY,
|
|
544
551
|
|
|
@@ -573,7 +580,14 @@ extern "C" {
|
|
|
573
580
|
WSP_GGML_UNARY_OP_HARDSWISH,
|
|
574
581
|
WSP_GGML_UNARY_OP_HARDSIGMOID,
|
|
575
582
|
WSP_GGML_UNARY_OP_EXP,
|
|
583
|
+
WSP_GGML_UNARY_OP_EXPM1,
|
|
584
|
+
WSP_GGML_UNARY_OP_SOFTPLUS,
|
|
576
585
|
WSP_GGML_UNARY_OP_GELU_ERF,
|
|
586
|
+
WSP_GGML_UNARY_OP_XIELU,
|
|
587
|
+
WSP_GGML_UNARY_OP_FLOOR,
|
|
588
|
+
WSP_GGML_UNARY_OP_CEIL,
|
|
589
|
+
WSP_GGML_UNARY_OP_ROUND,
|
|
590
|
+
WSP_GGML_UNARY_OP_TRUNC,
|
|
577
591
|
|
|
578
592
|
WSP_GGML_UNARY_OP_COUNT,
|
|
579
593
|
};
|
|
@@ -612,6 +626,13 @@ extern "C" {
|
|
|
612
626
|
WSP_GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
|
613
627
|
};
|
|
614
628
|
|
|
629
|
+
enum wsp_ggml_tri_type {
|
|
630
|
+
WSP_GGML_TRI_TYPE_UPPER_DIAG = 0,
|
|
631
|
+
WSP_GGML_TRI_TYPE_UPPER = 1,
|
|
632
|
+
WSP_GGML_TRI_TYPE_LOWER_DIAG = 2,
|
|
633
|
+
WSP_GGML_TRI_TYPE_LOWER = 3
|
|
634
|
+
};
|
|
635
|
+
|
|
615
636
|
struct wsp_ggml_init_params {
|
|
616
637
|
// memory pool
|
|
617
638
|
size_t mem_size; // bytes
|
|
@@ -949,6 +970,22 @@ extern "C" {
|
|
|
949
970
|
struct wsp_ggml_context * ctx,
|
|
950
971
|
struct wsp_ggml_tensor * a);
|
|
951
972
|
|
|
973
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1(
|
|
974
|
+
struct wsp_ggml_context * ctx,
|
|
975
|
+
struct wsp_ggml_tensor * a);
|
|
976
|
+
|
|
977
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_expm1_inplace(
|
|
978
|
+
struct wsp_ggml_context * ctx,
|
|
979
|
+
struct wsp_ggml_tensor * a);
|
|
980
|
+
|
|
981
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus(
|
|
982
|
+
struct wsp_ggml_context * ctx,
|
|
983
|
+
struct wsp_ggml_tensor * a);
|
|
984
|
+
|
|
985
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_softplus_inplace(
|
|
986
|
+
struct wsp_ggml_context * ctx,
|
|
987
|
+
struct wsp_ggml_tensor * a);
|
|
988
|
+
|
|
952
989
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_sin(
|
|
953
990
|
struct wsp_ggml_context * ctx,
|
|
954
991
|
struct wsp_ggml_tensor * a);
|
|
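Note: a self-contained sketch of driving one of the new unary ops through a CPU graph. It assumes the vendored ggml-cpu.h exports wsp_ggml_graph_compute_with_ctx(), mirroring upstream ggml_graph_compute_with_ctx():

    #include "ggml.h"
    #include "ggml-cpu.h"
    #include <cstdio>
    #include <cstring>

    int main() {
        // Small arena; no_alloc = false so tensor data lives in the pool.
        struct wsp_ggml_init_params ip = { /*mem_size*/ 16*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
        struct wsp_ggml_context * ctx = wsp_ggml_init(ip);

        struct wsp_ggml_tensor * x = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 4);
        const float xs[4] = { -2.0f, 0.0f, 2.0f, 25.0f };
        std::memcpy(x->data, xs, sizeof(xs));

        struct wsp_ggml_tensor * y = wsp_ggml_softplus(ctx, x); // new in this release
        struct wsp_ggml_cgraph * gf = wsp_ggml_new_graph(ctx);
        wsp_ggml_build_forward_expand(gf, y);
        wsp_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads*/ 1);

        for (int i = 0; i < 4; ++i) {
            std::printf("softplus(%g) = %g\n", xs[i], ((const float *) y->data)[i]);
        }
        wsp_ggml_free(ctx);
        return 0;
    }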
@@ -975,6 +1012,10 @@ extern "C" {
|
|
|
975
1012
|
struct wsp_ggml_context * ctx,
|
|
976
1013
|
struct wsp_ggml_tensor * a);
|
|
977
1014
|
|
|
1015
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_cumsum(
|
|
1016
|
+
struct wsp_ggml_context * ctx,
|
|
1017
|
+
struct wsp_ggml_tensor * a);
|
|
1018
|
+
|
|
978
1019
|
// mean along rows
|
|
979
1020
|
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_mean(
|
|
980
1021
|
struct wsp_ggml_context * ctx,
|
|
@@ -1148,6 +1189,58 @@ extern "C" {
|
|
|
1148
1189
|
struct wsp_ggml_context * ctx,
|
|
1149
1190
|
struct wsp_ggml_tensor * a);
|
|
1150
1191
|
|
|
1192
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor(
|
|
1193
|
+
struct wsp_ggml_context * ctx,
|
|
1194
|
+
struct wsp_ggml_tensor * a);
|
|
1195
|
+
|
|
1196
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_floor_inplace(
|
|
1197
|
+
struct wsp_ggml_context * ctx,
|
|
1198
|
+
struct wsp_ggml_tensor * a);
|
|
1199
|
+
|
|
1200
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil(
|
|
1201
|
+
struct wsp_ggml_context * ctx,
|
|
1202
|
+
struct wsp_ggml_tensor * a);
|
|
1203
|
+
|
|
1204
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_ceil_inplace(
|
|
1205
|
+
struct wsp_ggml_context * ctx,
|
|
1206
|
+
struct wsp_ggml_tensor * a);
|
|
1207
|
+
|
|
1208
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round(
|
|
1209
|
+
struct wsp_ggml_context * ctx,
|
|
1210
|
+
struct wsp_ggml_tensor * a);
|
|
1211
|
+
|
|
1212
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_round_inplace(
|
|
1213
|
+
struct wsp_ggml_context * ctx,
|
|
1214
|
+
struct wsp_ggml_tensor * a);
|
|
1215
|
+
|
|
1216
|
+
/**
|
|
1217
|
+
* Truncates the fractional part of each element in the tensor (towards zero).
|
|
1218
|
+
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
|
|
1219
|
+
* Similar to std::trunc in C/C++.
|
|
1220
|
+
*/
|
|
1221
|
+
|
|
1222
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc(
|
|
1223
|
+
struct wsp_ggml_context * ctx,
|
|
1224
|
+
struct wsp_ggml_tensor * a);
|
|
1225
|
+
|
|
1226
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_trunc_inplace(
|
|
1227
|
+
struct wsp_ggml_context * ctx,
|
|
1228
|
+
struct wsp_ggml_tensor * a);
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
// xIELU activation function
|
|
1233
|
+
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
|
|
1234
|
+
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
|
|
1235
|
+
// that constrain the positive and negative source alpha values respectively
|
|
1236
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_xielu(
|
|
1237
|
+
struct wsp_ggml_context * ctx,
|
|
1238
|
+
struct wsp_ggml_tensor * a,
|
|
1239
|
+
float alpha_n,
|
|
1240
|
+
float alpha_p,
|
|
1241
|
+
float beta,
|
|
1242
|
+
float eps);
|
|
1243
|
+
|
|
1151
1244
|
// gated linear unit ops
|
|
1152
1245
|
// A: n columns, r rows,
|
|
1153
1246
|
// result is n / 2 columns, r rows,
|
|
@@ -1615,6 +1708,13 @@ extern "C" {
|
|
|
1615
1708
|
float scale,
|
|
1616
1709
|
float max_bias);
|
|
1617
1710
|
|
|
1711
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_inplace(
|
|
1712
|
+
struct wsp_ggml_context * ctx,
|
|
1713
|
+
struct wsp_ggml_tensor * a,
|
|
1714
|
+
struct wsp_ggml_tensor * mask,
|
|
1715
|
+
float scale,
|
|
1716
|
+
float max_bias);
|
|
1717
|
+
|
|
1618
1718
|
WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
|
|
1619
1719
|
struct wsp_ggml_tensor * a,
|
|
1620
1720
|
struct wsp_ggml_tensor * sinks);
|
|
@@ -2041,6 +2141,7 @@ extern "C" {
|
|
|
2041
2141
|
enum wsp_ggml_scale_mode {
|
|
2042
2142
|
WSP_GGML_SCALE_MODE_NEAREST = 0,
|
|
2043
2143
|
WSP_GGML_SCALE_MODE_BILINEAR = 1,
|
|
2144
|
+
WSP_GGML_SCALE_MODE_BICUBIC = 2,
|
|
2044
2145
|
|
|
2045
2146
|
WSP_GGML_SCALE_MODE_COUNT
|
|
2046
2147
|
};
|
|
@@ -2119,6 +2220,23 @@ extern "C" {
|
|
|
2119
2220
|
int shift2,
|
|
2120
2221
|
int shift3);
|
|
2121
2222
|
|
|
2223
|
+
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
|
|
2224
|
+
// zeroes everywhere outside the masked area
|
|
2225
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_tri(
|
|
2226
|
+
struct wsp_ggml_context * ctx,
|
|
2227
|
+
struct wsp_ggml_tensor * a,
|
|
2228
|
+
enum wsp_ggml_tri_type type);
|
|
2229
|
+
|
|
2230
|
+
// Fill tensor a with constant c
|
|
2231
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill(
|
|
2232
|
+
struct wsp_ggml_context * ctx,
|
|
2233
|
+
struct wsp_ggml_tensor * a,
|
|
2234
|
+
float c);
|
|
2235
|
+
|
|
2236
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_fill_inplace(
|
|
2237
|
+
struct wsp_ggml_context * ctx,
|
|
2238
|
+
struct wsp_ggml_tensor * a,
|
|
2239
|
+
float c);
|
|
2122
2240
|
|
|
2123
2241
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
2124
2242
|
// timesteps: [N,]
|
|
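Note: an illustrative combination of the two new ops (not code from the package, and it assumes the _DIAG variants keep the diagonal): build an n x n causal mask with ones on and below the diagonal and zeroes above it.

    // Fill with 1, then zero everything outside the lower triangle.
    static struct wsp_ggml_tensor * causal_mask(struct wsp_ggml_context * ctx, int64_t n) {
        struct wsp_ggml_tensor * m = wsp_ggml_new_tensor_2d(ctx, WSP_GGML_TYPE_F32, n, n);
        m = wsp_ggml_fill(ctx, m, 1.0f);
        return wsp_ggml_tri(ctx, m, WSP_GGML_TRI_TYPE_LOWER_DIAG);
    }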
@@ -2288,6 +2406,27 @@ extern "C" {
|
|
|
2288
2406
|
struct wsp_ggml_tensor * b,
|
|
2289
2407
|
struct wsp_ggml_tensor * state);
|
|
2290
2408
|
|
|
2409
|
+
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
|
|
2410
|
+
* without zeroes on the diagonal (i.e. invertible).
|
|
2411
|
+
* B can have any number of columns, but must have the same number of rows as A
|
|
2412
|
+
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
|
|
2413
|
+
* Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
|
|
2414
|
+
* where n > 100 sparingly, pre-chunk if necessary.
|
|
2415
|
+
*
|
|
2416
|
+
* If left = false, solves xA=B instead
|
|
2417
|
+
* If lower = false, assumes upper triangular instead
|
|
2418
|
+
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
|
|
2419
|
+
*
|
|
2420
|
+
* TODO: currently only lower, right, non-unitriangular variant is implemented
|
|
2421
|
+
*/
|
|
2422
|
+
WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_solve_tri(
|
|
2423
|
+
struct wsp_ggml_context * ctx,
|
|
2424
|
+
struct wsp_ggml_tensor * a,
|
|
2425
|
+
struct wsp_ggml_tensor * b,
|
|
2426
|
+
bool left,
|
|
2427
|
+
bool lower,
|
|
2428
|
+
bool uni);
|
|
2429
|
+
|
|
2291
2430
|
// custom operators
|
|
2292
2431
|
|
|
2293
2432
|
typedef void (*wsp_ggml_custom1_op_t)(struct wsp_ggml_tensor * dst , const struct wsp_ggml_tensor * a, int ith, int nth, void * userdata);
|
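Note: for the implemented lower-triangular case this is plain forward substitution applied to each of B's m columns independently: x_1 = b_1 / a_11, then x_i = (b_i - sum_{j<i} a_ij * x_j) / a_ii. That is O(n^2) work per column, which yields the O(n^3) figure above when m is on the order of n.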