whisper.rn 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
- package/android/src/main/jni.cpp +13 -0
- package/cpp/ggml-alloc.c +78 -26
- package/cpp/ggml-alloc.h +9 -0
- package/cpp/ggml-backend-impl.h +1 -1
- package/cpp/ggml-backend-reg.cpp +19 -3
- package/cpp/ggml-backend.cpp +72 -20
- package/cpp/ggml-backend.h +2 -1
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/cpp/ggml-cpu/arch-fallback.h +50 -2
- package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/cpp/ggml-cpu/ggml-cpu.c +139 -58
- package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/ggml-cpu/ops.cpp +170 -18
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/repack.cpp +531 -5
- package/cpp/ggml-cpu/repack.h +14 -0
- package/cpp/ggml-cpu/simd-mappings.h +16 -18
- package/cpp/ggml-cpu/vec.cpp +41 -1
- package/cpp/ggml-cpu/vec.h +241 -138
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +0 -4
- package/cpp/ggml-metal/ggml-metal-context.m +26 -16
- package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
- package/cpp/ggml-metal/ggml-metal-device.h +87 -65
- package/cpp/ggml-metal/ggml-metal-device.m +263 -104
- package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
- package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
- package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
- package/cpp/ggml-metal/ggml-metal.cpp +6 -5
- package/cpp/ggml-metal/ggml-metal.metal +404 -34
- package/cpp/ggml.c +110 -31
- package/cpp/ggml.h +51 -12
- package/cpp/jsi/RNWhisperJSI.cpp +1 -0
- package/cpp/whisper.cpp +17 -4
- package/ios/CMakeLists.txt +21 -1
- package/ios/RNWhisperContext.mm +5 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/jest-mock.js +2 -0
- package/lib/commonjs/jest-mock.js.map +1 -1
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/jest-mock.js +2 -0
- package/lib/module/jest-mock.js.map +1 -1
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +1 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/types.d.ts +7 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +1 -0
- package/src/jest-mock.ts +2 -0
- package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
- package/src/realtime-transcription/types.ts +9 -0
- package/src/version.json +1 -1
package/cpp/ggml.c
CHANGED
```diff
@@ -61,13 +61,15 @@
 
 #define UNUSED WSP_GGML_UNUSED
 
+// Needed for wsp_ggml_fp32_to_bf16_row()
+#if defined(__AVX512BF16__)
 #if defined(_MSC_VER)
-#define m512bh(p) p
 #define m512i(p) p
 #else
-#
+#include <immintrin.h>
 #define m512i(p) (__m512i)(p)
-#endif
+#endif // defined(_MSC_VER)
+#endif // defined(__AVX512BF16__)
 
 #if defined(__linux__) || \
     defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
@@ -132,6 +134,13 @@ static void wsp_ggml_print_backtrace_symbols(void) {
     int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
     backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
 }
+#elif defined(__APPLE__)
+#include <execinfo.h>
+static void wsp_ggml_print_backtrace_symbols(void) {
+    void * trace[100];
+    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
+}
 #else
 static void wsp_ggml_print_backtrace_symbols(void) {
     // platform not supported
@@ -143,6 +152,20 @@ void wsp_ggml_print_backtrace(void) {
     if (WSP_GGML_NO_BACKTRACE) {
         return;
     }
+#if defined(__APPLE__)
+    // On macOS, fork+debugger attachment is problematic due to:
+    // 1. libdispatch "poisons" forked child processes
+    // 2. lldb has issues attaching to parent from forked child
+    // Use simple backtrace() instead to avoid Terminal.app crashes
+    const char * WSP_GGML_BACKTRACE_LLDB = getenv("WSP_GGML_BACKTRACE_LLDB");
+    if (!WSP_GGML_BACKTRACE_LLDB) {
+        fprintf(stderr, "WARNING: Using native backtrace. Set WSP_GGML_BACKTRACE_LLDB for more info.\n");
+        fprintf(stderr, "WARNING: WSP_GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
+        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
+        wsp_ggml_print_backtrace_symbols();
+        return;
+    }
+#endif
 #if defined(__linux__)
     FILE * f = fopen("/proc/self/status", "r");
     size_t size = 0;
@@ -998,6 +1021,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "ARANGE",
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
+    "TOP_K",
     "LEAKY_RELU",
     "TRI",
     "FILL",
@@ -1031,7 +1055,7 @@ static const char * WSP_GGML_OP_NAME[WSP_GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(WSP_GGML_OP_COUNT ==
+static_assert(WSP_GGML_OP_COUNT == 95, "WSP_GGML_OP_COUNT != 95");
 
 static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "none",
@@ -1106,6 +1130,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "arange(start, stop, step)",
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
+    "top_k(x)",
     "leaky_relu(x)",
     "tri(x)",
     "fill(x, c)",
@@ -1139,7 +1164,7 @@ static const char * WSP_GGML_OP_SYMBOL[WSP_GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(WSP_GGML_OP_COUNT ==
+static_assert(WSP_GGML_OP_COUNT == 95, "WSP_GGML_OP_COUNT != 95");
 
 static_assert(WSP_GGML_OP_POOL_COUNT == 2, "WSP_GGML_OP_POOL_COUNT != 2");
 
@@ -4897,6 +4922,8 @@ static struct wsp_ggml_tensor * wsp_ggml_interpolate_impl(
         int64_t ne3,
         uint32_t mode) {
     WSP_GGML_ASSERT((mode & 0xFF) < WSP_GGML_SCALE_MODE_COUNT);
+    // TODO: implement antialias for modes other than bilinear
+    WSP_GGML_ASSERT(!(mode & WSP_GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == WSP_GGML_SCALE_MODE_BILINEAR);
 
     struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
@@ -4951,6 +4978,18 @@ struct wsp_ggml_tensor * wsp_ggml_pad(
     return wsp_ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
 }
 
+// wsp_ggml_pad_circular
+
+struct wsp_ggml_tensor * wsp_ggml_pad_circular(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        int                       p0,
+        int                       p1,
+        int                       p2,
+        int                       p3) {
+    return wsp_ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
+}
+
 struct wsp_ggml_tensor * wsp_ggml_pad_ext(
         struct wsp_ggml_context * ctx,
         struct wsp_ggml_tensor  * a,
@@ -4977,6 +5016,7 @@ struct wsp_ggml_tensor * wsp_ggml_pad_ext(
     wsp_ggml_set_op_params_i32(result, 5, rp2);
     wsp_ggml_set_op_params_i32(result, 6, lp3);
     wsp_ggml_set_op_params_i32(result, 7, rp3);
+    wsp_ggml_set_op_params_i32(result, 8, 0); // not circular by default
 
 
     result->op = WSP_GGML_OP_PAD;
@@ -4985,6 +5025,25 @@ struct wsp_ggml_tensor * wsp_ggml_pad_ext(
     return result;
 }
 
+// wsp_ggml_pad_ext_circular
+
+struct wsp_ggml_tensor * wsp_ggml_pad_ext_circular(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        int                       lp0,
+        int                       rp0,
+        int                       lp1,
+        int                       rp1,
+        int                       lp2,
+        int                       rp2,
+        int                       lp3,
+        int                       rp3
+        ) {
+    struct wsp_ggml_tensor * result = wsp_ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
+    wsp_ggml_set_op_params_i32(result, 8, 1); // circular
+    return result;
+}
+
 // wsp_ggml_pad_reflect_1d
 
 struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
@@ -5044,28 +5103,6 @@ struct wsp_ggml_tensor * wsp_ggml_roll(
     return result;
 }
 
-// wsp_ggml_arange
-
-struct wsp_ggml_tensor * wsp_ggml_arange(
-        struct wsp_ggml_context * ctx,
-        float start,
-        float stop,
-        float step) {
-    WSP_GGML_ASSERT(stop > start);
-
-    const int64_t steps = (int64_t) ceilf((stop - start) / step);
-
-    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, steps);
-
-    wsp_ggml_set_op_params_f32(result, 0, start);
-    wsp_ggml_set_op_params_f32(result, 1, stop);
-    wsp_ggml_set_op_params_f32(result, 2, step);
-
-    result->op = WSP_GGML_OP_ARANGE;
-
-    return result;
-}
-
 // wsp_ggml_timestep_embedding
 
 struct wsp_ggml_tensor * wsp_ggml_timestep_embedding(
@@ -5147,6 +5184,7 @@ struct wsp_ggml_tensor * wsp_ggml_argsort(
         struct wsp_ggml_tensor * a,
         enum wsp_ggml_sort_order order) {
     WSP_GGML_ASSERT(a->ne[0] <= INT32_MAX);
+
     struct wsp_ggml_tensor * result = wsp_ggml_new_tensor(ctx, WSP_GGML_TYPE_I32, WSP_GGML_MAX_DIMS, a->ne);
 
     wsp_ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -5157,9 +5195,9 @@ struct wsp_ggml_tensor * wsp_ggml_argsort(
     return result;
 }
 
-//
+// wsp_ggml_argsort_top_k
 
-struct wsp_ggml_tensor *
+struct wsp_ggml_tensor * wsp_ggml_argsort_top_k(
         struct wsp_ggml_context * ctx,
         struct wsp_ggml_tensor  * a,
         int                       k) {
@@ -5175,6 +5213,44 @@ struct wsp_ggml_tensor * wsp_ggml_top_k(
     return result;
 }
 
+// wsp_ggml_top_k
+
+struct wsp_ggml_tensor * wsp_ggml_top_k(
+        struct wsp_ggml_context * ctx,
+        struct wsp_ggml_tensor  * a,
+        int                       k) {
+    WSP_GGML_ASSERT(a->ne[0] >= k);
+
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_4d(ctx, WSP_GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
+
+    result->op     = WSP_GGML_OP_TOP_K;
+    result->src[0] = a;
+
+    return result;
+}
+
+// wsp_ggml_arange
+
+struct wsp_ggml_tensor * wsp_ggml_arange(
+        struct wsp_ggml_context * ctx,
+        float start,
+        float stop,
+        float step) {
+    WSP_GGML_ASSERT(stop > start);
+
+    const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+    struct wsp_ggml_tensor * result = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, steps);
+
+    wsp_ggml_set_op_params_f32(result, 0, start);
+    wsp_ggml_set_op_params_f32(result, 1, stop);
+    wsp_ggml_set_op_params_f32(result, 2, step);
+
+    result->op = WSP_GGML_OP_ARANGE;
+
+    return result;
+}
+
 // wsp_ggml_flash_attn_ext
 
 struct wsp_ggml_tensor * wsp_ggml_flash_attn_ext(
@@ -5194,8 +5270,6 @@ struct wsp_ggml_tensor * wsp_ggml_flash_attn_ext(
 
     if (mask) {
         WSP_GGML_ASSERT(wsp_ggml_is_contiguous(mask));
-        WSP_GGML_ASSERT(mask->ne[1] >= WSP_GGML_PAD(q->ne[1], WSP_GGML_KQ_MASK_PAD) &&
-                "the Flash-Attention kernel requires the mask to be padded to WSP_GGML_KQ_MASK_PAD and at least n_queries big");
         //WSP_GGML_ASSERT(wsp_ggml_can_repeat_rows(mask, qk));
 
         WSP_GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
@@ -7502,6 +7576,11 @@ size_t wsp_ggml_wsp_quantize_chunk(
 
 ////////////////////////////////////////////////////////////////////////////////
 
+void wsp_ggml_log_get(wsp_ggml_log_callback * log_callback, void ** user_data) {
+    *log_callback = g_logger_state.log_callback;
+    *user_data    = g_logger_state.log_callback_user_data;
+}
+
 void wsp_ggml_log_set(wsp_ggml_log_callback log_callback, void * user_data) {
     g_logger_state.log_callback           = log_callback ? log_callback : wsp_ggml_log_callback_default;
     g_logger_state.log_callback_user_data = user_data;
```
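The new `wsp_ggml_pad_circular` and `wsp_ggml_top_k` ops above slot into the usual ggml graph-building flow. A minimal sketch (not code from the package) of how they compose, assuming the standard `wsp_ggml_init`/build/compute cycle from the CPU backend is available as in upstream ggml:

```c
#include "ggml.h"
#include "ggml-cpu.h"

// illustrative only: builds a tiny graph using the two new ops
static void demo_new_ops(void) {
    struct wsp_ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct wsp_ggml_context * ctx = wsp_ggml_init(params);

    // one row of 8 scores
    struct wsp_ggml_tensor * scores = wsp_ggml_new_tensor_1d(ctx, WSP_GGML_TYPE_F32, 8);
    for (int i = 0; i < 8; ++i) {
        ((float *) scores->data)[i] = (float) (i * 7 % 8); // arbitrary test values
    }

    // indices of the 3 largest entries; per the new header note they come back in no particular order
    struct wsp_ggml_tensor * idx = wsp_ggml_top_k(ctx, scores, 3);

    // wrap-around padding: p0 = 2 pads dim 0 on the right with values from the start of the row
    struct wsp_ggml_tensor * wrapped = wsp_ggml_pad_circular(ctx, scores, 2, 0, 0, 0);

    struct wsp_ggml_cgraph * gf = wsp_ggml_new_graph(ctx);
    wsp_ggml_build_forward_expand(gf, idx);
    wsp_ggml_build_forward_expand(gf, wrapped);
    wsp_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    wsp_ggml_free(ctx);
}
```

Note how circular padding reuses the existing `WSP_GGML_OP_PAD` op with op-param slot 8 as a circular flag, rather than introducing a new op.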
package/cpp/ggml.h
CHANGED
```diff
@@ -204,6 +204,10 @@
 #    define WSP_GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
 
+#if defined(_WIN32) && !defined(_WIN32_WINNT)
+#    define _WIN32_WINNT 0x0A00
+#endif
+
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -230,6 +234,11 @@
 
 #if UINTPTR_MAX == 0xFFFFFFFF
     #define WSP_GGML_MEM_ALIGN 4
+#elif defined(__EMSCRIPTEN__)
+    // emscripten uses max_align_t == 8, so we need WSP_GGML_MEM_ALIGN == 8 for 64-bit wasm.
+    // (for 32-bit wasm, the first conditional is true and WSP_GGML_MEM_ALIGN stays 4.)
+    // ref: https://github.com/ggml-org/llama.cpp/pull/18628
+    #define WSP_GGML_MEM_ALIGN 8
 #else
     #define WSP_GGML_MEM_ALIGN 16
 #endif
@@ -530,6 +539,7 @@ extern "C" {
         WSP_GGML_OP_ARANGE,
         WSP_GGML_OP_TIMESTEP_EMBEDDING,
         WSP_GGML_OP_ARGSORT,
+        WSP_GGML_OP_TOP_K,
         WSP_GGML_OP_LEAKY_RELU,
         WSP_GGML_OP_TRI,
         WSP_GGML_OP_FILL,
@@ -2147,7 +2157,8 @@ extern "C" {
     };
 
     enum wsp_ggml_scale_flag {
-        WSP_GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
+        WSP_GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8),
+        WSP_GGML_SCALE_FLAG_ANTIALIAS     = (1 << 9),
     };
 
     // interpolate
@@ -2190,6 +2201,15 @@ extern "C" {
             int                       p2,
             int                       p3);
 
+    // pad each dimension with values on the other side of the torus (looping around)
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_circular(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            int                       p0,
+            int                       p1,
+            int                       p2,
+            int                       p3);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_ext(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
@@ -2203,6 +2223,19 @@ extern "C" {
             int                       rp3
             );
 
+    // pad each dimension with values on the other side of the torus (looping around)
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_ext_circular(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            int                       lp0,
+            int                       rp0,
+            int                       lp1,
+            int                       rp1,
+            int                       lp2,
+            int                       rp2,
+            int                       lp3,
+            int                       rp3);
+
     // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
             struct wsp_ggml_context * ctx,
@@ -2258,25 +2291,30 @@ extern "C" {
             struct wsp_ggml_tensor  * a,
             enum wsp_ggml_sort_order  order);
 
-
+    // similar to wsp_ggml_top_k but implemented as `argsort` + `view`
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_argsort_top_k(
             struct wsp_ggml_context * ctx,
-
-
-            float                     step);
+            struct wsp_ggml_tensor  * a,
+            int                       k);
 
     // top k elements per row
+    // note: the resulting top k indices are in no particular order
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_top_k(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
            int                        k);
 
-
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_arange(
+            struct wsp_ggml_context * ctx,
+            float                     start,
+            float                     stop,
+            float                     step);
 
-    // q:    [n_embd_k, n_batch,
-    // k:    [n_embd_k, n_kv,
-    // v:    [n_embd_v, n_kv,
-    // mask: [n_kv,
-    // res:  [n_embd_v, n_head,
+    // q:    [n_embd_k, n_batch,  n_head,    ne3 ]
+    // k:    [n_embd_k, n_kv,     n_head_kv, ne3 ]
+    // v:    [n_embd_v, n_kv,     n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv,     n_batch,  ne32,      ne33]
+    // res:  [n_embd_v, n_head,   n_batch,   ne3 ] !! permuted !!
     //
     // broadcast:
     //   n_head % n_head_kv == 0
@@ -2582,7 +2620,8 @@ extern "C" {
 
     // Set callback for all future logging events.
     // If this is not called, or NULL is supplied, everything is output on stderr.
-    WSP_GGML_API void
+    WSP_GGML_API void wsp_ggml_log_get(wsp_ggml_log_callback * log_callback, void ** user_data);
+    WSP_GGML_API void wsp_ggml_log_set(wsp_ggml_log_callback log_callback, void * user_data);
 
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set_zero(struct wsp_ggml_tensor * tensor);
 
```
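The new `wsp_ggml_log_get` is what makes temporary logger overrides possible: save the current callback, swap in your own, then restore. A small sketch of that pattern (the `quiet_logger` callback is hypothetical, not part of the package):

```c
#include "ggml.h"

// hypothetical replacement logger that drops all output
static void quiet_logger(enum wsp_ggml_log_level level, const char * text, void * user_data) {
    (void) level; (void) text; (void) user_data;
}

// run `body` with ggml logging silenced, then restore the previous logger
static void with_quiet_logs(void (*body)(void)) {
    wsp_ggml_log_callback prev_cb = NULL;
    void * prev_ud = NULL;
    wsp_ggml_log_get(&prev_cb, &prev_ud); // new in this release
    wsp_ggml_log_set(quiet_logger, NULL);
    body();
    wsp_ggml_log_set(prev_cb, prev_ud);   // put the original logger back
}
```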
package/cpp/jsi/RNWhisperJSI.cpp
CHANGED
```diff
@@ -574,6 +574,7 @@ void installJSIBindings(
         if (code == 0) {
             auto resultObj = Object(runtime);
             resultObj.setProperty(runtime, "code", Value(code));
+            resultObj.setProperty(runtime, "language", String::createFromUtf8(runtime, whisper_lang_str(whisper_full_lang_id(context))));
             resultObj.setProperty(runtime, "result", String::createFromUtf8(runtime, createFullTextFromSegments(context, 0)));
             resultObj.setProperty(runtime, "segments", createSegmentsArray(runtime, context, 0));
             resolvePtr->call(runtime, resultObj);
```
package/cpp/whisper.cpp
CHANGED
```diff
@@ -2505,7 +2505,7 @@ static struct wsp_ggml_cgraph * whisper_build_graph_decoder(
 
     const float KQscale = pow(float(n_state_head), -0.25);
 
-    struct wsp_ggml_tensor * KQ_mask = wsp_ggml_new_tensor_3d(ctx0, WSP_GGML_TYPE_F32, n_kv,
+    struct wsp_ggml_tensor * KQ_mask = wsp_ggml_new_tensor_3d(ctx0, WSP_GGML_TYPE_F32, n_kv, n_tokens, 1);
     wsp_ggml_set_name(KQ_mask, "KQ_mask");
     wsp_ggml_set_input(KQ_mask);
 
@@ -2929,7 +2929,7 @@ static bool whisper_decode_internal(
             }
         }
 
-        for (int i = n_tokens; i <
+        for (int i = n_tokens; i < n_tokens; ++i) {
             for (int j = 0; j < n_kv; ++j) {
                 data[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY;
             }
@@ -6030,6 +6030,19 @@ static inline bool should_split_on_word(const char * txt, bool split_on_word) {
     return txt[0] == ' ';
 }
 
+// Count UTF-8 characters (not bytes) in a string
+static int utf8_len(const char * str) {
+    int count = 0;
+    while (*str) {
+        // Skip continuation bytes (10xxxxxx)
+        if ((*str & 0xC0) != 0x80) {
+            count++;
+        }
+        str++;
+    }
+    return count;
+}
+
 static void whisper_exp_compute_token_level_timestamps_dtw(
         struct whisper_context * ctx,
         struct whisper_state * state,
@@ -6058,7 +6071,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
     }
 
     const auto txt = whisper_token_to_str(&ctx, token.id);
-    const int cur =
+    const int cur = utf8_len(txt); // Use UTF-8 character count instead of byte count
 
     if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
         state.result_all.back().text = std::move(text);
@@ -6697,7 +6710,7 @@ static bool whisper_vad(
         }
 
         segment_start_samples = std::min(segment_start_samples, n_samples - 1);
-        segment_end_samples = std::min(segment_end_samples, n_samples);
+        segment_end_samples = std::min(segment_end_samples, n_samples - 1);
         int segment_length = segment_end_samples - segment_start_samples;
         if (segment_length > 0) {
             whisper_state::vad_segment_info segment;
```
package/ios/CMakeLists.txt
CHANGED
```diff
@@ -16,6 +16,8 @@ add_definitions(
     -DWSP_GGML_USE_ACCELERATE
     -DWSP_GGML_USE_METAL
     -DWSP_GGML_METAL_USE_BF16
+    -DWHISPER_USE_COREML
+    -DWHISPER_COREML_ALLOW_FALLBACK
 )
 
 if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64;x86_64")
@@ -31,6 +33,14 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
     )
 endif ()
 
+# CoreML source files
+set(SOURCE_FILES_COREML
+    ${SOURCE_DIR}/coreml/whisper-encoder.mm
+    ${SOURCE_DIR}/coreml/whisper-encoder-impl.m
+    ${SOURCE_DIR}/coreml/whisper-decoder-impl.m
+    ${SOURCE_DIR}/coreml/whisper-compat.m
+)
+
 # Define public headers
 set(PUBLIC_HEADERS
     ${SOURCE_DIR}/rn-whisper.h
@@ -69,6 +79,7 @@ add_library(rnwhisper SHARED
     ${SOURCE_DIR}/rn-whisper.cpp
     ${SOURCE_DIR}/rn-audioutils.cpp
     ${SOURCE_FILES_ARCH}
+    ${SOURCE_FILES_COREML}
 )
 
 # Setup include directories
@@ -76,6 +87,7 @@ target_include_directories(rnwhisper
     PUBLIC
         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp>
         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/ggml-cpu>
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../cpp/coreml>
        $<INSTALL_INTERFACE:include>
 )
 
@@ -85,6 +97,7 @@ target_link_libraries(rnwhisper PRIVATE
     "-framework Foundation"
     "-framework Metal"
     "-framework MetalKit"
+    "-framework CoreML"
 )
 
 # Set properties for framework
@@ -96,5 +109,12 @@ set_target_properties(rnwhisper PROPERTIES
     FRAMEWORK_VERSION 1.0.0
     VERSION 1.0.0
     PUBLIC_HEADER "${PUBLIC_HEADERS}"
-    XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC
+    XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES
+)
+
+# Disable ARC for Metal Objective-C files (they don't support ARC)
+set_source_files_properties(
+    ${SOURCE_DIR}/ggml-metal/ggml-metal-context.m
+    ${SOURCE_DIR}/ggml-metal/ggml-metal-device.m
+    PROPERTIES COMPILE_FLAGS "-fno-objc-arc"
 )
```
package/ios/RNWhisperContext.mm
CHANGED
```diff
@@ -612,6 +612,11 @@ struct rnwhisper_segments_callback_data {
     NSMutableDictionary *result = [[NSMutableDictionary alloc] init];
     result[@"result"] = text;
     result[@"segments"] = segments;
+    int lang_id = whisper_full_lang_id(self->ctx);
+    const char *lang_str = whisper_lang_str(lang_id);
+    if (lang_str != nullptr) {
+        result[@"language"] = [NSString stringWithUTF8String:lang_str];
+    }
     return result;
 }
 
```
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h
CHANGED
```diff
@@ -53,7 +53,14 @@ WSP_GGML_API void wsp_ggml_gallocr_free(wsp_ggml_gallocr_t galloc);
 // call with a worst-case graph to avoid buffer reallocations
 // not strictly required for single buffer usage: wsp_ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
 // returns false if the buffer allocation failed
+// wsp_ggml_gallocr_resrve_n_size writes the buffer sizes per galloc buffer that would be allocated by wsp_ggml_gallocr_reserve_n to sizes
 WSP_GGML_API bool wsp_ggml_gallocr_reserve(wsp_ggml_gallocr_t galloc, struct wsp_ggml_cgraph * graph);
+WSP_GGML_API void wsp_ggml_gallocr_reserve_n_size(
+    wsp_ggml_gallocr_t galloc,
+    struct wsp_ggml_cgraph * graph,
+    const int * node_buffer_ids,
+    const int * leaf_buffer_ids,
+    size_t * sizes);
 WSP_GGML_API bool wsp_ggml_gallocr_reserve_n(
     wsp_ggml_gallocr_t galloc,
     struct wsp_ggml_cgraph * graph,
@@ -68,6 +75,8 @@ WSP_GGML_API size_t wsp_ggml_gallocr_get_buffer_size(wsp_ggml_gallocr_t galloc,
 
 // Utils
 // Create a buffer and allocate all the tensors in a wsp_ggml_context
+// wsp_ggml_backend_alloc_ctx_tensors_from_buft_size returns the size of the buffer that would be allocated by wsp_ggml_backend_alloc_ctx_tensors_from_buft
+WSP_GGML_API size_t wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
 WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors_from_buft(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft);
 WSP_GGML_API struct wsp_ggml_backend_buffer * wsp_ggml_backend_alloc_ctx_tensors(struct wsp_ggml_context * ctx, wsp_ggml_backend_t backend);
 
```
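The new `_size` variants let callers ask "how much would this allocation cost?" before committing. A hedged sketch of that pattern (the budget check is invented for illustration):

```c
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// returns true if the context's tensors fit in `budget` bytes and were allocated
bool try_alloc_ctx(struct wsp_ggml_context * ctx, wsp_ggml_backend_buffer_type_t buft, size_t budget) {
    // query the size first, without allocating anything
    const size_t need = wsp_ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
    if (need > budget) {
        return false; // caller can fall back to another backend or a smaller model
    }
    return wsp_ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft) != NULL;
}
```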
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h
CHANGED
```diff
@@ -144,7 +144,7 @@ extern "C" {
         // device description: short informative description of the device, could be the model name
         const char * (*get_description)(wsp_ggml_backend_dev_t dev);
 
-        // device memory in bytes
+        // device memory in bytes: 0 bytes to indicate no memory to report
         void (*get_memory)(wsp_ggml_backend_dev_t dev, size_t * free, size_t * total);
 
         // device type
```
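For backend implementers, the clarified `get_memory` contract can be satisfied as below; this is a minimal sketch of a hypothetical device with no statistics to report, not code from the package:

```c
#include "ggml-backend-impl.h"

// hypothetical vtable entry for a device without memory statistics
static void my_dev_get_memory(wsp_ggml_backend_dev_t dev, size_t * free, size_t * total) {
    (void) dev;
    *free  = 0; // 0 bytes => "no memory to report", per the updated comment
    *total = 0;
}
```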
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h
CHANGED
```diff
@@ -307,6 +307,7 @@ extern "C" {
     WSP_GGML_API void wsp_ggml_backend_sched_free(wsp_ggml_backend_sched_t sched);
 
     // Initialize backend buffers from a measure graph
+    WSP_GGML_API void wsp_ggml_backend_sched_reserve_size(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph, size_t * sizes);
     WSP_GGML_API bool wsp_ggml_backend_sched_reserve(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph); // returns success
 
     WSP_GGML_API int wsp_ggml_backend_sched_get_n_backends(wsp_ggml_backend_sched_t sched);
@@ -357,7 +358,7 @@ extern "C" {
     typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
 
     // Compare the output of two backends
-    WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor *
+    WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor const * const * test_nodes, size_t num_test_nodes);
 
     // Tensor initialization
     WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
```
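A hedged sketch of the new scheduler size query. I am assuming `sizes` receives one byte count per backend, matching `wsp_ggml_backend_sched_get_n_backends`; this is not verified against the implementation:

```c
#include <stdio.h>
#include "ggml-backend.h"

// assumption: sizes[i] is the reservation for backend i, n_backends <= 16 here
void report_reserve_sizes(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * measure_graph) {
    const int n_backends = wsp_ggml_backend_sched_get_n_backends(sched);
    size_t sizes[16] = {0};
    wsp_ggml_backend_sched_reserve_size(sched, measure_graph, sizes);
    for (int i = 0; i < n_backends && i < 16; ++i) {
        printf("backend %d would reserve %zu bytes\n", i, sizes[i]);
    }
}
```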
package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h
CHANGED
```diff
@@ -99,6 +99,7 @@ extern "C" {
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_sme         (void);
     // other
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_riscv_v     (void);
+    WSP_GGML_BACKEND_API int wsp_ggml_cpu_get_rvv_vlen    (void); // risc-v vector length in bytes
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vsx         (void);
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vxe         (void);
     WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_wasm_simd   (void);
```