whisper.rn 0.5.0-rc.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/ggml-alloc.c +1 -15
- package/cpp/ggml-backend-reg.cpp +17 -8
- package/cpp/ggml-backend.cpp +15 -22
- package/cpp/ggml-common.h +17 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +132 -596
- package/cpp/ggml-cpu/arch/arm/repack.cpp +14 -286
- package/cpp/ggml-cpu/arch/x86/quants.c +184 -675
- package/cpp/ggml-cpu/arch/x86/repack.cpp +4679 -1657
- package/cpp/ggml-cpu/arch-fallback.h +34 -0
- package/cpp/ggml-cpu/ggml-cpu.c +22 -1
- package/cpp/ggml-cpu/ggml-cpu.cpp +21 -24
- package/cpp/ggml-cpu/ops.cpp +870 -211
- package/cpp/ggml-cpu/ops.h +3 -8
- package/cpp/ggml-cpu/quants.c +35 -0
- package/cpp/ggml-cpu/quants.h +8 -0
- package/cpp/ggml-cpu/repack.cpp +458 -47
- package/cpp/ggml-cpu/repack.h +22 -0
- package/cpp/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/ggml-cpu/traits.cpp +2 -2
- package/cpp/ggml-cpu/traits.h +1 -1
- package/cpp/ggml-cpu/vec.cpp +12 -9
- package/cpp/ggml-cpu/vec.h +107 -13
- package/cpp/ggml-impl.h +77 -0
- package/cpp/ggml-metal-impl.h +51 -12
- package/cpp/ggml-metal.m +610 -115
- package/cpp/ggml-opt.cpp +97 -41
- package/cpp/ggml-opt.h +25 -6
- package/cpp/ggml-quants.c +110 -16
- package/cpp/ggml-quants.h +6 -0
- package/cpp/ggml-whisper-sim.metallib +0 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +314 -88
- package/cpp/ggml.h +137 -11
- package/cpp/gguf.cpp +8 -1
- package/cpp/jsi/RNWhisperJSI.cpp +23 -6
- package/cpp/whisper.cpp +15 -6
- package/ios/RNWhisper.mm +6 -6
- package/ios/RNWhisperContext.mm +2 -0
- package/ios/RNWhisperVadContext.mm +2 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-common.h +17 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +77 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +51 -12
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-opt.h +25 -6
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-quants.h +6 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +137 -11
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
- package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +13 -0
- package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/module/realtime-transcription/RealtimeTranscriber.js +13 -0
- package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
- package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
- package/lib/typescript/realtime-transcription/types.d.ts +6 -0
- package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/realtime-transcription/RealtimeTranscriber.ts +17 -0
- package/src/realtime-transcription/types.ts +6 -0
package/cpp/ggml-metal-impl.h

@@ -23,6 +23,9 @@
 #define N_R0_Q8_0 4
 #define N_SG_Q8_0 2
 
+#define N_R0_MXFP4 2
+#define N_SG_MXFP4 2
+
 #define N_R0_Q2_K 4
 #define N_SG_Q2_K 2
 
@@ -126,8 +129,18 @@ typedef struct {
     uint64_t nb2;
     uint64_t nb3;
     uint64_t offs;
+    uint64_t o1[8];
 } wsp_ggml_metal_kargs_bin;
 
+typedef struct {
+    int64_t ne0;
+    int64_t ne1;
+    size_t  nb01;
+    size_t  nb02;
+    size_t  nb11;
+    size_t  nb21;
+} wsp_ggml_metal_kargs_add_id;
+
 typedef struct {
     int32_t ne00;
     int32_t ne01;
@@ -229,14 +242,18 @@ typedef struct {
     uint64_t nb21;
     uint64_t nb22;
     uint64_t nb23;
+    int32_t  ne32;
+    int32_t  ne33;
     uint64_t nb31;
+    uint64_t nb32;
+    uint64_t nb33;
     int32_t  ne1;
     int32_t  ne2;
     float    scale;
     float    max_bias;
     float    m0;
     float    m1;
-
+    int32_t  n_head_log2;
     float    logit_softcap;
 } wsp_ggml_metal_kargs_flash_attn_ext;
 
@@ -373,8 +390,16 @@ typedef struct {
 typedef struct {
     int32_t  ne00;
     int32_t  ne00_4;
-    uint64_t
+    uint64_t nb1;
+    uint64_t nb2;
+    uint64_t nb3;
     float    eps;
+    int32_t  nef1[3];
+    int32_t  nef2[3];
+    int32_t  nef3[3];
+    uint64_t nbf1[3];
+    uint64_t nbf2[3];
+    uint64_t nbf3[3];
 } wsp_ggml_metal_kargs_rms_norm;
 
 typedef struct {
@@ -431,6 +456,8 @@ typedef struct{
     uint64_t nb1;
     int32_t  i00;
     int32_t  i10;
+    float    alpha;
+    float    limit;
 } wsp_ggml_metal_kargs_glu;
 
 typedef struct {
@@ -461,14 +488,26 @@ typedef struct {
 } wsp_ggml_metal_kargs_sum_rows;
 
 typedef struct {
-
-
-
+    int32_t  ne00;
+    int32_t  ne01;
+    int32_t  ne02;
+    uint64_t nb01;
+    uint64_t nb02;
+    uint64_t nb03;
+    int32_t  ne11;
+    int32_t  ne12;
+    int32_t  ne13;
+    uint64_t nb11;
+    uint64_t nb12;
+    uint64_t nb13;
+    uint64_t nb1;
+    uint64_t nb2;
+    uint64_t nb3;
     float    scale;
     float    max_bias;
     float    m0;
     float    m1;
-
+    int32_t  n_head_log2;
 } wsp_ggml_metal_kargs_soft_max;
 
 typedef struct {
@@ -499,26 +538,26 @@ typedef struct {
 typedef struct {
     int64_t  d_state;
     int64_t  d_inner;
+    int64_t  n_head;
+    int64_t  n_group;
     int64_t  n_seq_tokens;
     int64_t  n_seqs;
-
+    int64_t  s_off;
     uint64_t nb01;
     uint64_t nb02;
-    uint64_t
+    uint64_t nb03;
     uint64_t nb11;
     uint64_t nb12;
     uint64_t nb13;
-    uint64_t nb20;
     uint64_t nb21;
     uint64_t nb22;
-    uint64_t nb30;
     uint64_t nb31;
-    uint64_t nb40;
     uint64_t nb41;
     uint64_t nb42;
-    uint64_t
+    uint64_t nb43;
     uint64_t nb51;
     uint64_t nb52;
+    uint64_t nb53;
 } wsp_ggml_metal_kargs_ssm_scan;
 
 typedef struct {
package/cpp/ggml-opt.h

@@ -74,16 +74,26 @@ extern "C" {
         WSP_GGML_OPT_BUILD_TYPE_OPT = 30,
     };
 
+    enum wsp_ggml_opt_optimizer_type {
+        WSP_GGML_OPT_OPTIMIZER_TYPE_ADAMW,
+        WSP_GGML_OPT_OPTIMIZER_TYPE_SGD,
+
+        WSP_GGML_OPT_OPTIMIZER_TYPE_COUNT
+    };
+
     // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
     struct wsp_ggml_opt_optimizer_params {
-        // AdamW optimizer parameters
         struct {
             float alpha; // learning rate
-            float beta1;
-            float beta2;
+            float beta1; // first AdamW momentum
+            float beta2; // second AdamW momentum
             float eps;   // epsilon for numerical stability
-            float wd;    // weight decay
+            float wd;    // weight decay - 0.0f to disable
         } adamw;
+        struct {
+            float alpha; // learning rate
+            float wd;    // weight decay
+        } sgd;
     };
 
     // callback to calculate optimizer parameters prior to a backward pass
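The new `sgd` branch sits alongside `adamw` in the params struct. As a hedged sketch of user code (not part of this package), a callback matching `wsp_ggml_opt_get_optimizer_params` could fill it like this; the assumption that the callback returns the struct by value follows the upstream ggml-opt API:

```c
// Sketch of a user-supplied optimizer-params callback (assumed to return
// the struct by value, matching upstream ggml-opt). Values are examples.
static struct wsp_ggml_opt_optimizer_params my_sgd_pars(void * userdata) {
    struct wsp_ggml_opt_optimizer_params p = {0};
    p.sgd.alpha = 1e-3f; // learning rate
    p.sgd.wd    = 0.0f;  // weight decay, 0.0f to disable
    (void) userdata;     // e.g. pointer to the current epoch
    return p;
}
```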
@@ -112,8 +122,11 @@ extern "C" {
 
         int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
 
-        wsp_ggml_opt_get_optimizer_params get_opt_pars;
-        void *
+        wsp_ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
+        void * get_opt_pars_ud;                         // userdata for calculating optimizer parameters
+
+        // only WSP_GGML_OPT_OPTIMIZER_TYPE_ADAMW needs m, v momenta per parameter tensor
+        enum wsp_ggml_opt_optimizer_type optimizer;
     };
 
     // get parameters for an optimization context with defaults set where possible
@@ -142,6 +155,10 @@ extern "C" {
     // get the gradient accumulator for a node from the forward graph
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_opt_grad_acc(wsp_ggml_opt_context_t opt_ctx, struct wsp_ggml_tensor * node);
 
+    WSP_GGML_API enum wsp_ggml_opt_optimizer_type wsp_ggml_opt_context_optimizer_type(wsp_ggml_opt_context_t); //TODO consistent naming scheme
+
+    WSP_GGML_API const char * wsp_ggml_opt_optimizer_name(enum wsp_ggml_opt_optimizer_type);
+
     // ====== Optimization Result ======
 
     WSP_GGML_API wsp_ggml_opt_result_t wsp_ggml_opt_result_init(void);
@@ -226,12 +243,14 @@ extern "C" {
             struct wsp_ggml_tensor           * outputs,        // output tensor, must have shape [ne_label, ndata_batch] if labels are used
             wsp_ggml_opt_dataset_t             dataset,        // dataset with data and optionally also labels
             enum wsp_ggml_opt_loss_type        loss_type,      // loss to minimize
+            enum wsp_ggml_opt_optimizer_type   optimizer,      // sgd or adamw
             wsp_ggml_opt_get_optimizer_params  get_opt_pars,   // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
             int64_t                            nepoch,         // how many times the dataset should be iterated over
             int64_t                            nbatch_logical, // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
             float                              val_split,      // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
             bool                               silent);        // whether or not info prints to stderr should be suppressed
 
+
 #ifdef __cplusplus
 }
 #endif
package/cpp/ggml-quants.h

@@ -21,6 +21,8 @@ WSP_GGML_API void wsp_quantize_row_q5_1_ref(const float * WSP_GGML_RESTRICT x, b
 WSP_GGML_API void wsp_quantize_row_q8_0_ref(const float * WSP_GGML_RESTRICT x, block_q8_0 * WSP_GGML_RESTRICT y, int64_t k);
 WSP_GGML_API void wsp_quantize_row_q8_1_ref(const float * WSP_GGML_RESTRICT x, block_q8_1 * WSP_GGML_RESTRICT y, int64_t k);
 
+WSP_GGML_API void wsp_quantize_row_mxfp4_ref(const float * WSP_GGML_RESTRICT x, block_mxfp4 * WSP_GGML_RESTRICT y, int64_t k);
+
 WSP_GGML_API void wsp_quantize_row_q2_K_ref(const float * WSP_GGML_RESTRICT x, block_q2_K * WSP_GGML_RESTRICT y, int64_t k);
 WSP_GGML_API void wsp_quantize_row_q3_K_ref(const float * WSP_GGML_RESTRICT x, block_q3_K * WSP_GGML_RESTRICT y, int64_t k);
 WSP_GGML_API void wsp_quantize_row_q4_K_ref(const float * WSP_GGML_RESTRICT x, block_q4_K * WSP_GGML_RESTRICT y, int64_t k);
@@ -45,6 +47,8 @@ WSP_GGML_API void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * WSP_GGML_RESTRI
 WSP_GGML_API void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
 //WSP_GGML_API void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
 
+WSP_GGML_API void wsp_dewsp_quantize_row_mxfp4(const block_mxfp4 * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
+
 WSP_GGML_API void wsp_dewsp_quantize_row_q2_K(const block_q2_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
 WSP_GGML_API void wsp_dewsp_quantize_row_q3_K(const block_q3_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
 WSP_GGML_API void wsp_dewsp_quantize_row_q4_K(const block_q4_K * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
@@ -90,6 +94,8 @@ WSP_GGML_API size_t wsp_quantize_q5_0(const float * WSP_GGML_RESTRICT src, void
 WSP_GGML_API size_t wsp_quantize_q5_1(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 WSP_GGML_API size_t wsp_quantize_q8_0(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
 
+WSP_GGML_API size_t wsp_quantize_mxfp4(const float * WSP_GGML_RESTRICT src, void * WSP_GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
+
 WSP_GGML_API void wsp_iq2xs_init_impl(enum wsp_ggml_type type);
 WSP_GGML_API void wsp_iq2xs_free_impl(enum wsp_ggml_type type);
 WSP_GGML_API void wsp_iq3xs_init_impl(int grid_size);
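Together these give the usual quantize/dequantize surface for the new type. A hedged round-trip sketch; buffer sizing assumes QK_MXFP4 == 32, per the block_mxfp4 definition in ggml-common.h later in this diff:

```c
// Round-trip 64 floats through MXFP4 (sketch; assumes ggml-quants.h and
// ggml-common.h are included so block_mxfp4 and QK_MXFP4 are visible).
void mxfp4_roundtrip(const float src[64], float out[64]) {
    block_mxfp4 blocks[64 / QK_MXFP4];           // 2 blocks of 32 values
    wsp_quantize_row_mxfp4_ref(src, blocks, 64); // reference quantizer
    wsp_dewsp_quantize_row_mxfp4(blocks, out, 64);
}
```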
package/cpp/ggml.h

@@ -241,6 +241,8 @@
 #define WSP_GGML_ROPE_TYPE_MROPE 8
 #define WSP_GGML_ROPE_TYPE_VISION 24
 
+#define WSP_GGML_MROPE_SECTIONS 4
+
 #define WSP_GGML_UNUSED(x) (void)(x)
 
 #define WSP_GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -304,6 +306,16 @@
     WSP_GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
     WSP_GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
 
+#define WSP_GGML_TENSOR_TERNARY_OP_LOCALS \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
+    WSP_GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+    WSP_GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
+
 #define WSP_GGML_TENSOR_BINARY_OP_LOCALS01 \
     WSP_GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
     WSP_GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
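The new macro mirrors WSP_GGML_TENSOR_BINARY_OP_LOCALS with a third source operand. A minimal sketch of how a kernel body would use it; the function is hypothetical, but the macro does expect tensors named src0/src1/src2/dst in scope:

```c
// Hypothetical kernel skeleton: the macro pulls ne00..ne03/nb00..nb03
// (src0), ne10../nb10.. (src1), ne20../nb20.. (src2) and ne0..ne3/nb0..nb3
// (dst) into local variables named after the operands.
static void ternary_op_sketch(const struct wsp_ggml_tensor * src0,
                              const struct wsp_ggml_tensor * src1,
                              const struct wsp_ggml_tensor * src2,
                              struct wsp_ggml_tensor * dst) {
    WSP_GGML_TENSOR_TERNARY_OP_LOCALS
    // ... kernel loops over ne0..ne3 would go here ...
}
```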
@@ -314,6 +326,13 @@
 extern "C" {
 #endif
 
+    // Function type used in fatal error callbacks
+    typedef void (*wsp_ggml_abort_callback_t)(const char * error_message);
+
+    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+    // Returns the old callback for chaining
+    WSP_GGML_API wsp_ggml_abort_callback_t wsp_ggml_set_abort_callback(wsp_ggml_abort_callback_t callback);
+
 WSP_GGML_NORETURN WSP_GGML_ATTRIBUTE_FORMAT(3, 4)
 WSP_GGML_API void wsp_ggml_abort(const char * file, int line, const char * fmt, ...);
 
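A short usage sketch of the new hook (user code); the chaining pattern follows the header comment above:

```c
#include <stdio.h>

static wsp_ggml_abort_callback_t prev_abort = NULL;

static void my_abort_handler(const char * error_message) {
    fprintf(stderr, "[whisper.rn] ggml abort: %s\n", error_message);
    if (prev_abort) {
        prev_abort(error_message); // chain to the previously installed hook
    }
}

void install_abort_hook(void) {
    // the setter returns the old callback, so it can be chained or restored
    prev_abort = wsp_ggml_set_abort_callback(my_abort_handler);
}
```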
@@ -388,7 +407,8 @@ extern "C" {
         // WSP_GGML_TYPE_IQ4_NL_4_4 = 36,
         // WSP_GGML_TYPE_IQ4_NL_4_8 = 37,
         // WSP_GGML_TYPE_IQ4_NL_8_8 = 38,
-
+        WSP_GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
+        WSP_GGML_TYPE_COUNT = 40,
     };
 
     // precision
@@ -423,6 +443,7 @@ extern "C" {
         WSP_GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_IQ1_M  = 23, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_BF16   = 24, // except 1d tensors
+        WSP_GGML_FTYPE_MOSTLY_MXFP4  = 25, // except 1d tensors
     };
 
     // available tensor operations:
@@ -431,6 +452,7 @@ extern "C" {
 
         WSP_GGML_OP_DUP,
         WSP_GGML_OP_ADD,
+        WSP_GGML_OP_ADD_ID,
         WSP_GGML_OP_ADD1,
         WSP_GGML_OP_ACC,
         WSP_GGML_OP_SUB,
@@ -488,7 +510,7 @@ extern "C" {
         WSP_GGML_OP_POOL_1D,
         WSP_GGML_OP_POOL_2D,
         WSP_GGML_OP_POOL_2D_BACK,
-        WSP_GGML_OP_UPSCALE,
+        WSP_GGML_OP_UPSCALE,
         WSP_GGML_OP_PAD,
         WSP_GGML_OP_PAD_REFLECT_1D,
         WSP_GGML_OP_ROLL,
@@ -520,6 +542,7 @@ extern "C" {
         WSP_GGML_OP_CROSS_ENTROPY_LOSS,
         WSP_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
         WSP_GGML_OP_OPT_STEP_ADAMW,
+        WSP_GGML_OP_OPT_STEP_SGD,
 
         WSP_GGML_OP_GLU,
 
@@ -550,6 +573,9 @@ extern "C" {
         WSP_GGML_GLU_OP_REGLU,
         WSP_GGML_GLU_OP_GEGLU,
         WSP_GGML_GLU_OP_SWIGLU,
+        WSP_GGML_GLU_OP_SWIGLU_OAI,
+        WSP_GGML_GLU_OP_GEGLU_ERF,
+        WSP_GGML_GLU_OP_GEGLU_QUICK,
 
         WSP_GGML_GLU_OP_COUNT,
     };
@@ -639,6 +665,9 @@ extern "C" {
 
     // misc
 
+    WSP_GGML_API const char * wsp_ggml_version(void);
+    WSP_GGML_API const char * wsp_ggml_commit(void);
+
     WSP_GGML_API void    wsp_ggml_time_init(void); // call this once at the beginning of the program
     WSP_GGML_API int64_t wsp_ggml_time_ms(void);
     WSP_GGML_API int64_t wsp_ggml_time_us(void);
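Both accessors are plain string getters, e.g.:

```c
#include <stdio.h>

void print_ggml_build_info(void) {
    // e.g. for attaching the vendored ggml revision to bug reports
    printf("ggml %s (commit %s)\n", wsp_ggml_version(), wsp_ggml_commit());
}
```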
@@ -819,6 +848,13 @@ extern "C" {
             struct wsp_ggml_tensor  * b,
             enum   wsp_ggml_type      type);
 
+    // dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add_id(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
+            struct wsp_ggml_tensor  * ids);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_add1(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
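The header comment pins down the semantics of the new op: a per-row bias selected by an id tensor. Written out as a plain C loop over contiguous float buffers, a hedged reference version (not the library kernel) looks like this:

```c
// Reference semantics of wsp_ggml_add_id on contiguous float data:
// for every row (i1, i2) of a, add row ids[i1, i2] of b.
void add_id_ref(float * dst, const float * a, const float * b,
                const int32_t * ids, int ne0, int ne1, int ne2) {
    for (int i2 = 0; i2 < ne2; i2++) {
        for (int i1 = 0; i1 < ne1; i1++) {
            const int32_t row = ids[i2*ne1 + i1]; // which row of b to add
            for (int i0 = 0; i0 < ne0; i0++) {
                dst[(i2*ne1 + i1)*ne0 + i0] =
                    a[(i2*ne1 + i1)*ne0 + i0] + b[row*ne0 + i0];
            }
        }
    }
}
```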
@@ -1137,6 +1173,22 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_erf(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_erf_swapped(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_quick(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_quick_swapped(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a);
+
     // A: n columns, r rows,
     // B: n columns, r rows,
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_glu_split(
@@ -1160,6 +1212,23 @@ extern "C" {
             struct wsp_ggml_tensor  * a,
             struct wsp_ggml_tensor  * b);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_erf_split(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_geglu_quick_split(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_swiglu_oai(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
+            float                     alpha,
+            float                     limit);
+
     // normalize along rows
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_norm(
             struct wsp_ggml_context * ctx,
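wsp_ggml_swiglu_oai is the gated-linear-unit variant used by gpt-oss style models; alpha and limit are the same scalars that wsp_ggml_metal_kargs_glu gained above. A hedged scalar sketch of the gating; the clamping convention is an assumption based on the upstream gpt-oss style kernel, not code from this package:

```c
#include <math.h>

// Assumed per-element computation: clamp the value branch from above and
// the linear branch to +/-limit, apply an alpha-scaled sigmoid gate, and
// add 1 to the linear branch.
static float swiglu_oai_ref(float x, float g, float alpha, float limit) {
    x = fminf(x, limit);                 // value branch, clamped above
    g = fmaxf(fminf(g, limit), -limit);  // linear branch, clamped both ways
    const float gate = 1.0f / (1.0f + expf(-alpha * x));
    return (x * gate) * (g + 1.0f);
}
```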
@@ -1259,6 +1328,19 @@ extern "C" {
             struct wsp_ggml_tensor  * a,
             float                     s);
 
+    // x = s * a + b
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_bias(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float                     s,
+            float                     b);
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_scale_bias_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            float                     s,
+            float                     b);
+
     // b -> view(a,offset,nb1,nb2,3), return modified a
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_set(
             struct wsp_ggml_context * ctx,
@@ -1503,8 +1585,14 @@ extern "C" {
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a);
 
+    // a    [ne0, ne01, ne02, ne03]
+    // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
+    //
+    // broadcast:
+    //   ne02 % ne12 == 0
+    //   ne03 % ne13 == 0
+    //
     // fused soft_max(a*scale + mask*(ALiBi slope))
-    // mask is optional
     // max_bias = 0.0f for no ALiBi
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext(
             struct wsp_ggml_context * ctx,
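The new comment spells out how a 4-D mask broadcasts over the batch dims of a. In ggml's usual convention for broadcasting ops (an assumption carried over from its other operators), the mapping is by modulo:

```c
#include <stddef.h>
#include <stdint.h>

// Sketch: locate the mask row for element (i01, i02, i03) of a, given the
// mask's broadcast dims ne12/ne13 and byte strides nb11/nb12/nb13.
static const char * soft_max_mask_row(const char * mask, int64_t i01,
                                      int64_t i02, int64_t i03,
                                      int64_t ne12, int64_t ne13,
                                      size_t nb11, size_t nb12, size_t nb13) {
    return mask + i01*nb11 + (i02 % ne12)*nb12 + (i03 % ne13)*nb13;
}
```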
@@ -1513,6 +1601,10 @@ extern "C" {
             float                     scale,
             float                     max_bias);
 
+    WSP_GGML_API void wsp_ggml_soft_max_add_sinks(
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * sinks);
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_back(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
@@ -1571,7 +1663,7 @@ extern "C" {
             struct wsp_ggml_tensor  * b,
             struct wsp_ggml_tensor  * c,
             int                       n_dims,
-            int                       sections[
+            int                       sections[WSP_GGML_MROPE_SECTIONS],
             int                       mode,
             int                       n_ctx_orig,
             float                     freq_base,
@@ -1597,6 +1689,22 @@ extern "C" {
             float                     beta_fast,
             float                     beta_slow);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_multi_inplace(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * b,
+            struct wsp_ggml_tensor  * c,
+            int                       n_dims,
+            int                       sections[WSP_GGML_MROPE_SECTIONS],
+            int                       mode,
+            int                       n_ctx_orig,
+            float                     freq_base,
+            float                     freq_scale,
+            float                     ext_factor,
+            float                     attn_factor,
+            float                     beta_fast,
+            float                     beta_slow);
+
     WSP_GGML_DEPRECATED(WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_custom(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * a,
@@ -1967,11 +2075,17 @@ extern "C" {
 
 #define WSP_GGML_KQ_MASK_PAD 64
 
-    // q:    [n_embd_k, n_batch,     n_head,
-    // k:    [n_embd_k, n_kv,        n_head_kv,
-    // v:    [n_embd_v, n_kv,        n_head_kv,
-    // mask: [n_kv,     n_batch_pad,
-    // res:  [n_embd_v, n_head,      n_batch,
+    // q:    [n_embd_k, n_batch,     n_head,    ne3 ]
+    // k:    [n_embd_k, n_kv,        n_head_kv, ne3 ]
+    // v:    [n_embd_v, n_kv,        n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv,     n_batch_pad, ne32, ne33] !! n_batch_pad = WSP_GGML_PAD(n_batch, WSP_GGML_KQ_MASK_PAD) !!
+    // res:  [n_embd_v, n_head,      n_batch,   ne3 ] !! permuted !!
+    //
+    // broadcast:
+    //   n_head % n_head_kv == 0
+    //   n_head % ne32      == 0
+    //   ne3    % ne33      == 0
+    //
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_ext(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor  * q,
@@ -1989,6 +2103,10 @@ extern "C" {
     WSP_GGML_API enum wsp_ggml_prec wsp_ggml_flash_attn_ext_get_prec(
             const struct wsp_ggml_tensor * a);
 
+    WSP_GGML_API void wsp_ggml_flash_attn_ext_add_sinks(
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * sinks);
+
     // TODO: needs to be adapted to wsp_ggml_flash_attn_ext
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_flash_attn_back(
             struct wsp_ggml_context * ctx,
@@ -2010,7 +2128,8 @@ extern "C" {
             struct wsp_ggml_tensor  * dt,
             struct wsp_ggml_tensor  * A,
             struct wsp_ggml_tensor  * B,
-            struct wsp_ggml_tensor  * C
+            struct wsp_ggml_tensor  * C,
+            struct wsp_ggml_tensor  * ids);
 
     // partition into non-overlapping windows with padding if needed
     // example:
@@ -2193,7 +2312,14 @@ extern "C" {
             struct wsp_ggml_tensor  * grad,
             struct wsp_ggml_tensor  * m,
             struct wsp_ggml_tensor  * v,
-            struct wsp_ggml_tensor  * adamw_params); // parameters such
+            struct wsp_ggml_tensor  * adamw_params); // parameters such as the learning rate
+
+    // stochastic gradient descent step (with weight decay)
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_opt_step_sgd(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor  * a,
+            struct wsp_ggml_tensor  * grad,
+            struct wsp_ggml_tensor  * sgd_params); // alpha, weight decay
 
     //
     // automatic differentiation
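A hedged scalar sketch of the update the new op performs; decoupled weight decay is an assumption, mirroring the existing AdamW step, with alpha and wd coming from sgd_params:

```c
#include <stdint.h>

// Assumed element-wise update: a <- a*(1 - alpha*wd) - alpha*grad
// (decoupled weight decay; a sketch, not the library kernel).
static void opt_step_sgd_ref(float * a, const float * grad, int64_t n,
                             float alpha, float wd) {
    for (int64_t i = 0; i < n; i++) {
        a[i] = a[i]*(1.0f - alpha*wd) - alpha*grad[i];
    }
}
```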
Binary file
Binary file
package/cpp/ggml-common.h

@@ -99,6 +99,9 @@ typedef sycl::half2 wsp_ggml_half2;
 #define QI4_1 (QK4_1 / (4 * QR4_1))
 #define QR4_1 2
 
+#define QI_MXFP4 (QK_MXFP4 / (4 * QR_MXFP4))
+#define QR_MXFP4 2
+
 #define QI5_0 (QK5_0 / (4 * QR5_0))
 #define QR5_0 2
 
@@ -184,6 +187,13 @@ typedef struct {
 } block_q4_1;
 static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
 
+#define QK_MXFP4 32
+typedef struct {
+    uint8_t e; // E8M0
+    uint8_t qs[QK_MXFP4/2];
+} block_mxfp4;
+static_assert(sizeof(block_mxfp4) == sizeof(uint8_t) + QK_MXFP4/2, "wrong mxfp4 block size/padding");
+
 #define QK5_0 32
 typedef struct {
     wsp_ggml_half d; // delta
@@ -1074,10 +1084,17 @@ WSP_GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
     0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
 WSP_GGML_TABLE_END()
 
+// TODO: fix name to kvalues_iq4_nl
 WSP_GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
     -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
 WSP_GGML_TABLE_END()
 
+// e2m1 values (doubled)
+// ref: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
+WSP_GGML_TABLE_BEGIN(int8_t, kvalues_mxfp4, 16)
+    0, 1, 2, 3, 4, 6, 8, 12, 0, -1, -2, -3, -4, -6, -8, -12,
+WSP_GGML_TABLE_END()
+
 #define NGRID_IQ1S 2048
 #define IQ1S_DELTA 0.125f
 #define IQ1M_DELTA 0.125f
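With the block layout from the earlier hunk (one E8M0 scale byte plus 16 packed nibbles) and this table, decoding a block is straightforward. A hedged sketch; the low-nibble-first layout and the halved scale (undoing the doubled table values) are assumptions matching the comments in this diff:

```c
// Decode one block_mxfp4 into 32 floats (sketch, not the library kernel).
// WSP_GGML_E8M0_TO_FP32_HALF is added in ggml-impl.h below; halving the
// E8M0 scale compensates for the doubled e2m1 table entries.
static void decode_block_mxfp4(const block_mxfp4 * b, float * y) {
    const float d = WSP_GGML_E8M0_TO_FP32_HALF(b->e); // block scale
    for (int j = 0; j < QK_MXFP4/2; j++) {
        y[j]              = d * kvalues_mxfp4[b->qs[j] & 0x0F]; // low nibble
        y[j + QK_MXFP4/2] = d * kvalues_mxfp4[b->qs[j] >> 4];   // high nibble
    }
}
```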
package/cpp/ggml-impl.h

@@ -73,6 +73,22 @@ static inline int wsp_ggml_up(int n, int m) {
     return (n + m - 1) & ~(m - 1);
 }
 
+// TODO: move to ggml.h?
+static bool wsp_ggml_are_same_layout(const struct wsp_ggml_tensor * a, const struct wsp_ggml_tensor * b) {
+    if (a->type != b->type) {
+        return false;
+    }
+    for (int i = 0; i < WSP_GGML_MAX_DIMS; i++) {
+        if (a->ne[i] != b->ne[i]) {
+            return false;
+        }
+        if (a->nb[i] != b->nb[i]) {
+            return false;
+        }
+    }
+    return true;
+}
+
 //
 // logging
 //
@@ -394,6 +410,67 @@ static inline wsp_ggml_fp16_t wsp_ggml_compute_fp32_to_fp16(float f) {
 #define WSP_GGML_FP16_TO_FP32(x) WSP_GGML_COMPUTE_FP16_TO_FP32(x)
 #define WSP_GGML_FP32_TO_FP16(x) WSP_GGML_COMPUTE_FP32_TO_FP16(x)
 
+static inline float wsp_ggml_e8m0_to_fp32(uint8_t x) {
+    uint32_t bits; // Stores the raw bit representation of the float
+
+    // Handle special case for minimum exponent (denormalized float)
+    if (x == 0) {
+        // Bit pattern for 2^(-127):
+        // - Sign bit: 0 (positive)
+        // - Exponent: 0 (denormalized number)
+        // - Mantissa: 0x400000 (0.5 in fractional form)
+        // Value = 0.5 * 2^(-126) = 2^(-127)
+        bits = 0x00400000;
+    }
+    // note: disabled as we don't need to handle NaNs
+    //// Handle special case for NaN (all bits set)
+    //else if (x == 0xFF) {
+    //    // Standard quiet NaN pattern:
+    //    // - Sign bit: 0
+    //    // - Exponent: all 1s (0xFF)
+    //    // - Mantissa: 0x400000 (quiet NaN flag)
+    //    bits = 0x7FC00000;
+    //}
+    // Normalized values (most common case)
+    else {
+        // Construct normalized float by shifting exponent into position:
+        // - Exponent field: 8 bits (positions 30-23)
+        // - Mantissa: 0 (implicit leading 1)
+        // Value = 2^(x - 127)
+        bits = (uint32_t) x << 23;
+    }
+
+    float result; // Final float value
+    // Safely reinterpret bit pattern as float without type-punning issues
+    memcpy(&result, &bits, sizeof(float));
+    return result;
+}
+
+// Equal to wsp_ggml_e8m0_to_fp32/2
+// Useful with MXFP4 quantization since the E0M2 values are doubled
+static inline float wsp_ggml_e8m0_to_fp32_half(uint8_t x) {
+    uint32_t bits;
+
+    // For x < 2: use precomputed denormal patterns
+    if (x < 2) {
+        // 0x00200000 = 2^(-128), 0x00400000 = 2^(-127)
+        bits = 0x00200000 << x;
+    }
+    // For x >= 2: normalized exponent adjustment
+    else {
+        // 0.5 * 2^(x-127) = 2^(x-128) = normalized with exponent (x-1)
+        bits = (uint32_t)(x - 1) << 23;
+    }
+    // Note: NaNs are not handled here
+
+    float result;
+    memcpy(&result, &bits, sizeof(float));
+    return result;
+}
+
+#define WSP_GGML_E8M0_TO_FP32(x)      wsp_ggml_e8m0_to_fp32(x)
+#define WSP_GGML_E8M0_TO_FP32_HALF(x) wsp_ggml_e8m0_to_fp32_half(x)
+
 /**
  * Converts brain16 to float32.
  *
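Spot checks of the conversion; these follow directly from value = 2^(x - 127) and the x == 0 denormal case in the code above:

```c
#include <assert.h>

void check_e8m0(void) {
    assert(wsp_ggml_e8m0_to_fp32(127) == 1.0f);      // 2^(127-127)
    assert(wsp_ggml_e8m0_to_fp32(130) == 8.0f);      // 2^3
    assert(wsp_ggml_e8m0_to_fp32(0)   == 0x1p-127f); // denormal special case
    assert(wsp_ggml_e8m0_to_fp32_half(130) == 4.0f); // halved variant
}
```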