npm - whisper.rn - Versions diffs - 0.4.0-rc.10 → 0.4.0-rc.11 - Mend

whisper.rn 0.4.0-rc.10 → 0.4.0-rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/android/src/main/CMakeLists.txt +9 -3
package/cpp/ggml-alloc.c +6 -14
package/cpp/ggml-backend-impl.h +50 -11
package/cpp/ggml-backend-reg.cpp +409 -31
package/cpp/ggml-backend.cpp +9 -3
package/cpp/ggml-backend.h +18 -0
package/cpp/ggml-common.h +41 -43
package/cpp/ggml-cpp.h +1 -0
package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
package/cpp/ggml-cpu-aarch64.h +2 -24
package/cpp/ggml-cpu-impl.h +171 -11
package/cpp/ggml-cpu-quants.c +1812 -389
package/cpp/ggml-cpu-traits.cpp +36 -0
package/cpp/ggml-cpu-traits.h +38 -0
package/cpp/ggml-cpu.c +1432 -610
package/cpp/ggml-cpu.cpp +131 -141
package/cpp/ggml-cpu.h +10 -50
package/cpp/ggml-impl.h +27 -11
package/cpp/ggml-metal-impl.h +39 -0
package/cpp/ggml-metal.h +1 -1
package/cpp/ggml-metal.m +1031 -359
package/cpp/ggml-opt.cpp +854 -0
package/cpp/ggml-opt.h +216 -0
package/cpp/ggml-quants.c +0 -9
package/cpp/ggml-threading.h +4 -2
package/cpp/ggml-whisper.metallib +0 -0
package/cpp/ggml.c +501 -1537
package/cpp/ggml.h +144 -171
package/cpp/gguf.cpp +1329 -0
package/cpp/gguf.h +202 -0
package/cpp/whisper.cpp +254 -114
package/cpp/whisper.h +6 -3
package/lib/commonjs/version.json +1 -1
package/lib/module/version.json +1 -1
package/package.json +1 -1
package/src/version.json +1 -1
package/whisper-rn.podspec +2 -2
package/cpp/README.md +0 -4
package/cpp/ggml-aarch64.c +0 -129
package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml-backend.cpp.rej +0 -12

package/cpp/ggml-common.h CHANGED

@@ -6,7 +6,20 @@
 typedef uint16_t wsp_ggml_half;
 typedef uint32_t wsp_ggml_half2;
-#define WSP_GGML_COMMON_AGGR
+#define WSP_GGML_COMMON_AGGR_U
+#define WSP_GGML_COMMON_AGGR_S
+#define WSP_GGML_COMMON_DECL
+#elif defined(WSP_GGML_COMMON_DECL_CPP)
+#include <cstdint>
+typedef uint16_t wsp_ggml_half;
+typedef uint32_t wsp_ggml_half2;
+// std-c++ allow anonymous unions but some compiler warn on it
+#define WSP_GGML_COMMON_AGGR_U data
+// std-c++ do not allow it.
+#define WSP_GGML_COMMON_AGGR_S data
 #define WSP_GGML_COMMON_DECL
 #elif defined(WSP_GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t wsp_ggml_half2;
 typedef half  wsp_ggml_half;
 typedef half2 wsp_ggml_half2;
-#define WSP_GGML_COMMON_AGGR
+#define WSP_GGML_COMMON_AGGR_U
+#define WSP_GGML_COMMON_AGGR_S
 #define WSP_GGML_COMMON_DECL
 #elif defined(WSP_GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 wsp_ggml_half2;
 typedef half  wsp_ggml_half;
 typedef half2 wsp_ggml_half2;
-#define WSP_GGML_COMMON_AGGR data
+#define WSP_GGML_COMMON_AGGR_U
+#define WSP_GGML_COMMON_AGGR_S data
 #define WSP_GGML_COMMON_DECL
 #elif defined(WSP_GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 wsp_ggml_half2;
 typedef half  wsp_ggml_half;
 typedef half2 wsp_ggml_half2;
-#define WSP_GGML_COMMON_AGGR data
+#define WSP_GGML_COMMON_AGGR_U
+#define WSP_GGML_COMMON_AGGR_S data
 #define WSP_GGML_COMMON_DECL
 #elif defined(WSP_GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 wsp_ggml_half2;
 typedef sycl::half  wsp_ggml_half;
 typedef sycl::half2 wsp_ggml_half2;
-#define WSP_GGML_COMMON_AGGR data
+#define WSP_GGML_COMMON_AGGR_U
+#define WSP_GGML_COMMON_AGGR_S data
 #define WSP_GGML_COMMON_DECL
 #endif
@@ -154,9 +171,9 @@ typedef struct {
         struct {
             wsp_ggml_half d; // delta
             wsp_ggml_half m; // min
-        } WSP_GGML_COMMON_AGGR;
+        } WSP_GGML_COMMON_AGGR_S;
         wsp_ggml_half2 dm;
-    };
+    } WSP_GGML_COMMON_AGGR_U;
     uint8_t qs[QK4_1 / 2]; // nibbles / quants
 } block_q4_1;
 static_assert(sizeof(block_q4_1) == 2 * sizeof(wsp_ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
         struct {
             wsp_ggml_half d; // delta
             wsp_ggml_half m; // min
-        } WSP_GGML_COMMON_AGGR;
+        } WSP_GGML_COMMON_AGGR_S;
         wsp_ggml_half2 dm;
-    };
+    } WSP_GGML_COMMON_AGGR_U;
     uint8_t qh[4];         // 5-th bit of quants
     uint8_t qs[QK5_1 / 2]; // nibbles / quants
 } block_q5_1;
@@ -196,37 +213,13 @@ typedef struct {
         struct {
             wsp_ggml_half d; // delta
             wsp_ggml_half s; // d * sum(qs[i])
-        } WSP_GGML_COMMON_AGGR;
+        } WSP_GGML_COMMON_AGGR_S;
         wsp_ggml_half2 ds;
-    };
+    } WSP_GGML_COMMON_AGGR_U;
     int8_t qs[QK8_1]; // quants
 } block_q8_1;
 static_assert(sizeof(block_q8_1) == 2*sizeof(wsp_ggml_half) + QK8_1, "wrong q8_1 block size/padding");
-typedef struct {
-    wsp_ggml_half d[4];        // deltas for 4 q4_0 blocks
-    uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
-} block_q4_0x4;
-static_assert(sizeof(block_q4_0x4) == 4 * sizeof(wsp_ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
-typedef struct {
-    wsp_ggml_half d[8];        // deltas for 8 q4_0 blocks
-    uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
-} block_q4_0x8;
-static_assert(sizeof(block_q4_0x8) == 8 * sizeof(wsp_ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
-typedef struct {
-    wsp_ggml_half d[4];        // deltas for 4 q8_0 blocks
-    int8_t qs[QK8_0 * 4];  // quants for 4 q8_0 blocks
-} block_q8_0x4;
-static_assert(sizeof(block_q8_0x4) == 4 * sizeof(wsp_ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
-typedef struct {
-    wsp_ggml_half d[8];        // deltas for 8 q8_0 blocks
-    int8_t qs[QK8_0 * 8];  // quants for 8 q8_0 blocks
-} block_q8_0x8;
-static_assert(sizeof(block_q8_0x8) == 8 * sizeof(wsp_ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
 //
 // Ternary quantization
 //
@@ -261,9 +254,9 @@ typedef struct {
         struct {
             wsp_ggml_half d;    // super-block scale for quantized scales
             wsp_ggml_half dmin; // super-block scale for quantized mins
-        } WSP_GGML_COMMON_AGGR;
+        } WSP_GGML_COMMON_AGGR_S;
         wsp_ggml_half2 dm;
-    };
+    } WSP_GGML_COMMON_AGGR_U;
 } block_q2_K;
 static_assert(sizeof(block_q2_K) == 2*sizeof(wsp_ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
@@ -288,9 +281,9 @@ typedef struct {
         struct {
             wsp_ggml_half d;    // super-block scale for quantized scales
             wsp_ggml_half dmin; // super-block scale for quantized mins
-        } WSP_GGML_COMMON_AGGR;
+        } WSP_GGML_COMMON_AGGR_S;
         wsp_ggml_half2 dm;
-    };
+    } WSP_GGML_COMMON_AGGR_U;
     uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
     uint8_t qs[QK_K/2];           // 4--bit quants
 } block_q4_K;
@@ -305,9 +298,9 @@ typedef struct {
         struct {
             wsp_ggml_half d;    // super-block scale for quantized scales
             wsp_ggml_half dmin; // super-block scale for quantized mins
-        } WSP_GGML_COMMON_AGGR;
+        } WSP_GGML_COMMON_AGGR_S;
         wsp_ggml_half2 dm;
-    };
+    } WSP_GGML_COMMON_AGGR_U;
     uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
     uint8_t qh[QK_K/8];           // quants, high bit
     uint8_t qs[QK_K/2];           // quants, low 4 bits
@@ -431,6 +424,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(wsp_ggml_half) + sizeof(uint16_t) +
 #define WSP_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
 #define WSP_GGML_TABLE_END() };
+#define WSP_GGML_COMMON_IMPL
+#elif defined(WSP_GGML_COMMON_IMPL_CPP)
+#include <cstdint>
+#define WSP_GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
+#define WSP_GGML_TABLE_END() };
 #define WSP_GGML_COMMON_IMPL
 #elif defined(WSP_GGML_COMMON_IMPL_METAL)
 #include <metal_stdlib>
@@ -473,7 +473,6 @@ WSP_GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
     240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
 WSP_GGML_TABLE_END()
-//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
 WSP_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
     0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
     0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
@@ -508,7 +507,6 @@ WSP_GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
     0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
     0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
 WSP_GGML_TABLE_END()
-//#endif
 WSP_GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)

package/cpp/ggml-cpp.h CHANGED

@@ -7,6 +7,7 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 #include <memory>
 // Smart pointers for ggml types