whisper.rn 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/android/src/main/java/com/rnwhisper/RNWhisper.java +21 -16
  2. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  4. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  8. package/cpp/ggml-backend.cpp +36 -18
  9. package/cpp/ggml-backend.h +1 -1
  10. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  11. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  12. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  13. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  14. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  15. package/cpp/ggml-cpu/common.h +3 -2
  16. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  17. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  18. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  19. package/cpp/ggml-cpu/ops.cpp +775 -74
  20. package/cpp/ggml-cpu/ops.h +7 -0
  21. package/cpp/ggml-cpu/quants.c +25 -24
  22. package/cpp/ggml-cpu/repack.cpp +15 -14
  23. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  24. package/cpp/ggml-cpu/vec.cpp +26 -2
  25. package/cpp/ggml-cpu/vec.h +99 -45
  26. package/cpp/ggml-cpu.h +2 -0
  27. package/cpp/ggml-impl.h +125 -183
  28. package/cpp/ggml-metal-impl.h +27 -0
  29. package/cpp/ggml-metal.m +298 -41
  30. package/cpp/ggml-quants.c +6 -6
  31. package/cpp/ggml-whisper-sim.metallib +0 -0
  32. package/cpp/ggml-whisper.metallib +0 -0
  33. package/cpp/ggml.c +269 -40
  34. package/cpp/ggml.h +122 -2
  35. package/cpp/gguf.cpp +5 -1
  36. package/cpp/whisper.cpp +4 -0
  37. package/cpp/whisper.h +2 -0
  38. package/ios/RNWhisper.mm +28 -31
  39. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  40. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  47. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  48. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  55. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  56. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  57. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  63. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  64. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  71. package/package.json +1 -1
@@ -6,6 +6,7 @@
6
6
  #include "ggml-impl.h"
7
7
  #include "ggml-cpu.h"
8
8
  #include "ggml-cpu-impl.h"
9
+ #include "simd-mappings.h"
9
10
  #include "traits.h"
10
11
 
11
12
  #include <cmath>
@@ -51,7 +52,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x4(const float * WSP_GGML_RESTRICT x, void
51
52
  const float d = amax / ((1 << 7) - 1);
52
53
  id[row_iter] = d ? 1.0f / d : 0.0f;
53
54
 
54
- y[i].d[row_iter] = WSP_GGML_FP32_TO_FP16(d);
55
+ y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
55
56
  }
56
57
 
57
58
  for (int j = 0; j < 8; j++) {
@@ -102,7 +103,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x4(const float * WSP_GGML_RESTRICT x, void
102
103
  const float d = amax / ((1 << 7) - 1);
103
104
  id[row_iter] = d ? 1.0f / d : 0.0f;
104
105
 
105
- y[i].d[row_iter] = WSP_GGML_FP32_TO_FP16(d);
106
+ y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
106
107
  }
107
108
 
108
109
  for (int j = 0; j < QK8_0 * 4; j++) {
@@ -145,7 +146,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x8(const float * WSP_GGML_RESTRICT x, void
145
146
  const float d = amax / ((1 << 7) - 1);
146
147
  id[row_iter] = d ? 1.0f / d : 0.0f;
147
148
 
148
- y[i].d[row_iter] = WSP_GGML_FP32_TO_FP16(d);
149
+ y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
149
150
  }
150
151
 
151
152
  for (int j = 0; j < 4; j++) {
@@ -221,7 +222,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x8(const float * WSP_GGML_RESTRICT x, void
221
222
  const float d = amax / ((1 << 7) - 1);
222
223
  id[row_iter] = d ? 1.0f / d : 0.0f;
223
224
 
224
- y[i].d[row_iter] = WSP_GGML_FP32_TO_FP16(d);
225
+ y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
225
226
  }
226
227
 
227
228
  for (int j = 0; j < QK8_0 * 4; j++) {
@@ -311,7 +312,7 @@ void wsp_ggml_gemv_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
311
312
  const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
312
313
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
313
314
  }
314
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
315
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
315
316
  }
316
317
  }
317
318
  }
@@ -399,7 +400,7 @@ void wsp_ggml_gemv_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
399
400
  const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
400
401
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
401
402
  }
402
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
403
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
403
404
  }
404
405
  }
405
406
  }
@@ -514,7 +515,7 @@ void wsp_ggml_gemv_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
514
515
  const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
515
516
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
516
517
  }
517
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
518
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
518
519
  }
519
520
  }
520
521
  }
@@ -608,7 +609,7 @@ void wsp_ggml_gemv_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs
608
609
  const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
609
610
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2]));
610
611
  }
611
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
612
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
612
613
  }
613
614
  }
614
615
  }
@@ -1117,7 +1118,7 @@ void wsp_ggml_gemm_q4_0_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
1117
1118
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
1118
1119
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
1119
1120
  }
1120
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
1121
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
1121
1122
  }
1122
1123
  }
1123
1124
  }
@@ -1570,7 +1571,7 @@ void wsp_ggml_gemm_q4_0_4x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
1570
1571
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
1571
1572
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
1572
1573
  }
1573
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
1574
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
1574
1575
  }
1575
1576
  }
1576
1577
  }
@@ -2039,7 +2040,7 @@ void wsp_ggml_gemm_q4_0_8x8_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs,
2039
2040
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
2040
2041
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
2041
2042
  }
2042
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
2043
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
2043
2044
  }
2044
2045
  }
2045
2046
  }
@@ -2147,7 +2148,7 @@ void wsp_ggml_gemm_iq4_nl_4x4_q8_0(int n, float * WSP_GGML_RESTRICT s, size_t bs
2147
2148
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
2148
2149
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4]));
2149
2150
  }
2150
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
2151
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
2151
2152
  }
2152
2153
  }
2153
2154
  }