whisper.rn 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/android/src/main/java/com/rnwhisper/RNWhisper.java +24 -18
  2. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -57
  3. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  9. package/cpp/ggml-backend.cpp +36 -18
  10. package/cpp/ggml-backend.h +1 -1
  11. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  12. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  13. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  14. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  15. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  16. package/cpp/ggml-cpu/common.h +3 -2
  17. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  18. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  19. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  20. package/cpp/ggml-cpu/ops.cpp +775 -74
  21. package/cpp/ggml-cpu/ops.h +7 -0
  22. package/cpp/ggml-cpu/quants.c +25 -24
  23. package/cpp/ggml-cpu/repack.cpp +15 -14
  24. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  25. package/cpp/ggml-cpu/vec.cpp +26 -2
  26. package/cpp/ggml-cpu/vec.h +99 -45
  27. package/cpp/ggml-cpu.h +2 -0
  28. package/cpp/ggml-impl.h +125 -183
  29. package/cpp/ggml-metal-impl.h +27 -0
  30. package/cpp/ggml-metal.m +298 -41
  31. package/cpp/ggml-quants.c +6 -6
  32. package/cpp/ggml-whisper-sim.metallib +0 -0
  33. package/cpp/ggml-whisper.metallib +0 -0
  34. package/cpp/ggml.c +269 -40
  35. package/cpp/ggml.h +122 -2
  36. package/cpp/gguf.cpp +5 -1
  37. package/cpp/whisper.cpp +4 -0
  38. package/cpp/whisper.h +2 -0
  39. package/ios/RNWhisper.mm +35 -38
  40. package/ios/RNWhisperVadContext.h +1 -1
  41. package/ios/RNWhisperVadContext.mm +2 -6
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  74. package/package.json +1 -1
@@ -58,7 +58,7 @@ inline static void wsp_ggml_vec_set_bf16(const int n, wsp_ggml_bf16_t * x, const
58
58
  inline static void wsp_ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }
59
59
  inline static void wsp_ggml_vec_add_f16 (const int n, wsp_ggml_fp16_t * z, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * y) {
60
60
  for (int i = 0; i < n; ++i) {
61
- z[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(x[i]) + WSP_GGML_FP16_TO_FP32(y[i]));
61
+ z[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(x[i]) + WSP_GGML_CPU_FP16_TO_FP32(y[i]));
62
62
  }
63
63
  }
64
64
  inline static void wsp_ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; }
@@ -67,7 +67,7 @@ inline static void wsp_ggml_vec_acc1_f32(const int n, float * y, const float v
67
67
  inline static void wsp_ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; }
68
68
  inline static void wsp_ggml_vec_sub_f16 (const int n, wsp_ggml_fp16_t * z, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * y) {
69
69
  for (int i = 0; i < n; ++i) {
70
- z[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(x[i]) - WSP_GGML_FP16_TO_FP32(y[i]));
70
+ z[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(x[i]) - WSP_GGML_CPU_FP16_TO_FP32(y[i]));
71
71
  }
72
72
  }
73
73
  inline static void wsp_ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; }
@@ -75,20 +75,20 @@ inline static void wsp_ggml_vec_cpy_f32 (const int n, float * y, const float * x
75
75
  inline static void wsp_ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; }
76
76
  inline static void wsp_ggml_vec_neg_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
77
77
  for (int i = 0; i < n; ++i) {
78
- y[i] = WSP_GGML_FP32_TO_FP16(-WSP_GGML_FP16_TO_FP32(x[i]));
78
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(-WSP_GGML_CPU_FP16_TO_FP32(x[i]));
79
79
  }
80
80
  }
81
81
 
82
82
  inline static void wsp_ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
83
83
  inline static void wsp_ggml_vec_mul_f16 (const int n, wsp_ggml_fp16_t * z, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * y) {
84
84
  for (int i = 0; i < n; ++i) {
85
- z[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(x[i]) * WSP_GGML_FP16_TO_FP32(y[i]));
85
+ z[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(x[i]) * WSP_GGML_CPU_FP16_TO_FP32(y[i]));
86
86
  }
87
87
  }
88
88
  inline static void wsp_ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }
89
89
  inline static void wsp_ggml_vec_div_f16 (const int n, wsp_ggml_fp16_t * z, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * y) {
90
90
  for (int i = 0; i < n; ++i) {
91
- z[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(x[i]) / WSP_GGML_FP16_TO_FP32(y[i]));
91
+ z[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(x[i]) / WSP_GGML_CPU_FP16_TO_FP32(y[i]));
92
92
  }
93
93
  }
94
94
 
@@ -131,13 +131,13 @@ inline static void wsp_ggml_vec_dot_f16_unroll(const int n, const int xs, float
131
131
  // leftovers
132
132
  for (int i = np; i < n; ++i) {
133
133
  for (int j = 0; j < WSP_GGML_VEC_DOT_UNROLL; ++j) {
134
- sumf[j] += (wsp_ggml_float)(WSP_GGML_FP16_TO_FP32(x[j][i])*WSP_GGML_FP16_TO_FP32(y[i]));
134
+ sumf[j] += (wsp_ggml_float)(WSP_GGML_CPU_FP16_TO_FP32(x[j][i])*WSP_GGML_CPU_FP16_TO_FP32(y[i]));
135
135
  }
136
136
  }
137
137
  #else
138
138
  for (int i = 0; i < n; ++i) {
139
139
  for (int j = 0; j < WSP_GGML_VEC_DOT_UNROLL; ++j) {
140
- sumf[j] += (wsp_ggml_float)(WSP_GGML_FP16_TO_FP32(x[j][i])*WSP_GGML_FP16_TO_FP32(y[i]));
140
+ sumf[j] += (wsp_ggml_float)(WSP_GGML_CPU_FP16_TO_FP32(x[j][i])*WSP_GGML_CPU_FP16_TO_FP32(y[i]));
141
141
  }
142
142
  }
143
143
  #endif
@@ -280,12 +280,12 @@ inline static void wsp_ggml_vec_mad_f16(const int n, wsp_ggml_fp16_t * WSP_GGML_
280
280
 
281
281
  // leftovers
282
282
  for (int i = np; i < n; ++i) {
283
- y[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(y[i]) + WSP_GGML_FP16_TO_FP32(x[i])*v);
283
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(y[i]) + WSP_GGML_CPU_FP16_TO_FP32(x[i])*v);
284
284
  }
285
285
  #else
286
286
  // scalar
287
287
  for (int i = 0; i < n; ++i) {
288
- y[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(y[i]) + WSP_GGML_FP16_TO_FP32(x[i])*v);
288
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(y[i]) + WSP_GGML_CPU_FP16_TO_FP32(x[i])*v);
289
289
  }
290
290
  #endif
291
291
  }
@@ -430,12 +430,12 @@ inline static void wsp_ggml_vec_scale_f16(const int n, wsp_ggml_fp16_t * y, cons
430
430
 
431
431
  // leftovers
432
432
  for (int i = np; i < n; ++i) {
433
- y[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(y[i])*v);
433
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(y[i])*v);
434
434
  }
435
435
  #else
436
436
  // scalar
437
437
  for (int i = 0; i < n; ++i) {
438
- y[i] = WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(y[i])*v);
438
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(y[i])*v);
439
439
  }
440
440
  #endif
441
441
  }
@@ -444,103 +444,103 @@ inline static void wsp_ggml_vec_norm_f32 (const int n, float * s, const float *
444
444
  inline static void wsp_ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
445
445
  inline static void wsp_ggml_vec_sqr_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
446
446
  for (int i = 0; i < n; ++i) {
447
- float v = WSP_GGML_FP16_TO_FP32(x[i]);
448
- y[i] = WSP_GGML_FP32_TO_FP16(v*v);
447
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
448
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(v*v);
449
449
  }
450
450
  }
451
451
  inline static void wsp_ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
452
452
  inline static void wsp_ggml_vec_sqrt_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
453
453
  for (int i = 0; i < n; ++i) {
454
- y[i] = WSP_GGML_FP32_TO_FP16(sqrtf(WSP_GGML_FP16_TO_FP32(x[i])));
454
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(sqrtf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
455
455
  }
456
456
  }
457
457
  inline static void wsp_ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
458
458
  inline static void wsp_ggml_vec_log_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
459
459
  for (int i = 0; i < n; ++i) {
460
- y[i] = WSP_GGML_FP32_TO_FP16(logf(WSP_GGML_FP16_TO_FP32(x[i])));
460
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(logf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
461
461
  }
462
462
  }
463
463
  inline static void wsp_ggml_vec_sin_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sinf(x[i]); }
464
464
  inline static void wsp_ggml_vec_sin_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
465
465
  for (int i = 0; i < n; ++i) {
466
- y[i] = WSP_GGML_FP32_TO_FP16(sinf(WSP_GGML_FP16_TO_FP32(x[i])));
466
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(sinf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
467
467
  }
468
468
  }
469
469
  inline static void wsp_ggml_vec_cos_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = cosf(x[i]); }
470
470
  inline static void wsp_ggml_vec_cos_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
471
471
  for (int i = 0; i < n; ++i) {
472
- y[i] = WSP_GGML_FP32_TO_FP16(cosf(WSP_GGML_FP16_TO_FP32(x[i])));
472
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(cosf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
473
473
  }
474
474
  }
475
475
  inline static void wsp_ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
476
476
  inline static void wsp_ggml_vec_abs_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
477
477
  for (int i = 0; i < n; ++i) {
478
- y[i] = WSP_GGML_FP32_TO_FP16(fabsf(WSP_GGML_FP16_TO_FP32(x[i])));
478
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(fabsf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
479
479
  }
480
480
  }
481
481
  inline static void wsp_ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
482
482
  inline static void wsp_ggml_vec_sgn_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
483
483
  for (int i = 0; i < n; ++i) {
484
- float v = WSP_GGML_FP16_TO_FP32(x[i]);
485
- y[i] = WSP_GGML_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f));
484
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
485
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f));
486
486
  }
487
487
  }
488
488
  inline static void wsp_ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
489
489
  inline static void wsp_ggml_vec_step_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
490
490
  for (int i = 0; i < n; ++i) {
491
- y[i] = WSP_GGML_FP32_TO_FP16((WSP_GGML_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f);
491
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16((WSP_GGML_CPU_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f);
492
492
  }
493
493
  }
494
494
  inline static void wsp_ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
495
495
  inline static void wsp_ggml_vec_tanh_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
496
496
  for (int i = 0; i < n; ++i) {
497
- y[i] = WSP_GGML_FP32_TO_FP16(tanhf(WSP_GGML_FP16_TO_FP32(x[i])));
497
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(tanhf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
498
498
  }
499
499
  }
500
500
  inline static void wsp_ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
501
501
  inline static void wsp_ggml_vec_elu_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
502
502
  for (int i = 0; i < n; ++i) {
503
- y[i] = WSP_GGML_FP32_TO_FP16(expm1f(WSP_GGML_FP16_TO_FP32(x[i])));
503
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(expm1f(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
504
504
  }
505
505
  }
506
506
  inline static void wsp_ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
507
507
  inline static void wsp_ggml_vec_relu_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
508
508
  for (int i = 0; i < n; ++i) {
509
- float v = WSP_GGML_FP16_TO_FP32(x[i]);
510
- y[i] = WSP_GGML_FP32_TO_FP16((v > 0.f) ? v : 0.f);
509
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
510
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16((v > 0.f) ? v : 0.f);
511
511
  }
512
512
  }
513
513
  inline static void wsp_ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
514
514
  inline static void wsp_ggml_vec_leaky_relu_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x, const float ns) {
515
515
  for (int i = 0; i < n; ++i) {
516
- float v = WSP_GGML_FP16_TO_FP32(x[i]);
517
- y[i] = WSP_GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
516
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
517
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
518
518
  }
519
519
  }
520
520
  inline static void wsp_ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
521
521
  inline static void wsp_ggml_vec_sigmoid_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
522
522
  for (int i = 0; i < n; ++i) {
523
- y[i] = WSP_GGML_FP32_TO_FP16(1.f / (1.f + expf(-WSP_GGML_FP16_TO_FP32(x[i]))));
523
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(1.f / (1.f + expf(-WSP_GGML_CPU_FP16_TO_FP32(x[i]))));
524
524
  }
525
525
  }
526
526
  // TODO: optimize performance
527
527
  inline static void wsp_ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
528
528
  inline static void wsp_ggml_vec_hardswish_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
529
529
  for (int i = 0; i < n; ++i) {
530
- float v = WSP_GGML_FP16_TO_FP32(x[i]);
531
- y[i] = WSP_GGML_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f)));
530
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
531
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f)));
532
532
  }
533
533
  }
534
534
  inline static void wsp_ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
535
535
  inline static void wsp_ggml_vec_hardsigmoid_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
536
536
  for (int i = 0; i < n; ++i) {
537
- y[i] = WSP_GGML_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (WSP_GGML_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f)));
537
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (WSP_GGML_CPU_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f)));
538
538
  }
539
539
  }
540
540
  inline static void wsp_ggml_vec_exp_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = expf(x[i]); }
541
541
  inline static void wsp_ggml_vec_exp_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
542
542
  for (int i = 0; i < n; ++i) {
543
- y[i] = WSP_GGML_FP32_TO_FP16(expf(WSP_GGML_FP16_TO_FP32(x[i])));
543
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(expf(WSP_GGML_CPU_FP16_TO_FP32(x[i])));
544
544
  }
545
545
  }
546
546
 
@@ -562,9 +562,9 @@ inline static void wsp_ggml_vec_gelu_f16(const int n, wsp_ggml_fp16_t * y, const
562
562
 
563
563
  inline static void wsp_ggml_vec_gelu_erf_f16(const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
564
564
  for (int i = 0; i < n; ++i) {
565
- float xi = WSP_GGML_FP16_TO_FP32(x[i]);
565
+ float xi = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
566
566
  float res = 0.5f*xi*(1.0f + erff(xi*SQRT_2_INV));
567
- y[i] = WSP_GGML_FP32_TO_FP16(res);
567
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(res);
568
568
  }
569
569
  }
570
570
 
@@ -577,9 +577,9 @@ inline static void wsp_ggml_vec_gelu_f32(const int n, float * y, const float * x
577
577
  } else if (x[i] >= 10.0f) {
578
578
  y[i] = x[i];
579
579
  } else {
580
- wsp_ggml_fp16_t fp16 = WSP_GGML_FP32_TO_FP16(x[i]);
580
+ wsp_ggml_fp16_t fp16 = WSP_GGML_CPU_FP32_TO_FP16(x[i]);
581
581
  memcpy(&t, &fp16, sizeof(uint16_t));
582
- y[i] = WSP_GGML_FP16_TO_FP32(wsp_ggml_table_gelu_f16[t]);
582
+ y[i] = WSP_GGML_CPU_FP16_TO_FP32(wsp_ggml_table_gelu_f16[t]);
583
583
  }
584
584
  }
585
585
  }
@@ -613,9 +613,9 @@ inline static float wsp_ggml_gelu_quick_f32(float x) {
613
613
  inline static void wsp_ggml_vec_gelu_quick_f32(const int n, float * y, const float * x) {
614
614
  uint16_t t;
615
615
  for (int i = 0; i < n; ++i) {
616
- wsp_ggml_fp16_t fp16 = WSP_GGML_FP32_TO_FP16(x[i]);
616
+ wsp_ggml_fp16_t fp16 = WSP_GGML_CPU_FP32_TO_FP16(x[i]);
617
617
  memcpy(&t, &fp16, sizeof(uint16_t));
618
- y[i] = WSP_GGML_FP16_TO_FP32(wsp_ggml_table_gelu_quick_f16[t]);
618
+ y[i] = WSP_GGML_CPU_FP16_TO_FP32(wsp_ggml_table_gelu_quick_f16[t]);
619
619
  }
620
620
  }
621
621
  #else
@@ -628,8 +628,8 @@ inline static void wsp_ggml_vec_gelu_quick_f32(const int n, float * y, const flo
628
628
 
629
629
  inline static void wsp_ggml_vec_gelu_quick_f16(const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x) {
630
630
  for (int i = 0; i < n; ++i) {
631
- float v = WSP_GGML_FP16_TO_FP32(x[i]);
632
- y[i] = WSP_GGML_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))));
631
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
632
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))));
633
633
  }
634
634
  }
635
635
 
@@ -638,8 +638,8 @@ inline static float wsp_ggml_silu_f32(float x) {
638
638
  return x/(1.0f + expf(-x));
639
639
  }
640
640
  inline static wsp_ggml_fp16_t wsp_ggml_silu_f16(wsp_ggml_fp16_t x) {
641
- float v = WSP_GGML_FP16_TO_FP32(x);
642
- return WSP_GGML_FP32_TO_FP16(v/(1.0f + expf(-v)));
641
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x);
642
+ return WSP_GGML_CPU_FP32_TO_FP16(v/(1.0f + expf(-v)));
643
643
  }
644
644
 
645
645
  #if __FINITE_MATH_ONLY__
@@ -888,9 +888,9 @@ inline static float wsp_ggml_silu_backward_f32(float x, float dy) {
888
888
  }
889
889
 
890
890
  inline static wsp_ggml_fp16_t wsp_ggml_silu_backward_f16(wsp_ggml_fp16_t x, wsp_ggml_fp16_t dy) {
891
- const float v = WSP_GGML_FP16_TO_FP32(x);
891
+ const float v = WSP_GGML_CPU_FP16_TO_FP32(x);
892
892
  const float s = 1.0f/(1.0f + expf(-v));
893
- return WSP_GGML_FP32_TO_FP16(WSP_GGML_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s)));
893
+ return WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s)));
894
894
  }
895
895
 
896
896
  inline static void wsp_ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) {
@@ -905,6 +905,60 @@ inline static void wsp_ggml_vec_silu_backward_f16(const int n, wsp_ggml_fp16_t *
905
905
  }
906
906
  }
907
907
 
908
+ inline static void wsp_ggml_vec_reglu_f32 (const int n, float * y, const float * x, const float * g) {
909
+ for (int i = 0; i < n; ++i) {
910
+ y[i] = (x[i] > 0.f) ? x[i] * g[i] : 0.f;
911
+ }
912
+ }
913
+
914
+ inline static void wsp_ggml_vec_reglu_f16 (const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * g) {
915
+ for (int i = 0; i < n; ++i) {
916
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
917
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16((v > 0.f) ? v * WSP_GGML_CPU_FP16_TO_FP32(g[i]) : 0.f);
918
+ }
919
+ }
920
+
921
+ #ifdef WSP_GGML_GELU_FP16
922
+ inline static void wsp_ggml_vec_geglu_f32(const int n, float * y, const float * x, const float * g) {
923
+ uint16_t t;
924
+ for (int i = 0; i < n; ++i) {
925
+ if (x[i] <= -10.0f) {
926
+ y[i] = 0.0f;
927
+ } else if (x[i] >= 10.0f) {
928
+ y[i] = x[i] * g[i];
929
+ } else {
930
+ wsp_ggml_fp16_t fp16 = WSP_GGML_CPU_FP32_TO_FP16(x[i]);
931
+ memcpy(&t, &fp16, sizeof(uint16_t));
932
+ y[i] = WSP_GGML_CPU_FP16_TO_FP32(wsp_ggml_table_gelu_f16[t]) * g[i];
933
+ }
934
+ }
935
+ }
936
+ #else
937
+ inline static void wsp_ggml_vec_geglu_f32(const int n, float * y, const float * x, const float * g) {
938
+ for (int i = 0; i < n; ++i) {
939
+ y[i] = wsp_ggml_gelu_f32(x[i]) * g[i];
940
+ }
941
+ }
942
+ #endif
943
+
944
+ inline static void wsp_ggml_vec_geglu_f16(const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * g) {
945
+ const uint16_t * i16 = (const uint16_t *) x;
946
+ for (int i = 0; i < n; ++i) {
947
+ float v = WSP_GGML_CPU_FP16_TO_FP32(g[i]);
948
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16(WSP_GGML_CPU_FP16_TO_FP32(wsp_ggml_table_gelu_f16[i16[i]]) * v);
949
+ }
950
+ }
951
+
952
+ void wsp_ggml_vec_swiglu_f32(const int n, float * y, const float * x, const float * g);
953
+
954
+ inline static void wsp_ggml_vec_swiglu_f16(const int n, wsp_ggml_fp16_t * y, const wsp_ggml_fp16_t * x, const wsp_ggml_fp16_t * g) {
955
+ for (int i = 0; i < n; ++i) {
956
+ float v = WSP_GGML_CPU_FP16_TO_FP32(x[i]);
957
+ float w = WSP_GGML_CPU_FP16_TO_FP32(g[i]);
958
+ y[i] = WSP_GGML_CPU_FP32_TO_FP16((v/(1.0f + expf(-v))) * w);
959
+ }
960
+ }
961
+
908
962
  inline static void wsp_ggml_vec_sum_f32(const int n, float * s, const float * x) {
909
963
  #ifndef WSP_GGML_USE_ACCELERATE
910
964
  wsp_ggml_float sum = 0.0;
@@ -928,7 +982,7 @@ inline static void wsp_ggml_vec_sum_f32_ggf(const int n, wsp_ggml_float * s, con
928
982
  inline static void wsp_ggml_vec_sum_f16_ggf(const int n, float * s, const wsp_ggml_fp16_t * x) {
929
983
  float sum = 0.0f;
930
984
  for (int i = 0; i < n; ++i) {
931
- sum += WSP_GGML_FP16_TO_FP32(x[i]);
985
+ sum += WSP_GGML_CPU_FP16_TO_FP32(x[i]);
932
986
  }
933
987
  *s = sum;
934
988
  }
package/cpp/ggml-cpu.h CHANGED
@@ -101,6 +101,7 @@ extern "C" {
101
101
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_riscv_v (void);
102
102
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vsx (void);
103
103
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_vxe (void);
104
+ WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_nnpa (void);
104
105
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_wasm_simd (void);
105
106
  WSP_GGML_BACKEND_API int wsp_ggml_cpu_has_llamafile (void);
106
107
 
@@ -133,6 +134,7 @@ extern "C" {
133
134
 
134
135
  WSP_GGML_BACKEND_API wsp_ggml_backend_reg_t wsp_ggml_backend_cpu_reg(void);
135
136
 
137
+ WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
136
138
  WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_fp16(const float *, wsp_ggml_fp16_t *, int64_t);
137
139
  WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t *, float *, int64_t);
138
140
  WSP_GGML_BACKEND_API void wsp_ggml_cpu_fp32_to_bf16(const float *, wsp_ggml_bf16_t *, int64_t);