whisper.rn 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/android/src/main/java/com/rnwhisper/RNWhisper.java +24 -18
  2. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -57
  3. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  9. package/cpp/ggml-backend.cpp +36 -18
  10. package/cpp/ggml-backend.h +1 -1
  11. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  12. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  13. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  14. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  15. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  16. package/cpp/ggml-cpu/common.h +3 -2
  17. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  18. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  19. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  20. package/cpp/ggml-cpu/ops.cpp +775 -74
  21. package/cpp/ggml-cpu/ops.h +7 -0
  22. package/cpp/ggml-cpu/quants.c +25 -24
  23. package/cpp/ggml-cpu/repack.cpp +15 -14
  24. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  25. package/cpp/ggml-cpu/vec.cpp +26 -2
  26. package/cpp/ggml-cpu/vec.h +99 -45
  27. package/cpp/ggml-cpu.h +2 -0
  28. package/cpp/ggml-impl.h +125 -183
  29. package/cpp/ggml-metal-impl.h +27 -0
  30. package/cpp/ggml-metal.m +298 -41
  31. package/cpp/ggml-quants.c +6 -6
  32. package/cpp/ggml-whisper-sim.metallib +0 -0
  33. package/cpp/ggml-whisper.metallib +0 -0
  34. package/cpp/ggml.c +269 -40
  35. package/cpp/ggml.h +122 -2
  36. package/cpp/gguf.cpp +5 -1
  37. package/cpp/whisper.cpp +4 -0
  38. package/cpp/whisper.h +2 -0
  39. package/ios/RNWhisper.mm +35 -38
  40. package/ios/RNWhisperVadContext.h +1 -1
  41. package/ios/RNWhisperVadContext.mm +2 -6
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  74. package/package.json +1 -1
@@ -20,6 +20,9 @@
20
20
 
21
21
  static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
22
22
 
23
+ // Work buffer size for im2col operations in CONV2D
24
+ #define WSP_GGML_IM2COL_WORK_SIZE (16 * 1024 * 1024)
25
+
23
26
  #ifdef __cplusplus
24
27
  extern "C" {
25
28
  #endif
@@ -53,6 +56,7 @@ void wsp_ggml_compute_forward_permute(const struct wsp_ggml_compute_params * par
53
56
  void wsp_ggml_compute_forward_transpose(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
54
57
  void wsp_ggml_compute_forward_get_rows(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
55
58
  void wsp_ggml_compute_forward_get_rows_back(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
59
+ void wsp_ggml_compute_forward_set_rows(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
56
60
  void wsp_ggml_compute_forward_diag(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
57
61
  void wsp_ggml_compute_forward_diag_mask_inf(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
58
62
  void wsp_ggml_compute_forward_diag_mask_zero(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
@@ -64,6 +68,7 @@ void wsp_ggml_compute_forward_clamp(const struct wsp_ggml_compute_params * param
64
68
  void wsp_ggml_compute_forward_conv_transpose_1d(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
65
69
  void wsp_ggml_compute_forward_im2col(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
66
70
  void wsp_ggml_compute_forward_im2col_back_f32(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
71
+ void wsp_ggml_compute_forward_conv_2d(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
67
72
  void wsp_ggml_compute_forward_conv_transpose_2d(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
68
73
  void wsp_ggml_compute_forward_conv_2d_dw(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
69
74
  void wsp_ggml_compute_forward_pool_1d(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
@@ -93,6 +98,7 @@ void wsp_ggml_compute_forward_ssm_scan(const struct wsp_ggml_compute_params * pa
93
98
  void wsp_ggml_compute_forward_win_part(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
94
99
  void wsp_ggml_compute_forward_win_unpart(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
95
100
  void wsp_ggml_compute_forward_unary(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
101
+ void wsp_ggml_compute_forward_glu(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
96
102
  void wsp_ggml_compute_forward_get_rel_pos(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
97
103
  void wsp_ggml_compute_forward_add_rel_pos(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
98
104
  void wsp_ggml_compute_forward_rwkv_wkv6(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
@@ -105,6 +111,7 @@ void wsp_ggml_compute_forward_custom(const struct wsp_ggml_compute_params * para
105
111
  void wsp_ggml_compute_forward_cross_entropy_loss(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
106
112
  void wsp_ggml_compute_forward_cross_entropy_loss_back(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
107
113
  void wsp_ggml_compute_forward_opt_step_adamw(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
114
+ void wsp_ggml_compute_forward_mul_mat(const struct wsp_ggml_compute_params * params, struct wsp_ggml_tensor * dst);
108
115
 
109
116
  #ifdef __cplusplus
110
117
  }
@@ -2,6 +2,7 @@
2
2
  #include "ggml-common.h"
3
3
 
4
4
  #include "ggml-cpu-impl.h"
5
+ #include "simd-mappings.h"
5
6
  #include "ggml-quants.h"
6
7
  #include "quants.h"
7
8
 
@@ -137,7 +138,7 @@ void wsp_ggml_vec_dot_q4_0_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, size
137
138
  }
138
139
 
139
140
  int sumi = sumi0 + sumi1;
140
- sumf += sumi*WSP_GGML_FP16_TO_FP32(x[ib].d)*WSP_GGML_FP16_TO_FP32(y[ib].d);
141
+ sumf += sumi*WSP_GGML_CPU_FP16_TO_FP32(x[ib].d)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].d);
141
142
  }
142
143
 
143
144
  *s = sumf;
@@ -174,7 +175,7 @@ void wsp_ggml_vec_dot_q4_1_q8_1_generic(int n, float * WSP_GGML_RESTRICT s, size
174
175
  }
175
176
 
176
177
  int sumi = sumi0 + sumi1;
177
- sumf += (WSP_GGML_FP16_TO_FP32(x[ib].d)*WSP_GGML_FP16_TO_FP32(y[ib].d))*sumi + WSP_GGML_FP16_TO_FP32(x[ib].m)*WSP_GGML_FP16_TO_FP32(y[ib].s);
178
+ sumf += (WSP_GGML_CPU_FP16_TO_FP32(x[ib].d)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + WSP_GGML_CPU_FP16_TO_FP32(x[ib].m)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].s);
178
179
  }
179
180
 
180
181
  *s = sumf;
@@ -217,7 +218,7 @@ void wsp_ggml_vec_dot_q5_0_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, size
217
218
  }
218
219
 
219
220
  int sumi = sumi0 + sumi1;
220
- sumf += (WSP_GGML_FP16_TO_FP32(x[ib].d)*WSP_GGML_FP16_TO_FP32(y[ib].d)) * sumi;
221
+ sumf += (WSP_GGML_CPU_FP16_TO_FP32(x[ib].d)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi;
221
222
  }
222
223
 
223
224
  *s = sumf;
@@ -260,7 +261,7 @@ void wsp_ggml_vec_dot_q5_1_q8_1_generic(int n, float * WSP_GGML_RESTRICT s, size
260
261
  }
261
262
 
262
263
  int sumi = sumi0 + sumi1;
263
- sumf += (WSP_GGML_FP16_TO_FP32(x[ib].d)*WSP_GGML_FP16_TO_FP32(y[ib].d))*sumi + WSP_GGML_FP16_TO_FP32(x[ib].m)*WSP_GGML_FP16_TO_FP32(y[ib].s);
264
+ sumf += (WSP_GGML_CPU_FP16_TO_FP32(x[ib].d)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + WSP_GGML_CPU_FP16_TO_FP32(x[ib].m)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].s);
264
265
  }
265
266
 
266
267
  *s = sumf;
@@ -290,7 +291,7 @@ void wsp_ggml_vec_dot_q8_0_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, size
290
291
  sumi += x[ib].qs[j]*y[ib].qs[j];
291
292
  }
292
293
 
293
- sumf += sumi*(WSP_GGML_FP16_TO_FP32(x[ib].d)*WSP_GGML_FP16_TO_FP32(y[ib].d));
294
+ sumf += sumi*(WSP_GGML_CPU_FP16_TO_FP32(x[ib].d)*WSP_GGML_CPU_FP16_TO_FP32(y[ib].d));
294
295
  }
295
296
 
296
297
  *s = sumf;
@@ -342,7 +343,7 @@ void wsp_ggml_vec_dot_tq1_0_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
342
343
  }
343
344
  }
344
345
 
345
- sumf += (float) sum * (WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d);
346
+ sumf += (float) sum * (WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d);
346
347
  }
347
348
 
348
349
  *s = sumf;
@@ -372,7 +373,7 @@ void wsp_ggml_vec_dot_tq2_0_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
372
373
  }
373
374
  }
374
375
 
375
- const float d = y[i].d * WSP_GGML_FP16_TO_FP32(x[i].d);
376
+ const float d = y[i].d * WSP_GGML_CPU_FP16_TO_FP32(x[i].d);
376
377
 
377
378
  sumf += (float) sumi * d;
378
379
  }
@@ -405,8 +406,8 @@ void wsp_ggml_vec_dot_q2_K_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, size
405
406
  summs += y[i].bsums[j] * (sc[j] >> 4);
406
407
  }
407
408
 
408
- const float dall = y[i].d * WSP_GGML_FP16_TO_FP32(x[i].d);
409
- const float dmin = y[i].d * WSP_GGML_FP16_TO_FP32(x[i].dmin);
409
+ const float dall = y[i].d * WSP_GGML_CPU_FP16_TO_FP32(x[i].d);
410
+ const float dmin = y[i].d * WSP_GGML_CPU_FP16_TO_FP32(x[i].dmin);
410
411
 
411
412
  int isum = 0;
412
413
  int is = 0;
@@ -504,7 +505,7 @@ void wsp_ggml_vec_dot_q3_K_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, size
504
505
  for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l];
505
506
  q8 += 8; a += 8;
506
507
  }
507
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
508
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
508
509
  for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
509
510
  }
510
511
  for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -577,9 +578,9 @@ void wsp_ggml_vec_dot_q4_K_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, size
577
578
  for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
578
579
  q8 += 8; a += 8;
579
580
  }
580
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
581
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
581
582
  for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
582
- const float dmin = WSP_GGML_FP16_TO_FP32(x[i].dmin) * y[i].d;
583
+ const float dmin = WSP_GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d;
583
584
  sumf -= dmin * sumi;
584
585
  }
585
586
  for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -657,9 +658,9 @@ void wsp_ggml_vec_dot_q5_K_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, size
657
658
  for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
658
659
  q8 += 8; a += 8;
659
660
  }
660
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
661
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
661
662
  for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
662
- const float dmin = WSP_GGML_FP16_TO_FP32(x[i].dmin) * y[i].d;
663
+ const float dmin = WSP_GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d;
663
664
  sumf -= dmin * sumi;
664
665
  }
665
666
  for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -714,7 +715,7 @@ void wsp_ggml_vec_dot_q6_K_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, size
714
715
  for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
715
716
  q8 += 8; a += 8;
716
717
  }
717
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
718
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
718
719
  for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
719
720
  }
720
721
  for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -739,7 +740,7 @@ void wsp_ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, s
739
740
 
740
741
  float sumf = 0.f;
741
742
  for (int i = 0; i < nb; ++i) {
742
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
743
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
743
744
  const uint16_t * WSP_GGML_RESTRICT q2 = x[i].qs;
744
745
  const int8_t * WSP_GGML_RESTRICT q8 = y[i].qs;
745
746
  int32_t bsum = 0;
@@ -778,7 +779,7 @@ void wsp_ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, si
778
779
 
779
780
  float sumf = 0.f;
780
781
  for (int i = 0; i < nb; ++i) {
781
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
782
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
782
783
  const uint16_t * WSP_GGML_RESTRICT q2 = x[i].qs;
783
784
  const uint8_t * WSP_GGML_RESTRICT sc = x[i].scales;
784
785
  const int8_t * WSP_GGML_RESTRICT q8 = y[i].qs;
@@ -829,7 +830,7 @@ void wsp_ggml_vec_dot_iq2_s_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
829
830
  float sumf = 0;
830
831
  for (int i = 0; i < nb; i++) {
831
832
 
832
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
833
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
833
834
  const int8_t * q8 = y[i].qs;
834
835
  const uint8_t * qs = x[i].qs;
835
836
  const uint8_t * qh = x[i].qh;
@@ -882,7 +883,7 @@ void wsp_ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, s
882
883
 
883
884
  float sumf = 0.f;
884
885
  for (int i = 0; i < nb; ++i) {
885
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
886
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
886
887
  const uint8_t * WSP_GGML_RESTRICT q3 = x[i].qs;
887
888
  const uint8_t * WSP_GGML_RESTRICT gas = x[i].qs + QK_K/4;
888
889
  const int8_t * WSP_GGML_RESTRICT q8 = y[i].qs;
@@ -924,7 +925,7 @@ void wsp_ggml_vec_dot_iq3_s_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
924
925
 
925
926
  float sumf = 0.f;
926
927
  for (int i = 0; i < nb; ++i) {
927
- const float d = WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d;
928
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d;
928
929
  const uint8_t * WSP_GGML_RESTRICT qs = x[i].qs;
929
930
  const uint8_t * WSP_GGML_RESTRICT qh = x[i].qh;
930
931
  const uint8_t * WSP_GGML_RESTRICT signs = x[i].signs;
@@ -1002,7 +1003,7 @@ void wsp_ggml_vec_dot_iq1_s_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
1002
1003
  qs += 4;
1003
1004
  }
1004
1005
 
1005
- sumf += WSP_GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1);
1006
+ sumf += WSP_GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1);
1006
1007
  }
1007
1008
 
1008
1009
  *s = sumf;
@@ -1063,7 +1064,7 @@ void wsp_ggml_vec_dot_iq1_m_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
1063
1064
  qh += 2;
1064
1065
  }
1065
1066
 
1066
- sumf += WSP_GGML_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2);
1067
+ sumf += WSP_GGML_CPU_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2);
1067
1068
  }
1068
1069
 
1069
1070
  *s = sumf;
@@ -1087,7 +1088,7 @@ void wsp_ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, si
1087
1088
  float sumf = 0;
1088
1089
 
1089
1090
  for (; ib < nb; ++ib) {
1090
- const float d = WSP_GGML_FP16_TO_FP32(y[ib].d)*WSP_GGML_FP16_TO_FP32(x[ib].d);
1091
+ const float d = WSP_GGML_CPU_FP16_TO_FP32(y[ib].d)*WSP_GGML_CPU_FP16_TO_FP32(x[ib].d);
1091
1092
  int sumi1 = 0, sumi2 = 0;
1092
1093
  for (int j = 0; j < QK4_NL/2; ++j) {
1093
1094
  sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf];
@@ -1113,7 +1114,7 @@ void wsp_ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, si
1113
1114
 
1114
1115
  float sumf = 0;
1115
1116
  for (int ibl = 0; ibl < nb; ++ibl) {
1116
- const float d4d8 = WSP_GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d;
1117
+ const float d4d8 = WSP_GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d;
1117
1118
  uint16_t h = x[ibl].scales_h;
1118
1119
  const uint8_t * qs = x[ibl].qs;
1119
1120
  const int8_t * q8 = y[ibl].qs;
@@ -6,6 +6,7 @@
6
6
  #include "ggml-impl.h"
7
7
  #include "ggml-cpu.h"
8
8
  #include "ggml-cpu-impl.h"
9
+ #include "simd-mappings.h"
9
10
  #include "traits.h"
10
11
 
11
12
  #include "arch-fallback.h"
@@ -72,7 +73,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic(const float * WSP_GGML_RESTRICT
72
73
  const float d = amax / ((1 << 7) - 1);
73
74
  id[row_iter] = d ? 1.0f / d : 0.0f;
74
75
 
75
- y[i].d[row_iter] = WSP_GGML_FP32_TO_FP16(d);
76
+ y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
76
77
  }
77
78
 
78
79
  for (int j = 0; j < QK8_0 * 4; j++) {
@@ -110,7 +111,7 @@ void wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic(const float * WSP_GGML_RESTRICT
110
111
  const float d = amax / ((1 << 7) - 1);
111
112
  id[row_iter] = d ? 1.0f / d : 0.0f;
112
113
 
113
- y[i].d[row_iter] = WSP_GGML_FP32_TO_FP16(d);
114
+ y[i].d[row_iter] = WSP_GGML_CPU_FP32_TO_FP16(d);
114
115
  }
115
116
 
116
117
  for (int j = 0; j < QK8_0 * 4; j++) {
@@ -236,7 +237,7 @@ void wsp_ggml_gemv_q4_0_4x4_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, siz
236
237
  const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
237
238
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
238
239
  }
239
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
240
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
240
241
  }
241
242
  }
242
243
  }
@@ -280,7 +281,7 @@ void wsp_ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, siz
280
281
  const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
281
282
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
282
283
  }
283
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
284
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
284
285
  }
285
286
  }
286
287
  }
@@ -325,7 +326,7 @@ void wsp_ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, siz
325
326
  const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
326
327
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
327
328
  }
328
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
329
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
329
330
  }
330
331
  }
331
332
  }
@@ -396,13 +397,13 @@ void wsp_ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
396
397
  sumi2 = sumi2 * scales_1[j];
397
398
  sumi += sumi1 + sumi2;
398
399
  }
399
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d;
400
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d;
400
401
  }
401
402
  }
402
403
  for (int sb = 0; sb < 8; sb++) {
403
404
  uint8_t *mins = (uint8_t*) utmp + 8 + sb * 16;
404
405
  for (int j = 0; j < ncols_interleaved; j++) {
405
- sum_minf[j] += mins[j] * (a_ptr[l].bsums[sb * 2] + a_ptr[l].bsums[sb * 2 + 1]) * WSP_GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d;
406
+ sum_minf[j] += mins[j] * (a_ptr[l].bsums[sb * 2] + a_ptr[l].bsums[sb * 2 + 1]) * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d;
406
407
  }
407
408
  }
408
409
  }
@@ -449,7 +450,7 @@ void wsp_ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, s
449
450
  const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
450
451
  sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2]));
451
452
  }
452
- sumf[j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d);
453
+ sumf[j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
453
454
  }
454
455
  }
455
456
  }
@@ -500,7 +501,7 @@ void wsp_ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, siz
500
501
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
501
502
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
502
503
  }
503
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
504
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
504
505
  }
505
506
  }
506
507
  }
@@ -555,7 +556,7 @@ void wsp_ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, siz
555
556
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
556
557
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
557
558
  }
558
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
559
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
559
560
  }
560
561
  }
561
562
  }
@@ -609,7 +610,7 @@ void wsp_ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, siz
609
610
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
610
611
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
611
612
  }
612
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
613
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
613
614
  }
614
615
  }
615
616
  }
@@ -688,7 +689,7 @@ void wsp_ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
688
689
  sumi2 = sumi2 * scales_1[j];
689
690
  sumi += sumi1 + sumi2;
690
691
  }
691
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d[m];
692
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d[m];
692
693
  }
693
694
  }
694
695
  }
@@ -697,7 +698,7 @@ void wsp_ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * WSP_GGML_RESTRICT s, siz
697
698
  for(int m = 0; m < 4; m++) {
698
699
  const int16_t *bsums = a_ptr[l].bsums + (sb * 8) + (m * 4) - ((sb % 2) * 6);
699
700
  for(int j = 0; j < ncols_interleaved; j++) {
700
- sum_minf[m][j] += mins[j] * (bsums[0] + bsums[1]) * WSP_GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d[m];
701
+ sum_minf[m][j] += mins[j] * (bsums[0] + bsums[1]) * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d[m];
701
702
  }
702
703
  }
703
704
  }
@@ -753,7 +754,7 @@ void wsp_ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * WSP_GGML_RESTRICT s, s
753
754
  sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
754
755
  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4]));
755
756
  }
756
- sumf[m][j] += sumi * WSP_GGML_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_FP16_TO_FP32(a_ptr[l].d[m]);
757
+ sumf[m][j] += sumi * WSP_GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * WSP_GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
757
758
  }
758
759
  }
759
760
  }