whisper.rn 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/android/src/main/java/com/rnwhisper/RNWhisper.java +24 -18
  2. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -57
  3. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  5. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  9. package/cpp/ggml-backend.cpp +36 -18
  10. package/cpp/ggml-backend.h +1 -1
  11. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  12. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  13. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  14. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  15. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  16. package/cpp/ggml-cpu/common.h +3 -2
  17. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  18. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  19. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  20. package/cpp/ggml-cpu/ops.cpp +775 -74
  21. package/cpp/ggml-cpu/ops.h +7 -0
  22. package/cpp/ggml-cpu/quants.c +25 -24
  23. package/cpp/ggml-cpu/repack.cpp +15 -14
  24. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  25. package/cpp/ggml-cpu/vec.cpp +26 -2
  26. package/cpp/ggml-cpu/vec.h +99 -45
  27. package/cpp/ggml-cpu.h +2 -0
  28. package/cpp/ggml-impl.h +125 -183
  29. package/cpp/ggml-metal-impl.h +27 -0
  30. package/cpp/ggml-metal.m +298 -41
  31. package/cpp/ggml-quants.c +6 -6
  32. package/cpp/ggml-whisper-sim.metallib +0 -0
  33. package/cpp/ggml-whisper.metallib +0 -0
  34. package/cpp/ggml.c +269 -40
  35. package/cpp/ggml.h +122 -2
  36. package/cpp/gguf.cpp +5 -1
  37. package/cpp/whisper.cpp +4 -0
  38. package/cpp/whisper.h +2 -0
  39. package/ios/RNWhisper.mm +35 -38
  40. package/ios/RNWhisperVadContext.h +1 -1
  41. package/ios/RNWhisperVadContext.mm +2 -6
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  58. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  59. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  66. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  67. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  74. package/package.json +1 -1
@@ -326,7 +326,7 @@ public class RNWhisper implements LifecycleEventListener {
326
326
  @Override
327
327
  protected Void doInBackground(Void... voids) {
328
328
  try {
329
- onHostDestroy();
329
+ releaseAllContexts();
330
330
  } catch (Exception e) {
331
331
  exception = e;
332
332
  }
@@ -415,7 +415,8 @@ public class RNWhisper implements LifecycleEventListener {
415
415
  @Override
416
416
  protected WritableArray doInBackground(Void... voids) {
417
417
  try {
418
- return vadContext.detectSpeech(audioDataBase64, options);
418
+ float[] audioData = AudioUtils.decodePcmData(audioDataBase64);
419
+ return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
419
420
  } catch (Exception e) {
420
421
  exception = e;
421
422
  return null;
@@ -468,7 +469,7 @@ public class RNWhisper implements LifecycleEventListener {
468
469
  throw new Exception("Failed to load audio file: " + filePathOrBase64);
469
470
  }
470
471
 
471
- return vadContext.detectSpeechWithAudioData(audioData, options);
472
+ return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
472
473
  } catch (Exception e) {
473
474
  exception = e;
474
475
  return null;
@@ -528,10 +529,7 @@ public class RNWhisper implements LifecycleEventListener {
528
529
  @Override
529
530
  protected Void doInBackground(Void... voids) {
530
531
  try {
531
- for (WhisperVadContext vadContext : vadContexts.values()) {
532
- vadContext.release();
533
- }
534
- vadContexts.clear();
532
+ releaseAllVadContexts();
535
533
  } catch (Exception e) {
536
534
  exception = e;
537
535
  }
@@ -559,27 +557,35 @@ public class RNWhisper implements LifecycleEventListener {
559
557
  public void onHostPause() {
560
558
  }
561
559
 
562
- @Override
563
- public void onHostDestroy() {
560
+ private void releaseAllContexts() {
564
561
  for (WhisperContext context : contexts.values()) {
565
562
  context.stopCurrentTranscribe();
566
563
  }
567
- for (AsyncTask task : tasks.keySet()) {
568
- try {
569
- task.get();
570
- } catch (Exception e) {
571
- Log.e(NAME, "Failed to wait for task", e);
572
- }
573
- }
564
+ WhisperContext.abortAllTranscribe(); // graceful abort
574
565
  for (WhisperContext context : contexts.values()) {
575
566
  context.release();
576
567
  }
568
+ contexts.clear();
569
+ }
570
+
571
+ private void releaseAllVadContexts() {
577
572
  for (WhisperVadContext vadContext : vadContexts.values()) {
578
573
  vadContext.release();
579
574
  }
580
- WhisperContext.abortAllTranscribe(); // graceful abort
581
- contexts.clear();
582
575
  vadContexts.clear();
576
+ }
577
+
578
+ @Override
579
+ public void onHostDestroy() {
580
+ for (AsyncTask task : tasks.keySet()) {
581
+ try {
582
+ task.get();
583
+ } catch (Exception e) {
584
+ Log.e(NAME, "Failed to wait for task", e);
585
+ }
586
+ }
583
587
  downloader.clearCache();
588
+ releaseAllContexts();
589
+ releaseAllVadContexts();
584
590
  }
585
591
  }
@@ -25,70 +25,14 @@ public class WhisperVadContext {
25
25
  this.reactContext = reactContext;
26
26
  }
27
27
 
28
- public WritableArray detectSpeech(String audioDataBase64, ReadableMap options) throws Exception {
28
+ public WritableArray detectSpeechWithAudioData(float[] audioData, int numSamples, ReadableMap options) throws Exception {
29
29
  if (vadContext == 0) {
30
30
  throw new Exception("VAD context is null");
31
31
  }
32
32
 
33
- // Decode base64 audio data to float array
34
- byte[] audioBytes = Base64.decode(audioDataBase64, Base64.DEFAULT);
35
- int numSamples = audioBytes.length / 4; // 4 bytes per float
36
- float[] audioData = new float[numSamples];
37
-
38
- for (int i = 0; i < numSamples; i++) {
39
- int intBits = (audioBytes[i * 4] & 0xFF) |
40
- ((audioBytes[i * 4 + 1] & 0xFF) << 8) |
41
- ((audioBytes[i * 4 + 2] & 0xFF) << 16) |
42
- ((audioBytes[i * 4 + 3] & 0xFF) << 24);
43
- audioData[i] = Float.intBitsToFloat(intBits);
44
- }
45
-
46
33
  return processVadDetection(audioData, numSamples, options);
47
34
  }
48
35
 
49
- public WritableArray detectSpeechFile(String filePathOrBase64, ReadableMap options) throws Exception {
50
- if (vadContext == 0) {
51
- throw new Exception("VAD context is null");
52
- }
53
-
54
- // Follow the same pattern as transcribeFile
55
- String filePath = filePathOrBase64;
56
-
57
- // Handle HTTP downloads
58
- if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
59
- // Note: This would require access to the downloader, but for now we'll throw an error
60
- throw new Exception("HTTP URLs not supported in VAD file detection. Please download the file first.");
61
- }
62
-
63
- float[] audioData;
64
-
65
- // Check for resource identifier (bundled assets)
66
- int resId = getResourceIdentifier(filePath);
67
- if (resId > 0) {
68
- audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
69
- } else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
70
- // Handle base64 WAV data
71
- audioData = AudioUtils.decodeWaveData(filePathOrBase64);
72
- } else {
73
- // Handle regular file path
74
- audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
75
- }
76
-
77
- if (audioData == null) {
78
- throw new Exception("Failed to load audio file: " + filePathOrBase64);
79
- }
80
-
81
- return processVadDetection(audioData, audioData.length, options);
82
- }
83
-
84
- public WritableArray detectSpeechWithAudioData(float[] audioData, ReadableMap options) throws Exception {
85
- if (vadContext == 0) {
86
- throw new Exception("VAD context is null");
87
- }
88
-
89
- return processVadDetection(audioData, audioData.length, options);
90
- }
91
-
92
36
  private int getResourceIdentifier(String filePath) {
93
37
  int identifier = reactContext.getResources().getIdentifier(
94
38
  filePath,
@@ -817,8 +817,9 @@ static void wsp_ggml_backend_sched_print_assignments(wsp_ggml_backend_sched_t sc
817
817
  }
818
818
  if (sched->debug > 1) {
819
819
  wsp_ggml_backend_t tensor_backend = wsp_ggml_backend_sched_get_tensor_backend(sched, node);
820
- WSP_GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, wsp_ggml_op_name(node->op), node->name,
821
- fmt_size(wsp_ggml_nbytes(node)), tensor_backend ? wsp_ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
820
+ WSP_GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, wsp_ggml_op_name(node->op), node->name,
821
+ fmt_size(wsp_ggml_nbytes(node)), tensor_backend ? wsp_ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node),
822
+ graph->use_counts[wsp_ggml_hash_find(&graph->visited_hash_set, node)]);
822
823
  for (int j = 0; j < WSP_GGML_MAX_SRC; j++) {
823
824
  struct wsp_ggml_tensor * src = node->src[j];
824
825
  if (src == NULL) {
@@ -1826,7 +1827,7 @@ void wsp_ggml_backend_graph_copy_free(struct wsp_ggml_backend_graph_copy copy) {
1826
1827
  wsp_ggml_free(copy.ctx_unallocated);
1827
1828
  }
1828
1829
 
1829
- bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data) {
1830
+ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node) {
1830
1831
  struct wsp_ggml_backend_graph_copy copy = wsp_ggml_backend_graph_copy(backend2, graph);
1831
1832
  if (copy.buffer == NULL) {
1832
1833
  return false;
@@ -1837,28 +1838,45 @@ bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggm
1837
1838
 
1838
1839
  assert(g1->n_nodes == g2->n_nodes);
1839
1840
 
1840
- for (int i = 0; i < g1->n_nodes; i++) {
1841
- struct wsp_ggml_tensor * t1 = g1->nodes[i];
1842
- struct wsp_ggml_tensor * t2 = g2->nodes[i];
1841
+ if (test_node != nullptr) {
1842
+ // Compute the whole graph and only test the output for a specific tensor
1843
+ wsp_ggml_backend_graph_compute(backend1, g1);
1844
+ wsp_ggml_backend_graph_compute(backend2, g2);
1843
1845
 
1844
- assert(t1->op == t2->op && wsp_ggml_are_same_layout(t1, t2));
1846
+ int test_node_idx = -1;
1847
+ for (int i = 0; i < g1->n_nodes; i++) {
1848
+ struct wsp_ggml_tensor * t1 = g1->nodes[i];
1849
+ if (t1 == test_node) {
1850
+ test_node_idx = i;
1851
+ break;
1852
+ }
1853
+ }
1854
+ WSP_GGML_ASSERT(test_node_idx != -1);
1845
1855
 
1846
- struct wsp_ggml_cgraph g1v = wsp_ggml_graph_view(g1, i, i + 1);
1847
- struct wsp_ggml_cgraph g2v = wsp_ggml_graph_view(g2, i, i + 1);
1856
+ callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data);
1857
+ } else {
1858
+ for (int i = 0; i < g1->n_nodes; i++) {
1859
+ struct wsp_ggml_tensor * t1 = g1->nodes[i];
1860
+ struct wsp_ggml_tensor * t2 = g2->nodes[i];
1848
1861
 
1849
- wsp_ggml_backend_graph_compute(backend1, &g1v);
1850
- wsp_ggml_backend_graph_compute(backend2, &g2v);
1862
+ assert(t1->op == t2->op && wsp_ggml_are_same_layout(t1, t2));
1851
1863
 
1852
- if (wsp_ggml_is_view_op(t1->op)) {
1853
- continue;
1854
- }
1864
+ struct wsp_ggml_cgraph g1v = wsp_ggml_graph_view(g1, i, i + 1);
1865
+ struct wsp_ggml_cgraph g2v = wsp_ggml_graph_view(g2, i, i + 1);
1855
1866
 
1856
- // compare results, calculate rms etc
1857
- if (!callback(i, t1, t2, user_data)) {
1858
- break;
1867
+ wsp_ggml_backend_graph_compute(backend1, &g1v);
1868
+ wsp_ggml_backend_graph_compute(backend2, &g2v);
1869
+
1870
+ if (wsp_ggml_is_view_op(t1->op)) {
1871
+ continue;
1872
+ }
1873
+
1874
+ // compare results, calculate rms etc
1875
+ if (!callback(i, t1, t2, user_data)) {
1876
+ break;
1877
+ }
1859
1878
  }
1860
1879
  }
1861
-
1862
1880
  wsp_ggml_backend_graph_copy_free(copy);
1863
1881
 
1864
1882
  return true;
@@ -339,7 +339,7 @@ extern "C" {
339
339
  typedef bool (*wsp_ggml_backend_eval_callback)(int node_index, struct wsp_ggml_tensor * t1, struct wsp_ggml_tensor * t2, void * user_data);
340
340
 
341
341
  // Compare the output of two backends
342
- WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data);
342
+ WSP_GGML_API bool wsp_ggml_backend_compare_graph_backend(wsp_ggml_backend_t backend1, wsp_ggml_backend_t backend2, struct wsp_ggml_cgraph * graph, wsp_ggml_backend_eval_callback callback, void * user_data, struct wsp_ggml_tensor * test_node);
343
343
 
344
344
  // Tensor initialization
345
345
  WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_tensor_alloc(wsp_ggml_backend_buffer_t buffer, struct wsp_ggml_tensor * tensor, void * addr);
@@ -8,6 +8,7 @@
8
8
  #include "mmq.h"
9
9
  #include "ggml-impl.h"
10
10
  #include "ggml-cpu-impl.h"
11
+ #include "simd-mappings.h"
11
12
  #include "quants.h"
12
13
  #include "ggml-quants.h"
13
14
  #include <algorithm>
@@ -453,7 +454,7 @@ void wsp_quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, in
453
454
 
454
455
  // Quantize these floats
455
456
  const float iscale = 127.f / amax;
456
- y[i].d = WSP_GGML_FP32_TO_FP16(1 / iscale);
457
+ y[i].d = WSP_GGML_CPU_FP32_TO_FP16(1 / iscale);
457
458
  const float id = ( amax != 0.0f ) ? iscale : 0.f;
458
459
  const __m512 vscale = _mm512_set1_ps(id);
459
460
 
@@ -1090,7 +1091,7 @@ struct acc_C<block_q8_0, block_q4_0, is_acc> {
1090
1091
  const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
1091
1092
 
1092
1093
  for (int m = 0; m < nr; ++m) {
1093
- const __m512 vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].d));
1094
+ const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
1094
1095
  const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
1095
1096
 
1096
1097
  __m512 vsum;
@@ -1113,8 +1114,8 @@ struct acc_C<block_q8_1, block_q4_1, is_acc> {
1113
1114
  const __m512 vm0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof(wsp_ggml_half))));
1114
1115
 
1115
1116
  for (int m = 0; m < nr; ++m) {
1116
- const __m512 vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].d));
1117
- const __m512 vs1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].s));
1117
+ const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
1118
+ const __m512 vs1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].s));
1118
1119
  const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
1119
1120
 
1120
1121
  __m512 vsum;
@@ -1137,7 +1138,7 @@ struct acc_C<block_q8_0, block_q8_0, is_acc> {
1137
1138
  const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
1138
1139
 
1139
1140
  for (int m = 0; m < nr; ++m) {
1140
- const __m512 vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[m * lda].d));
1141
+ const __m512 vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[m * lda].d));
1141
1142
  const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
1142
1143
 
1143
1144
  __m512 vsum;
@@ -1437,7 +1438,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLO
1437
1438
  va[k] = _mm512_set1_epi32(a_ptr[k]);
1438
1439
  vcomp = _mm512_dpbusd_epi32(vcomp, off, va[k]);
1439
1440
  }
1440
- vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].d));
1441
+ vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
1441
1442
  }
1442
1443
 
1443
1444
  // load b
@@ -1498,8 +1499,8 @@ struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K>
1498
1499
  for (int k = 0; k < 8; ++k) {
1499
1500
  va[k] = _mm512_set1_epi32(a_ptr[k]);
1500
1501
  }
1501
- vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].d));
1502
- vs1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].s));
1502
+ vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
1503
+ vs1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].s));
1503
1504
  }
1504
1505
 
1505
1506
  // load b
@@ -1571,7 +1572,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLO
1571
1572
  va[k] = _mm512_set1_epi32(a_ptr[k]);
1572
1573
  va[k] = _mm512_add_epi8(va[k], off);
1573
1574
  }
1574
- vd1 = _mm512_set1_ps(WSP_GGML_FP16_TO_FP32(A[0 * KB + i].d));
1575
+ vd1 = _mm512_set1_ps(WSP_GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
1575
1576
  }
1576
1577
 
1577
1578
  // load b