whisper.rn 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/android/build.gradle +2 -1
  2. package/android/gradle.properties +1 -1
  3. package/android/src/main/jni.cpp +12 -3
  4. package/cpp/ggml-alloc.c +292 -130
  5. package/cpp/ggml-backend-impl.h +4 -4
  6. package/cpp/ggml-backend-reg.cpp +13 -5
  7. package/cpp/ggml-backend.cpp +207 -17
  8. package/cpp/ggml-backend.h +19 -1
  9. package/cpp/ggml-cpu/amx/amx.cpp +5 -2
  10. package/cpp/ggml-cpu/arch/x86/repack.cpp +2 -2
  11. package/cpp/ggml-cpu/arch-fallback.h +0 -4
  12. package/cpp/ggml-cpu/common.h +14 -0
  13. package/cpp/ggml-cpu/ggml-cpu-impl.h +14 -7
  14. package/cpp/ggml-cpu/ggml-cpu.c +65 -44
  15. package/cpp/ggml-cpu/ggml-cpu.cpp +14 -4
  16. package/cpp/ggml-cpu/ops.cpp +542 -775
  17. package/cpp/ggml-cpu/ops.h +2 -0
  18. package/cpp/ggml-cpu/simd-mappings.h +88 -59
  19. package/cpp/ggml-cpu/unary-ops.cpp +135 -0
  20. package/cpp/ggml-cpu/unary-ops.h +5 -0
  21. package/cpp/ggml-cpu/vec.cpp +227 -20
  22. package/cpp/ggml-cpu/vec.h +407 -56
  23. package/cpp/ggml-cpu.h +1 -1
  24. package/cpp/ggml-impl.h +94 -12
  25. package/cpp/ggml-metal/ggml-metal-common.cpp +446 -0
  26. package/cpp/ggml-metal/ggml-metal-common.h +52 -0
  27. package/cpp/ggml-metal/ggml-metal-context.h +33 -0
  28. package/cpp/ggml-metal/ggml-metal-context.m +600 -0
  29. package/cpp/ggml-metal/ggml-metal-device.cpp +1565 -0
  30. package/cpp/ggml-metal/ggml-metal-device.h +244 -0
  31. package/cpp/ggml-metal/ggml-metal-device.m +1325 -0
  32. package/cpp/ggml-metal/ggml-metal-impl.h +802 -0
  33. package/cpp/ggml-metal/ggml-metal-ops.cpp +3583 -0
  34. package/cpp/ggml-metal/ggml-metal-ops.h +88 -0
  35. package/cpp/ggml-metal/ggml-metal.cpp +718 -0
  36. package/cpp/ggml-metal/ggml-whisper-sim.metallib +0 -0
  37. package/cpp/ggml-metal/ggml-whisper.metallib +0 -0
  38. package/cpp/ggml-metal-impl.h +40 -40
  39. package/cpp/ggml-metal.h +1 -6
  40. package/cpp/ggml-quants.c +1 -0
  41. package/cpp/ggml.c +341 -15
  42. package/cpp/ggml.h +150 -5
  43. package/cpp/jsi/RNWhisperJSI.cpp +9 -2
  44. package/cpp/jsi/ThreadPool.h +3 -3
  45. package/cpp/rn-whisper.h +1 -0
  46. package/cpp/whisper.cpp +89 -72
  47. package/cpp/whisper.h +1 -0
  48. package/ios/CMakeLists.txt +6 -1
  49. package/ios/RNWhisperContext.mm +3 -1
  50. package/ios/RNWhisperVadContext.mm +14 -13
  51. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  52. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  53. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  54. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  55. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  56. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  57. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +150 -5
  58. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  59. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
  60. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  61. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  62. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  63. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  64. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  65. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  66. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  67. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  68. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  69. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +150 -5
  70. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  71. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
  72. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  73. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  74. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  75. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  76. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  77. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  78. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  79. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  80. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  81. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  82. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +150 -5
  83. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  84. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +1 -0
  85. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  86. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  87. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  88. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +4 -4
  89. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +19 -1
  90. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -1
  91. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +94 -12
  92. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +40 -40
  93. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal.h +1 -6
  94. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +150 -5
  95. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/rn-whisper.h +1 -0
  96. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +1 -0
  97. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  98. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  99. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  100. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  101. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  102. package/lib/commonjs/version.json +1 -1
  103. package/lib/module/NativeRNWhisper.js.map +1 -1
  104. package/lib/module/version.json +1 -1
  105. package/lib/typescript/NativeRNWhisper.d.ts +2 -0
  106. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  107. package/package.json +1 -1
  108. package/src/NativeRNWhisper.ts +2 -0
  109. package/src/version.json +1 -1
  110. package/whisper-rn.podspec +8 -9
  111. package/cpp/ggml-metal.m +0 -6779
  112. package/cpp/ggml-whisper-sim.metallib +0 -0
  113. package/cpp/ggml-whisper.metallib +0 -0
@@ -132,6 +132,8 @@ extern "C" {
         WSP_GGML_BACKEND_DEVICE_TYPE_CPU,
         // GPU device using dedicated memory
         WSP_GGML_BACKEND_DEVICE_TYPE_GPU,
+        // integrated GPU device using host memory
+        WSP_GGML_BACKEND_DEVICE_TYPE_IGPU,
         // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
         WSP_GGML_BACKEND_DEVICE_TYPE_ACCEL
     };
@@ -150,11 +152,21 @@ extern "C" {
 
     // all the device properties
     struct wsp_ggml_backend_dev_props {
+        // device name
         const char * name;
+        // device description
         const char * description;
+        // device free memory in bytes
         size_t memory_free;
+        // device total memory in bytes
         size_t memory_total;
+        // device type
         enum wsp_ggml_backend_dev_type type;
+        // device id
+        // for PCI devices, this should be the PCI bus id formatted as "domain:bus:device.function" (e.g. "0000:01:00.0")
+        // if the id is unknown, this should be NULL
+        const char * device_id;
+        // device capabilities
         struct wsp_ggml_backend_dev_caps caps;
     };
 
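The expanded wsp_ggml_backend_dev_props struct documents every field and adds a nullable device_id alongside the new IGPU device type. A minimal sketch of reading these properties, assuming the wsp_-prefixed enumeration helpers (wsp_ggml_backend_dev_count, wsp_ggml_backend_dev_get, wsp_ggml_backend_dev_get_props) mirror upstream ggml-backend.h:

    // Sketch: list devices and print the new fields. device_id may be NULL
    // when unknown, and WSP_GGML_BACKEND_DEVICE_TYPE_IGPU is new in 0.5.2.
    #include <cstdio>
    #include "ggml-backend.h"

    static void print_devices(void) {
        for (size_t i = 0; i < wsp_ggml_backend_dev_count(); ++i) {
            wsp_ggml_backend_dev_t dev = wsp_ggml_backend_dev_get(i);
            struct wsp_ggml_backend_dev_props props;
            wsp_ggml_backend_dev_get_props(dev, &props);
            std::printf("%s (%s): id=%s type=%d free=%zu total=%zu\n",
                        props.name, props.description,
                        props.device_id ? props.device_id : "unknown",
                        (int) props.type, props.memory_free, props.memory_total);
        }
    }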
@@ -203,6 +215,8 @@ extern "C" {
     // Backend registry
     //
 
+    WSP_GGML_API void wsp_ggml_backend_register(wsp_ggml_backend_reg_t reg);
+
     WSP_GGML_API void wsp_ggml_backend_device_register(wsp_ggml_backend_dev_t device);
 
     // Backend (reg) enumeration
@@ -302,11 +316,15 @@ extern "C" {
     WSP_GGML_API int wsp_ggml_backend_sched_get_n_splits(wsp_ggml_backend_sched_t sched);
     WSP_GGML_API int wsp_ggml_backend_sched_get_n_copies(wsp_ggml_backend_sched_t sched);
 
-    WSP_GGML_API size_t wsp_ggml_backend_sched_get_buffer_size(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend);
+    WSP_GGML_API wsp_ggml_backend_buffer_type_t wsp_ggml_backend_sched_get_buffer_type(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend);
+    WSP_GGML_API size_t wsp_ggml_backend_sched_get_buffer_size(wsp_ggml_backend_sched_t sched, wsp_ggml_backend_t backend);
 
     WSP_GGML_API void wsp_ggml_backend_sched_set_tensor_backend(wsp_ggml_backend_sched_t sched, struct wsp_ggml_tensor * node, wsp_ggml_backend_t backend);
     WSP_GGML_API wsp_ggml_backend_t wsp_ggml_backend_sched_get_tensor_backend(wsp_ggml_backend_sched_t sched, struct wsp_ggml_tensor * node);
 
+    // Split graph without allocating it
+    WSP_GGML_API void wsp_ggml_backend_sched_split_graph(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph);
+
     // Allocate and compute graph on the backend scheduler
     WSP_GGML_API bool wsp_ggml_backend_sched_alloc_graph(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph); // returns success
     WSP_GGML_API enum wsp_ggml_status wsp_ggml_backend_sched_graph_compute(wsp_ggml_backend_sched_t sched, struct wsp_ggml_cgraph * graph);
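The scheduler gains wsp_ggml_backend_sched_split_graph (split without allocating) and wsp_ggml_backend_sched_get_buffer_type. A minimal sketch of inspecting a schedule before committing memory, assuming the wsp_-prefixed status enum (WSP_GGML_STATUS_SUCCESS) mirrors upstream ggml:

    // Sketch: split first (no allocation), inspect the plan, then allocate
    // and compute with the existing entry points from this header.
    static bool plan_and_run(wsp_ggml_backend_sched_t sched,
                             struct wsp_ggml_cgraph * graph,
                             wsp_ggml_backend_t backend) {
        wsp_ggml_backend_sched_split_graph(sched, graph); // new: split only
        const int n_splits = wsp_ggml_backend_sched_get_n_splits(sched);
        wsp_ggml_backend_buffer_type_t buft =
            wsp_ggml_backend_sched_get_buffer_type(sched, backend); // new in 0.5.2
        (void) n_splits; (void) buft; // e.g. log or validate the plan here

        if (!wsp_ggml_backend_sched_alloc_graph(sched, graph)) {
            return false;
        }
        return wsp_ggml_backend_sched_graph_compute(sched, graph) == WSP_GGML_STATUS_SUCCESS;
    }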
@@ -7,7 +7,7 @@
 #include "ggml-cpu.h"
 #include "traits.h"
 
-#if defined(__gnu_linux__)
+#if defined(__linux__)
 #include <sys/syscall.h>
 #include <unistd.h>
 #endif
@@ -149,6 +149,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
         if (op->op == WSP_GGML_OP_MUL_MAT && is_contiguous_2d(op->src[0]) && // src0 must be contiguous
             is_contiguous_2d(op->src[1]) && // src1 must be contiguous
             op->src[0]->buffer && op->src[0]->buffer->buft == wsp_ggml_backend_amx_buffer_type() &&
+            op->src[0]->ne[0] % (TILE_K * 2 * 32) == 0 && // TODO: not sure if correct (https://github.com/ggml-org/llama.cpp/pull/16315)
             op->ne[0] % (TILE_N * 2) == 0 && // out_features is 32x
             (qtype_has_amx_kernels(op->src[0]->type) || (op->src[0]->type == WSP_GGML_TYPE_F16))) {
             // src1 must be host buffer
@@ -186,7 +187,7 @@ static size_t wsp_ggml_backend_amx_buffer_type_get_alloc_size(wsp_ggml_backend_b
 #define XFEATURE_XTILEDATA 18
 
 static bool wsp_ggml_amx_init() {
-#if defined(__gnu_linux__)
+#if defined(__linux__)
     if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
         fprintf(stderr, "AMX is not ready to be used!\n");
         return false;
@@ -194,6 +195,8 @@ static bool wsp_ggml_amx_init() {
     return true;
 #elif defined(_WIN32)
     return true;
+#else
+    return false;
 #endif
 }
 
@@ -878,7 +878,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * WSP_GGML_RESTRICT s, siz
     const __m256i rhs_raw_mat_89AB_1 = _mm256_loadu_si256((const __m256i *)(b_ptr_1[b].qs + 64));
     const __m256i rhs_raw_mat_CDEF_1 = _mm256_loadu_si256((const __m256i *)(b_ptr_1[b].qs + 96));
 
-    // Save the values in the following vectors in the formats B0B1B4B5, B2B3B6B7 for further processing and storing of valuess
+    // Save the values in the following vectors in the formats B0B1B4B5, B2B3B6B7 for further processing and storing of values
     const __m256i rhs_raw_mat_0145_0 = _mm256_blend_epi32(rhs_raw_mat_0123_0, _mm256_permutevar8x32_epi32(rhs_raw_mat_4567_0, requiredOrder), 240);
     const __m256i rhs_raw_mat_2367_0 = _mm256_blend_epi32(_mm256_permutevar8x32_epi32(rhs_raw_mat_0123_0, requiredOrder), rhs_raw_mat_4567_0, 240);
     const __m256i rhs_raw_mat_0145_1 = _mm256_blend_epi32(rhs_raw_mat_0123_1, _mm256_permutevar8x32_epi32(rhs_raw_mat_4567_1, requiredOrder), 240);
@@ -1231,7 +1231,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * WSP_GGML_RESTRICT s, siz
     const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64));
     const __m256i rhs_raw_mat_4567_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 96));
 
-    // Save the values in the following vectors in the formats B0B1B4B5, B2B3B6B7 for further processing and storing of valuess
+    // Save the values in the following vectors in the formats B0B1B4B5, B2B3B6B7 for further processing and storing of values
     const __m256i rhs_raw_mat_0145_0 = _mm256_blend_epi32(rhs_raw_mat_0123_0, _mm256_permutevar8x32_epi32(rhs_raw_mat_4567_0, requiredOrder), 240);
     const __m256i rhs_raw_mat_2367_0 = _mm256_blend_epi32(_mm256_permutevar8x32_epi32(rhs_raw_mat_0123_0, requiredOrder), rhs_raw_mat_4567_0, 240);
     const __m256i rhs_raw_mat_0145_1 = _mm256_blend_epi32(rhs_raw_mat_0123_1, _mm256_permutevar8x32_epi32(rhs_raw_mat_4567_1, requiredOrder), 240);
@@ -73,7 +73,6 @@
 #define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
 #define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
 #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
-#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
 #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -151,8 +150,6 @@
 #elif defined(__s390x__)
 // quants.c
 #define wsp_quantize_row_q8_K_generic wsp_quantize_row_q8_K
-#define wsp_ggml_vec_dot_q5_0_q8_0_generic wsp_ggml_vec_dot_q5_0_q8_0
-#define wsp_ggml_vec_dot_q5_1_q8_1_generic wsp_ggml_vec_dot_q5_1_q8_1
 #define wsp_ggml_vec_dot_tq1_0_q8_K_generic wsp_ggml_vec_dot_tq1_0_q8_K
 #define wsp_ggml_vec_dot_tq2_0_q8_K_generic wsp_ggml_vec_dot_tq2_0_q8_K
 #define wsp_ggml_vec_dot_q2_K_q8_K_generic wsp_ggml_vec_dot_q2_K_q8_K
@@ -163,7 +160,6 @@
 #define wsp_ggml_vec_dot_iq3_s_q8_K_generic wsp_ggml_vec_dot_iq3_s_q8_K
 #define wsp_ggml_vec_dot_iq1_s_q8_K_generic wsp_ggml_vec_dot_iq1_s_q8_K
 #define wsp_ggml_vec_dot_iq1_m_q8_K_generic wsp_ggml_vec_dot_iq1_m_q8_K
-#define wsp_ggml_vec_dot_mxfp4_q8_0_generic wsp_ggml_vec_dot_mxfp4_q8_0
 // repack.cpp
 #define wsp_ggml_wsp_quantize_mat_q8_0_4x4_generic wsp_ggml_wsp_quantize_mat_q8_0_4x4
 #define wsp_ggml_wsp_quantize_mat_q8_0_4x8_generic wsp_ggml_wsp_quantize_mat_q8_0_4x8
@@ -28,6 +28,14 @@ static inline float bf16_to_f32(wsp_ggml_bf16_t x) {
     return WSP_GGML_BF16_TO_FP32(x);
 }
 
+static inline float i32_to_f32(int32_t x) {
+    return x;
+}
+
+static inline int32_t f32_to_i32(float x) {
+    return x;
+}
+
 static inline float f32_to_f32(float x) {
     return x;
 }
@@ -54,6 +62,12 @@ struct type_conversion_table<wsp_ggml_bf16_t> {
     static constexpr wsp_ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
 };
 
+template <>
+struct type_conversion_table<int32_t> {
+    static constexpr float (*to_f32)(int32_t) = i32_to_f32;
+    static constexpr int32_t (*from_f32)(float) = f32_to_i32;
+};
+
 static std::pair<int64_t, int64_t> get_thread_range(const struct wsp_ggml_compute_params * params, const struct wsp_ggml_tensor * src0) {
     const int64_t ith = params->ith;
     const int64_t nth = params->nth;
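This specialization routes I32 through the same conversion table already used for F16 and BF16, so templated CPU kernels need no I32-specific code. An illustrative use of the pattern (the helper below is hypothetical, not part of the package):

    // Hypothetical helper showing how the table is consumed generically:
    // any T with a type_conversion_table specialization gains f32 round-trips.
    template <typename T>
    static void row_to_f32(const T * src, float * dst, int64_t n) {
        for (int64_t i = 0; i < n; ++i) {
            dst[i] = type_conversion_table<T>::to_f32(src[i]);
        }
    }
    // With the new specialization, row_to_f32<int32_t>(...) now instantiates,
    // so I32 rows flow through the same templated kernels as F16/BF16.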
@@ -68,13 +68,7 @@ struct wsp_ggml_compute_params {
 #endif // __VXE2__
 #endif // __s390x__ && __VEC__
 
-#if defined(__s390x__) && defined(WSP_GGML_NNPA)
-#ifndef __NNPA__
-#define __NNPA__
-#endif // __NNPA__
-#endif // __s390x__ && WSP_GGML_NNPA
-
-#if defined(__ARM_FEATURE_SVE)
+#if defined(__ARM_FEATURE_SVE) && defined(__linux__)
 #include <sys/prctl.h>
 #endif
 
@@ -486,6 +480,19 @@ inline static int16x8_t vec_padd_s16(int16x8_t a, int16x8_t b) {
     return v_abo + v_abe;
 }
 
+/**
+ * @see https://github.com/ggml-org/llama.cpp/pull/14037
+ */
+inline static float vec_hsum_f32x4(float32x4_t v) {
+    float32x4_t v_temp = v + vec_reve(v);
+    return v_temp[0] + v_temp[1];
+}
+
+inline static int32_t vec_hsum_i32x4(int32x4_t v) {
+    int32x4_t v_temp = v + vec_reve(v);
+    return v_temp[0] + v_temp[1];
+}
+
 inline static int32x4_t wsp_ggml_vec_dot(int32x4_t acc, int8x16_t a, int8x16_t b) {
     const int16x8_t p = vec_mule(a, b) + vec_mulo(a, b);
     return acc + (vec_unpackh(p) + vec_unpackl(p));
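For reference, vec_hsum_f32x4 reduces four lanes by adding the vector to its reverse, after which lanes 0 and 1 together contain all four inputs. A scalar model of the same reduction (illustration only, not package code):

    // v + vec_reve(v) == { v0+v3, v1+v2, v2+v1, v3+v0 }, so summing the
    // first two lanes yields the full horizontal sum.
    static float hsum_model(const float v[4]) {
        const float t0 = v[0] + v[3];
        const float t1 = v[1] + v[2];
        return t0 + t1; // == v0 + v1 + v2 + v3
    }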
@@ -373,6 +373,9 @@ static const struct wsp_ggml_type_traits_cpu type_traits_cpu[WSP_GGML_TYPE_COUNT
         .vec_dot_type = WSP_GGML_TYPE_Q8_K,
         .nrows = 1,
     },
+    [WSP_GGML_TYPE_I32] = {
+        .from_float = (wsp_ggml_from_float_t) wsp_ggml_cpu_fp32_to_i32,
+    },
 };
 
 const struct wsp_ggml_type_traits_cpu * wsp_ggml_get_type_traits_cpu(enum wsp_ggml_type type) {
@@ -470,10 +473,10 @@ struct wsp_ggml_threadpool {
 struct wsp_ggml_compute_state {
 #ifndef WSP_GGML_USE_OPENMP
     wsp_ggml_thread_t thrd;
-    bool cpumask[WSP_GGML_MAX_N_THREADS];
     int last_graph;
     bool pending;
 #endif
+    bool cpumask[WSP_GGML_MAX_N_THREADS];
     struct wsp_ggml_threadpool * threadpool;
     int ith;
 };
@@ -686,8 +689,13 @@ bool wsp_ggml_is_numa(void) {
 #endif
 
 static void wsp_ggml_init_arm_arch_features(void) {
-#if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
+#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
+#if defined(__linux__)
     wsp_ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
+#else
+    // TODO: add support of SVE for non-linux systems
+#error "TODO: SVE is not supported on this platform. To use SVE, sve_cnt needs to be initialized here."
+#endif
 #endif
 }
 
@@ -1876,10 +1884,18 @@ static void wsp_ggml_compute_forward(struct wsp_ggml_compute_params * params, st
             {
                 wsp_ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case WSP_GGML_OP_IM2COL_3D:
+            {
+                wsp_ggml_compute_forward_im2col_3d(params, tensor);
+            } break;
         case WSP_GGML_OP_CONV_2D:
            {
                wsp_ggml_compute_forward_conv_2d(params, tensor);
            } break;
+        case WSP_GGML_OP_CONV_3D:
+            {
+                wsp_ggml_compute_forward_conv_3d(params, tensor);
+            } break;
         case WSP_GGML_OP_CONV_2D_DW:
             {
                 wsp_ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -2168,6 +2184,10 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
         case WSP_GGML_UNARY_OP_HARDSWISH:
         case WSP_GGML_UNARY_OP_HARDSIGMOID:
         case WSP_GGML_UNARY_OP_EXP:
+        case WSP_GGML_UNARY_OP_FLOOR:
+        case WSP_GGML_UNARY_OP_CEIL:
+        case WSP_GGML_UNARY_OP_ROUND:
+        case WSP_GGML_UNARY_OP_TRUNC:
             {
                 n_tasks = 1;
             } break;
@@ -2176,6 +2196,7 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
         case WSP_GGML_UNARY_OP_GELU_ERF:
         case WSP_GGML_UNARY_OP_GELU_QUICK:
         case WSP_GGML_UNARY_OP_SILU:
+        case WSP_GGML_UNARY_OP_XIELU:
             {
                 n_tasks = n_threads;
             } break;
@@ -2251,7 +2272,9 @@ static int wsp_ggml_get_n_tasks(struct wsp_ggml_tensor * node, int n_threads) {
             } break;
         case WSP_GGML_OP_IM2COL:
         case WSP_GGML_OP_IM2COL_BACK:
+        case WSP_GGML_OP_IM2COL_3D:
         case WSP_GGML_OP_CONV_2D:
+        case WSP_GGML_OP_CONV_3D:
         case WSP_GGML_OP_CONV_2D_DW:
         case WSP_GGML_OP_CONV_TRANSPOSE_1D:
         case WSP_GGML_OP_CONV_TRANSPOSE_2D:
@@ -2686,7 +2709,10 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
                 if (wsp_ggml_is_quantized(node->type) ||
                     // F16 -> BF16 and BF16 -> F16 copies go through intermediate F32
                     (node->src[0]->type == WSP_GGML_TYPE_F16 && node->src[1] && node->src[1]->type == WSP_GGML_TYPE_BF16) ||
-                    (node->src[0]->type == WSP_GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == WSP_GGML_TYPE_F16)) {
+                    (node->src[0]->type == WSP_GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == WSP_GGML_TYPE_F16) ||
+                    // conversion between F32 and I32
+                    (node->src[0]->type == WSP_GGML_TYPE_F32 && node->src[1] && node->src[1]->type == WSP_GGML_TYPE_I32) ||
+                    (node->src[0]->type == WSP_GGML_TYPE_I32 && node->src[1] && node->src[1]->type == WSP_GGML_TYPE_F32)) {
                     cur = wsp_ggml_type_size(WSP_GGML_TYPE_F32) * node->ne[0] * n_tasks;
                 }
             } break;
@@ -2773,6 +2799,7 @@ struct wsp_ggml_cplan wsp_ggml_graph_plan(
                 }
             } break;
         case WSP_GGML_OP_CONV_2D:
+        case WSP_GGML_OP_CONV_3D:
             {
                 cur = WSP_GGML_IM2COL_WORK_SIZE;
             } break;
@@ -3064,7 +3091,14 @@ static struct wsp_ggml_threadpool * wsp_ggml_threadpool_new_impl(
 
     threadpool->workers = workers;
 
-#ifndef WSP_GGML_USE_OPENMP
+#ifdef WSP_GGML_USE_OPENMP
+    int32_t cpumask_iter = 0;
+
+    // Compute CPU masks for each thread
+    for (int j = 0; j < tpp->n_threads; j++) {
+        wsp_ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
+    }
+#else // WSP_GGML_USE_OPENMP
     wsp_ggml_mutex_init(&threadpool->mutex);
     wsp_ggml_cond_init(&threadpool->cond);
 
@@ -3137,7 +3171,14 @@ enum wsp_ggml_status wsp_ggml_graph_compute(struct wsp_ggml_cgraph * cgraph, str
                 atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
             }
 
-            wsp_ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
+            // Apply thread CPU mask and priority
+            int ith = omp_get_thread_num();
+
+            wsp_ggml_thread_apply_priority(threadpool->prio);
+            if (wsp_ggml_thread_cpumask_is_valid(threadpool->workers[ith].cpumask)) {
+                wsp_ggml_thread_apply_affinity(threadpool->workers[ith].cpumask);
+            }
+            wsp_ggml_graph_compute_thread(&threadpool->workers[ith]);
         }
     } else {
         atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
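Together these two hunks make the OpenMP build compute per-worker CPU masks and apply priority/affinity inside the parallel region, so threadpool affinity settings now take effect with WSP_GGML_USE_OPENMP. A minimal sketch, assuming a wsp_ggml_threadpool_params_default helper mirroring upstream ggml (the cpumask/strict_cpu fields are taken from the hunks above):

    // Sketch: pin workers to cores 0 and 1; as of this release the mask is
    // honored on the OpenMP path, not only on the internal threadpool.
    struct wsp_ggml_threadpool_params tpp = wsp_ggml_threadpool_params_default(2);
    tpp.cpumask[0] = true;  // allow core 0
    tpp.cpumask[1] = true;  // allow core 1
    tpp.strict_cpu = true;  // assign one worker per allowed core, in order
    // ... create the threadpool from tpp and attach it as usual ...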
@@ -3200,20 +3241,12 @@ void wsp_ggml_cpu_fp32_to_fp16(const float * x, wsp_ggml_fp16_t * y, int64_t n)
         __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
         _mm_storel_epi64((__m128i *)(y + i), y_vec);
     }
-#elif defined(__NNPA__)
-    for (; i + 7 < n; i += 8) {
-        float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
-        float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
-        uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
-        uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
-        vec_xst(v_y, 0, (wsp_ggml_fp16_t *)(y + i));
-    }
-    for (; i + 3 < n; i += 4) {
-        float32x4_t v_x = vec_xl(0, (const float *)(x + i));
-        float32x4_t v_zero = vec_splats(0.0f);
-        uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
-        uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
-        vec_xst(v_y, 0, (wsp_ggml_fp16_t *)(y + i));
+#elif defined(__riscv_zvfh)
+    for (int vl; i < n; i += vl) {
+        vl = __riscv_vsetvl_e32m2(n - i);
+        vfloat32m2_t vx = __riscv_vle32_v_f32m2(&x[i], vl);
+        vfloat16m1_t vy = __riscv_vfncvt_f_f_w_f16m1(vx, vl);
+        __riscv_vse16_v_f16m1((_Float16 *)&y[i], vy, vl);
     }
 #endif
     for (; i < n; ++i) {
@@ -3241,21 +3274,6 @@ void wsp_ggml_cpu_fp16_to_fp32(const wsp_ggml_fp16_t * x, float * y, int64_t n)
         __m128 y_vec = _mm_cvtph_ps(x_vec);
         _mm_storeu_ps(y + i, y_vec);
     }
-#elif defined(__NNPA__)
-    for (; i + 7 < n; i += 8) {
-        uint16x8_t v_x = vec_xl(0, (const wsp_ggml_fp16_t *)(x + i));
-        uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
-        float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
-        float32x4_t v_yl = vec_extend_to_fp32_lo(v_yd, 0);
-        vec_xst(v_yh, 0, (float *)(y + i + 0));
-        vec_xst(v_yl, 0, (float *)(y + i + 4));
-    }
-    for (; i + 3 < n; i += 4) {
-        uint16x8_t v_x = vec_xl(0, (const wsp_ggml_fp16_t *)(x + i));
-        uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
-        float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
-        vec_xst(v_yh, 0, (float *)(y + i));
-    }
 #endif
 
     for (; i < n; ++i) {
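Both conversion routines drop their s390x NNPA fast paths, and fp32-to-fp16 gains a RISC-V Zvfh path; the scalar tail loops and public signatures are unchanged. Usage stays as before, e.g.:

    // Round trip through the public helpers; only the SIMD fast path selected
    // under the hood changed (Zvfh added, NNPA removed).
    float src[8] = { 0.5f, 1.0f, 1.5f, 2.0f, -0.5f, -1.0f, 65504.0f, 0.0f };
    wsp_ggml_fp16_t half[8];
    float back[8];
    wsp_ggml_cpu_fp32_to_fp16(src, half, 8);
    wsp_ggml_cpu_fp16_to_fp32(half, back, 8);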
@@ -3270,6 +3288,13 @@ void wsp_ggml_cpu_fp32_to_bf16(const float * x, wsp_ggml_bf16_t * y, int64_t n)
     }
 }
 
+void wsp_ggml_cpu_fp32_to_i32(const float * x, int32_t * y, int64_t n) {
+    int64_t i = 0;
+    for (; i < n; ++i) {
+        y[i] = x[i];
+    }
+}
+
 void wsp_ggml_cpu_bf16_to_fp32(const wsp_ggml_bf16_t * x, float * y, int64_t n) {
     int64_t i = 0;
 #if defined(__AVX2__)
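The new wsp_ggml_cpu_fp32_to_i32 backs the I32 from_float trait registered earlier. Note that the implicit float-to-int conversion in C truncates toward zero rather than rounding:

    // Truncation semantics of the element-wise conversion:
    float x[4] = { 1.9f, -1.9f, 0.4f, 2.0f };
    int32_t y[4];
    wsp_ggml_cpu_fp32_to_i32(x, y, 4); // y == { 1, -1, 0, 2 }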
@@ -3459,14 +3484,6 @@ int wsp_ggml_cpu_has_vxe(void) {
 #endif
 }
 
-int wsp_ggml_cpu_has_nnpa(void) {
-#if defined(WSP_GGML_NNPA)
-    return 1;
-#else
-    return 0;
-#endif
-}
-
 int wsp_ggml_cpu_has_neon(void) {
 #if defined(__ARM_ARCH) && defined(__ARM_NEON)
     return 1;
@@ -3550,13 +3567,17 @@ void wsp_ggml_cpu_init(void) {
 #ifdef WSP_GGML_USE_OPENMP
     //if (!getenv("OMP_WAIT_POLICY")) {
     //    // set the wait policy to active, so that OpenMP threads don't sleep
-    //    putenv("OMP_WAIT_POLICY=active");
+    //    setenv("OMP_WAIT_POLICY", "active", 0)
     //}
 
     if (!getenv("KMP_BLOCKTIME")) {
         // set the time to wait before sleeping a thread
         // this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases
-        putenv("KMP_BLOCKTIME=200"); // 200ms
+#ifdef _WIN32
+        _putenv_s("KMP_BLOCKTIME", "200"); // 200ms
+#else
+        setenv("KMP_BLOCKTIME", "200", 0); // 200ms
+#endif
     }
 #endif
 }
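This hunk replaces putenv with platform-appropriate calls: setenv on POSIX (overwrite=0 keeps any pre-set value) and _putenv_s on Windows. The same pattern factored into a small portable helper (the helper is ours, not part of the package):

    // Hypothetical "set if unset" wrapper mirroring the hunk above.
    #include <cstdlib>

    static void setenv_if_unset(const char * name, const char * value) {
        if (!std::getenv(name)) {
    #ifdef _WIN32
            _putenv_s(name, value);
    #else
            setenv(name, value, /*overwrite=*/0);
    #endif
        }
    }
    // e.g. setenv_if_unset("KMP_BLOCKTIME", "200");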
@@ -18,6 +18,10 @@
 #    include "kleidiai/kleidiai.h"
 #endif
 
+#ifdef WSP_GGML_USE_CPU_RISCV64_SPACEMIT
+#    include "spacemit/ime.h"
+#endif
+
 #if defined(_WIN32)
 #    define WIN32_LEAN_AND_MEAN
 #    ifndef NOMINMAX
@@ -45,6 +49,12 @@ std::vector<wsp_ggml_backend_buffer_type_t> & wsp_ggml_backend_cpu_get_extra_buf
     }
 #endif
 
+#ifdef WSP_GGML_USE_CPU_RISCV64_SPACEMIT
+    if (wsp_ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
+        bufts.push_back(wsp_ggml_backend_cpu_riscv64_spacemit_buffer_type());
+    }
+#endif
+
 #ifdef WSP_GGML_USE_CPU_KLEIDIAI
     if (wsp_ggml_backend_cpu_kleidiai_buffer_type()) {
         bufts.push_back(wsp_ggml_backend_cpu_kleidiai_buffer_type());
@@ -190,6 +200,7 @@ static const struct wsp_ggml_backend_i wsp_ggml_backend_cpu_i = {
     /* .graph_compute  = */ wsp_ggml_backend_cpu_graph_compute,
     /* .event_record   = */ NULL,
     /* .event_wait     = */ NULL,
+    /* .graph_optimize = */ NULL,
 };
 
 static wsp_ggml_guid_t wsp_ggml_backend_cpu_guid(void) {
@@ -348,8 +359,10 @@ static void wsp_ggml_backend_cpu_device_get_memory(wsp_ggml_backend_dev_t dev, s
     long pages = sysconf(_SC_PHYS_PAGES);
     long page_size = sysconf(_SC_PAGE_SIZE);
     *total = pages * page_size;
+
+    // "free" system memory is ill-defined, for practical purposes assume that all of it is free:
     *free = *total;
-#endif
+#endif // _WIN32
 
     WSP_GGML_UNUSED(dev);
 }
@@ -576,9 +589,6 @@ static wsp_ggml_backend_feature * wsp_ggml_backend_cpu_get_features(wsp_ggml_bac
     if (wsp_ggml_cpu_has_vxe()) {
         features.push_back({ "VXE", "1" });
     }
-    if (wsp_ggml_cpu_has_nnpa()) {
-        features.push_back({ "NNPA", "1" });
-    }
     if (wsp_ggml_cpu_has_wasm_simd()) {
         features.push_back({ "WASM_SIMD", "1" });
     }