whispercpp 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -3
  3. data/README.md +71 -14
  4. data/Rakefile +20 -7
  5. data/ext/.gitignore +4 -6
  6. data/ext/dependencies.rb +36 -24
  7. data/ext/extconf.rb +1 -1
  8. data/ext/options.rb +48 -184
  9. data/ext/ruby_whisper.c +18 -0
  10. data/ext/ruby_whisper_context.c +43 -12
  11. data/ext/ruby_whisper_model.c +1 -1
  12. data/ext/ruby_whisper_params.c +4 -2
  13. data/ext/ruby_whisper_segment.c +81 -4
  14. data/ext/ruby_whisper_transcribe.cpp +13 -7
  15. data/ext/ruby_whisper_vad_params.c +1 -1
  16. data/ext/sources/CMakeLists.txt +5 -1
  17. data/ext/sources/bindings/javascript/package.json +1 -1
  18. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
  19. data/ext/sources/examples/addon.node/addon.cpp +150 -31
  20. data/ext/sources/examples/addon.node/index.js +3 -0
  21. data/ext/sources/examples/addon.node/vad-example.js +132 -0
  22. data/ext/sources/examples/bench/bench.cpp +3 -2
  23. data/ext/sources/examples/cli/cli.cpp +3 -2
  24. data/ext/sources/examples/command/command.cpp +32 -8
  25. data/ext/sources/examples/common-whisper.cpp +14 -7
  26. data/ext/sources/examples/lsp/lsp.cpp +2 -0
  27. data/ext/sources/examples/quantize/quantize.cpp +3 -0
  28. data/ext/sources/examples/server/CMakeLists.txt +3 -0
  29. data/ext/sources/examples/server/server.cpp +169 -22
  30. data/ext/sources/examples/stream/stream.cpp +6 -0
  31. data/ext/sources/examples/talk-llama/CMakeLists.txt +4 -1
  32. data/ext/sources/examples/talk-llama/llama-arch.cpp +171 -3
  33. data/ext/sources/examples/talk-llama/llama-arch.h +28 -1
  34. data/ext/sources/examples/talk-llama/llama-batch.cpp +741 -272
  35. data/ext/sources/examples/talk-llama/llama-batch.h +112 -54
  36. data/ext/sources/examples/talk-llama/llama-chat.cpp +30 -8
  37. data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
  38. data/ext/sources/examples/talk-llama/llama-context.cpp +520 -351
  39. data/ext/sources/examples/talk-llama/llama-context.h +38 -17
  40. data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
  41. data/ext/sources/examples/talk-llama/llama-cparams.h +1 -1
  42. data/ext/sources/examples/talk-llama/llama-graph.cpp +447 -372
  43. data/ext/sources/examples/talk-llama/llama-graph.h +128 -58
  44. data/ext/sources/examples/talk-llama/llama-hparams.cpp +10 -2
  45. data/ext/sources/examples/talk-llama/llama-hparams.h +19 -2
  46. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
  47. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
  48. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
  49. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
  50. data/ext/sources/examples/talk-llama/llama-kv-cache.h +14 -472
  51. data/ext/sources/examples/talk-llama/llama-kv-cells.h +86 -26
  52. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
  53. data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
  54. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
  55. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
  56. data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
  57. data/ext/sources/examples/talk-llama/llama-memory.h +88 -4
  58. data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
  59. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +42 -17
  60. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
  61. data/ext/sources/examples/talk-llama/llama-model.cpp +1863 -563
  62. data/ext/sources/examples/talk-llama/llama-model.h +27 -0
  63. data/ext/sources/examples/talk-llama/llama-quant.cpp +89 -6
  64. data/ext/sources/examples/talk-llama/llama-vocab.cpp +65 -28
  65. data/ext/sources/examples/talk-llama/llama-vocab.h +1 -0
  66. data/ext/sources/examples/talk-llama/llama.cpp +11 -7
  67. data/ext/sources/examples/talk-llama/llama.h +147 -40
  68. data/ext/sources/examples/talk-llama/talk-llama.cpp +2 -0
  69. data/ext/sources/examples/talk-llama/unicode.cpp +5 -0
  70. data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
  71. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +2 -0
  72. data/ext/sources/ggml/CMakeLists.txt +48 -3
  73. data/ext/sources/ggml/cmake/common.cmake +24 -0
  74. data/ext/sources/ggml/include/ggml-backend.h +1 -1
  75. data/ext/sources/ggml/include/ggml-cpu.h +2 -0
  76. data/ext/sources/ggml/include/ggml.h +144 -5
  77. data/ext/sources/ggml/src/CMakeLists.txt +82 -24
  78. data/ext/sources/ggml/src/ggml-backend-reg.cpp +5 -0
  79. data/ext/sources/ggml/src/ggml-backend.cpp +46 -23
  80. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +3 -3
  81. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  82. data/ext/sources/ggml/src/ggml-cann/common.h +6 -1
  83. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  84. data/ext/sources/ggml/src/ggml-common.h +4 -0
  85. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +133 -40
  86. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  87. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  88. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  89. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  90. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  91. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  92. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  93. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  94. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  95. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  96. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  97. data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  98. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  99. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  100. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  101. data/ext/sources/ggml/src/ggml-cpu/common.h +4 -3
  102. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
  103. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +146 -105
  104. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
  105. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  106. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  107. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
  108. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  109. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1057 -174
  110. data/ext/sources/ggml/src/ggml-cpu/ops.h +8 -0
  111. data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
  112. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  113. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
  114. data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
  115. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +330 -38
  116. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  117. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +111 -18
  118. data/ext/sources/ggml/src/ggml-cpu/vec.h +303 -94
  119. data/ext/sources/ggml/src/ggml-cuda/common.cuh +60 -37
  120. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  121. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  122. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  123. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  124. data/ext/sources/ggml/src/ggml-cuda/convert.cu +22 -0
  125. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +5 -0
  126. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +2 -2
  127. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
  128. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  129. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +265 -123
  130. data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
  131. data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
  132. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +257 -87
  133. data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +2 -3
  134. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  135. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +5 -18
  136. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  137. data/ext/sources/ggml/src/ggml-cuda/unary.cu +89 -0
  138. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +7 -0
  139. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  140. data/ext/sources/ggml/src/ggml-impl.h +127 -183
  141. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  142. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +27 -0
  143. data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +331 -49
  144. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +564 -282
  145. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
  146. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +14 -0
  147. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1859 -489
  148. data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  149. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  150. data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  151. data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
  152. data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  153. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  154. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  155. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  156. data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  157. data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  158. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  159. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  160. data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  161. data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  162. data/ext/sources/ggml/src/ggml-quants.c +6 -8
  163. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  164. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
  165. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  166. data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
  167. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +28 -41
  168. data/ext/sources/ggml/src/ggml-sycl/conv.cpp +4 -10
  169. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +117 -165
  170. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +192 -53
  171. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  172. data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  173. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  174. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +648 -1039
  175. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  176. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +3 -0
  177. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +8 -105
  178. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -100
  179. data/ext/sources/ggml/src/ggml-sycl/gla.cpp +2 -2
  180. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
  181. data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +60 -80
  182. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +158 -203
  183. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +55 -74
  184. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -10
  185. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +138 -27
  186. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +3 -3
  187. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  188. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  189. data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  190. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  191. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +12 -16
  192. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
  193. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +726 -282
  194. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  195. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  196. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  197. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  198. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  199. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  200. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  201. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  202. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +10 -1
  203. data/ext/sources/ggml/src/ggml.c +328 -48
  204. data/ext/sources/ggml/src/ggml.cpp +26 -0
  205. data/ext/sources/ggml/src/gguf.cpp +24 -3
  206. data/ext/sources/include/whisper.h +2 -0
  207. data/ext/sources/src/CMakeLists.txt +2 -0
  208. data/ext/sources/src/coreml/whisper-compat.h +10 -0
  209. data/ext/sources/src/coreml/whisper-compat.m +35 -0
  210. data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
  211. data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
  212. data/ext/sources/src/whisper.cpp +218 -169
  213. data/extsources.rb +15 -9
  214. data/lib/whisper/context.rb +15 -0
  215. data/lib/whisper/model/uri.rb +56 -1
  216. data/lib/whisper/segment.rb +58 -0
  217. data/sig/whisper.rbs +68 -38
  218. data/{tests → test}/helper.rb +1 -12
  219. data/{tests → test}/test_model.rb +9 -0
  220. data/test/test_package.rb +51 -0
  221. data/test/test_segment.rb +146 -0
  222. data/{tests → test}/test_whisper.rb +70 -0
  223. data/whispercpp.gemspec +2 -3
  224. metadata +91 -43
  225. data/ext/sources/.dockerignore +0 -3
  226. data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
  227. data/ext/sources/ci/run.sh +0 -336
  228. data/ext/sources/close-issue.yml +0 -28
  229. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +0 -2739
  230. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  231. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
  232. data/tests/test_package.rb +0 -46
  233. data/tests/test_segment.rb +0 -74
  234. /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  235. /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  236. /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  237. /data/{tests → test}/jfk_reader/.gitignore +0 -0
  238. /data/{tests → test}/jfk_reader/extconf.rb +0 -0
  239. /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
  240. /data/{tests → test}/test_callback.rb +0 -0
  241. /data/{tests → test}/test_error.rb +0 -0
  242. /data/{tests → test}/test_params.rb +0 -0
  243. /data/{tests → test}/test_vad.rb +0 -0
  244. /data/{tests → test}/test_vad_params.rb +0 -0
@@ -3,11 +3,11 @@

  #include "ggml-backend-impl.h"
  #include "ggml-backend.h"
- #include "ggml-cpu-traits.h"
+ #include "traits.h"
  #include "ggml-cpu-impl.h"
  #include "ggml-cpu.h"
  #include "ggml-impl.h"
- #include "ggml-cpu-quants.h"
+ #include "quants.h"
  #include "ggml-threading.h"
  #include "unary-ops.h"
  #include "binary-ops.h"
@@ -72,15 +72,13 @@
  #define UNUSED GGML_UNUSED
  #define SWAP(x, y, T) do { T SWAP = x; (x) = y; (y) = SWAP; } while (0)

+ // precomputed f32 table for f16 (256 KB) (simd-mappings.h)
+ float ggml_table_f32_f16[1 << 16];
+
  #if defined(__ARM_ARCH)
  struct ggml_arm_arch_features_type {
- int has_neon;
- int has_dotprod;
- int has_i8mm;
- int has_sve;
  int sve_cnt;
- int has_sme;
- } ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
+ } ggml_arm_arch_features = { 0 };
  #endif


@@ -197,6 +195,7 @@ typedef pthread_t ggml_thread_t;

  static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
  [GGML_TYPE_F32] = {
+ .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
  .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
  .vec_dot_type = GGML_TYPE_F32,
  .nrows = 1,
@@ -270,7 +269,11 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
  .from_float = quantize_row_q4_K,
  .vec_dot = ggml_vec_dot_q4_K_q8_K,
  .vec_dot_type = GGML_TYPE_Q8_K,
+ #if defined (__ARM_FEATURE_MATMUL_INT8)
+ .nrows = 2,
+ #else
  .nrows = 1,
+ #endif
  },
  [GGML_TYPE_Q5_K] = {
  .from_float = quantize_row_q5_K,
@@ -555,6 +558,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
  #endif
  }

+ void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) {
+ atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed);
+ }
+
+ int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) {
+ return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed);
+ }
+
  #if defined(__gnu_linux__)
  static cpu_set_t ggml_get_numa_affinity(void) {
  cpu_set_t cpuset;
@@ -666,87 +677,15 @@

  #if defined(__linux__) && defined(__aarch64__)
  #include <sys/auxv.h>
- #elif defined(__APPLE__)
- #include <sys/sysctl.h>
- #endif
-
- #if !defined(HWCAP2_I8MM)
- #define HWCAP2_I8MM (1 << 13)
- #endif
-
- #if !defined(HWCAP2_SME)
- #define HWCAP2_SME (1 << 23)
  #endif

  static void ggml_init_arm_arch_features(void) {
- #if defined(__linux__) && defined(__aarch64__)
- uint32_t hwcap = getauxval(AT_HWCAP);
- uint32_t hwcap2 = getauxval(AT_HWCAP2);
-
- ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
- ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
- ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
- ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
- ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);
-
- #if defined(__ARM_FEATURE_SVE)
+ #if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
  ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
  #endif
- #elif defined(__APPLE__)
- int oldp = 0;
- size_t size = sizeof(oldp);
- if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
- oldp = 0;
- }
- ggml_arm_arch_features.has_neon = oldp;
-
- if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
- oldp = 0;
- }
- ggml_arm_arch_features.has_dotprod = oldp;
-
- if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
- oldp = 0;
- }
- ggml_arm_arch_features.has_i8mm = oldp;
-
- if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
- oldp = 0;
- }
- ggml_arm_arch_features.has_sme = oldp;
-
- ggml_arm_arch_features.has_sve = 0;
- ggml_arm_arch_features.sve_cnt = 0;
- #else
- // Run-time CPU feature detection not implemented for this platform, fallback to compile time
- #if defined(__ARM_NEON)
- ggml_arm_arch_features.has_neon = 1;
- #else
- ggml_arm_arch_features.has_neon = 0;
- #endif
-
- #if defined(__ARM_FEATURE_MATMUL_INT8)
- ggml_arm_arch_features.has_i8mm = 1;
- #else
- ggml_arm_arch_features.has_i8mm = 0;
- #endif
-
- #if defined(__ARM_FEATURE_SVE)
- ggml_arm_arch_features.has_sve = 1;
- ggml_arm_arch_features.sve_cnt = 16;
- #else
- ggml_arm_arch_features.has_sve = 0;
- ggml_arm_arch_features.sve_cnt = 0;
- #endif
-
- #if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
- ggml_arm_arch_features.has_sme = 1;
- #else
- ggml_arm_arch_features.has_sme = 0;
- #endif
- #endif
  }
- #endif
+
+ #endif // __ARM_ARCH

  struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
  GGML_ASSERT(!ggml_get_no_alloc(ctx));
@@ -801,7 +740,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
  {
  assert(tensor->nb[0] == sizeof(ggml_fp16_t));
  for (int i = 0; i < n; i++) {
- ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_FP32_TO_FP16(value));
+ ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
  }
  } break;
  case GGML_TYPE_BF16:
@@ -860,7 +799,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
  {
  assert(tensor->nb[0] == sizeof(ggml_fp16_t));
  for (int i = 0; i < n; i++) {
- ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_FP32_TO_FP16(value));
+ ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
  }
  } break;
  case GGML_TYPE_BF16:
@@ -911,7 +850,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
  case GGML_TYPE_F16:
  {
  GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
- return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
  }
  case GGML_TYPE_BF16:
  {
@@ -956,7 +895,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
  case GGML_TYPE_F16:
  {
  GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
- ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
+ ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
  } break;
  case GGML_TYPE_BF16:
  {
@@ -985,7 +924,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
  case GGML_TYPE_I32:
  return ((int32_t *) data)[0];
  case GGML_TYPE_F16:
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
  case GGML_TYPE_BF16:
  return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
  case GGML_TYPE_F32:
@@ -1012,7 +951,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
  } break;
  case GGML_TYPE_F16:
  {
- ((ggml_fp16_t *)(data))[0] = GGML_FP32_TO_FP16(value);
+ ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
  } break;
  case GGML_TYPE_BF16:
  {
@@ -1050,7 +989,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
  }
  case GGML_TYPE_F16:
  {
- return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
  }
  case GGML_TYPE_BF16:
  {
@@ -1089,7 +1028,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
  } break;
  case GGML_TYPE_F16:
  {
- ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
+ ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
  } break;
  case GGML_TYPE_BF16:
  {
@@ -1116,7 +1055,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
  case GGML_TYPE_I32:
  return ((int32_t *) data)[0];
  case GGML_TYPE_F16:
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
  case GGML_TYPE_BF16:
  return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
  case GGML_TYPE_F32:
@@ -1143,7 +1082,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
  } break;
  case GGML_TYPE_F16:
  {
- ((ggml_fp16_t *)(data))[0] = GGML_FP32_TO_FP16(value);
+ ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
  } break;
  case GGML_TYPE_BF16:
  {
@@ -1254,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
  }
  }

- static void ggml_compute_forward_mul_mat(
+ void ggml_compute_forward_mul_mat(
  const struct ggml_compute_params * params,
  struct ggml_tensor * dst) {

@@ -1879,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_get_rows_back(params, tensor);
  } break;
+ case GGML_OP_SET_ROWS:
+ {
+ ggml_compute_forward_set_rows(params, tensor);
+ } break;
  case GGML_OP_DIAG:
  {
  ggml_compute_forward_diag(params, tensor);
@@ -1923,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_im2col_back_f32(params, tensor);
  } break;
+ case GGML_OP_CONV_2D:
+ {
+ ggml_compute_forward_conv_2d(params, tensor);
+ } break;
  case GGML_OP_CONV_2D_DW:
  {
  ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -1955,6 +1902,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_pad_reflect_1d(params, tensor);
  } break;
+ case GGML_OP_ROLL:
+ {
+ ggml_compute_forward_roll(params, tensor);
+ } break;
  case GGML_OP_ARANGE:
  {
  ggml_compute_forward_arange(params, tensor);
@@ -2002,6 +1953,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
  {
  ggml_compute_forward_unary(params, tensor);
  } break;
+ case GGML_OP_GLU:
+ {
+ ggml_compute_forward_glu(params, tensor);
+ } break;
  case GGML_OP_GET_REL_POS:
  {
  ggml_compute_forward_get_rel_pos(params, tensor);
@@ -2212,6 +2167,18 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  GGML_ABORT("fatal error");
  }
  break;
+ case GGML_OP_GLU:
+ switch (ggml_get_glu_op(node)) {
+ case GGML_GLU_OP_REGLU:
+ case GGML_GLU_OP_GEGLU:
+ case GGML_GLU_OP_SWIGLU:
+ {
+ n_tasks = n_threads;
+ } break;
+ default:
+ GGML_ABORT("fatal error");
+ }
+ break;
  case GGML_OP_SILU_BACK:
  case GGML_OP_MUL:
  case GGML_OP_DIV:
@@ -2228,6 +2195,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  n_tasks = n_threads;
  } break;
  case GGML_OP_GET_ROWS:
+ case GGML_OP_SET_ROWS:
  {
  // FIXME: get_rows can use additional threads, but the cost of launching additional threads
  // decreases performance with GPU offloading
@@ -2264,6 +2232,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  } break;
  case GGML_OP_IM2COL:
  case GGML_OP_IM2COL_BACK:
+ case GGML_OP_CONV_2D:
  case GGML_OP_CONV_2D_DW:
  case GGML_OP_CONV_TRANSPOSE_1D:
  case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2279,6 +2248,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
  case GGML_OP_UPSCALE:
  case GGML_OP_PAD:
  case GGML_OP_PAD_REFLECT_1D:
+ case GGML_OP_ROLL:
  case GGML_OP_ARANGE:
  case GGML_OP_TIMESTEP_EMBEDDING:
  case GGML_OP_ARGSORT:
@@ -2414,12 +2384,32 @@ static bool ggml_thread_apply_priority(int32_t prio) {
  // This is up to the applications.
  DWORD p = THREAD_PRIORITY_NORMAL;
  switch (prio) {
+ case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
  case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
  case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
  case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
  case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
  }

+ if (prio != GGML_SCHED_PRIO_LOW) {
+ // Tell Windows that this thread should not be throttled (needs its own CPU core).
+ // Newer Windows 11 versions aggresively park (offline) CPU cores and often place
+ // all our threads onto the first 4 cores which results in terrible performance with
+ // n_threads > 4
+ #if _WIN32_WINNT >= 0x0602
+ THREAD_POWER_THROTTLING_STATE t;
+ ZeroMemory(&t, sizeof(t));
+ t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
+ t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
+ t.StateMask = 0;
+
+ if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
+ GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
+ return false;
+ }
+ #endif
+ }
+
  if (prio == GGML_SCHED_PRIO_NORMAL) {
  // Keep inherited policy/priority
  return true;
@@ -2447,6 +2437,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
  struct sched_param p;
  int32_t policy = SCHED_OTHER;
  switch (prio) {
+ // TODO: there seems to be no way to set lower prio on Apple platforms
+ case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
  case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
  case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
  case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
@@ -2503,6 +2495,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
  struct sched_param p;
  int32_t policy = SCHED_OTHER;
  switch (prio) {
+ case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
  case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
  case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
  case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
@@ -2758,6 +2751,10 @@ struct ggml_cplan ggml_graph_plan(
  GGML_ABORT("fatal error");
  }
  } break;
+ case GGML_OP_CONV_2D:
+ {
+ cur = GGML_IM2COL_WORK_SIZE;
+ } break;
  case GGML_OP_CONV_TRANSPOSE_2D:
  {
  const int64_t ne00 = node->src[0]->ne[0]; // W
@@ -3158,6 +3155,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
  return ggml_graph_compute(cgraph, &cplan);
  }

+ void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
+ memcpy(y, x, n * sizeof(float));
+ }
+
  void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
  int64_t i = 0;
  #if defined(__F16C__)
@@ -3178,9 +3179,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
  __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
  _mm_storel_epi64((__m128i *)(y + i), y_vec);
  }
+ #elif defined(__NNPA__)
+ for (; i + 7 < n; i += 8) {
+ float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
+ float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
+ uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
+ uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
+ vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
+ }
+ for (; i + 3 < n; i += 4) {
+ float32x4_t v_x = vec_xl(0, (const float *)(x + i));
+ float32x4_t v_zero = vec_splats(0.0f);
+ uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
+ uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
+ vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
+ }
  #endif
  for (; i < n; ++i) {
- y[i] = GGML_FP32_TO_FP16(x[i]);
+ y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
  }
  }

@@ -3204,9 +3220,25 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
  __m128 y_vec = _mm_cvtph_ps(x_vec);
  _mm_storeu_ps(y + i, y_vec);
  }
+ #elif defined(__NNPA__)
+ for (; i + 7 < n; i += 8) {
+ uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
+ uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
+ float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
+ float32x4_t v_yl = vec_extend_to_fp32_lo(v_yd, 0);
+ vec_xst(v_yh, 0, (float *)(y + i + 0));
+ vec_xst(v_yl, 0, (float *)(y + i + 4));
+ }
+ for (; i + 3 < n; i += 4) {
+ uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
+ uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
+ float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
+ vec_xst(v_yh, 0, (float *)(y + i));
+ }
  #endif
+
  for (; i < n; ++i) {
- y[i] = GGML_FP16_TO_FP32(x[i]);
+ y[i] = GGML_CPU_FP16_TO_FP32(x[i]);
  }
  }

@@ -3406,9 +3438,17 @@ int ggml_cpu_has_vxe(void) {
  #endif
  }

+ int ggml_cpu_has_nnpa(void) {
+ #if defined(GGML_NNPA)
+ return 1;
+ #else
+ return 0;
+ #endif
+ }
+
  int ggml_cpu_has_neon(void) {
  #if defined(__ARM_ARCH) && defined(__ARM_NEON)
- return ggml_arm_arch_features.has_neon;
+ return 1;
  #else
  return 0;
  #endif
@@ -3416,7 +3456,7 @@ int ggml_cpu_has_neon(void) {

  int ggml_cpu_has_dotprod(void) {
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
- return ggml_arm_arch_features.has_dotprod;
+ return 1;
  #else
  return 0;
  #endif
@@ -3424,7 +3464,7 @@ int ggml_cpu_has_dotprod(void) {

  int ggml_cpu_has_sve(void) {
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
- return ggml_arm_arch_features.has_sve;
+ return 1;
  #else
  return 0;
  #endif
@@ -3432,7 +3472,7 @@ int ggml_cpu_has_sve(void) {

  int ggml_cpu_has_matmul_int8(void) {
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
- return ggml_arm_arch_features.has_i8mm;
+ return 1;
  #else
  return 0;
  #endif
@@ -3448,14 +3488,14 @@ int ggml_cpu_get_sve_cnt(void) {

  int ggml_cpu_has_sme(void) {
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
- return ggml_arm_arch_features.has_sme;
+ return 1;
  #else
  return 0;
  #endif
  }

  void ggml_cpu_init(void) {
- // needed to initialize f16 tables
+ // needed to initialize ggml_time
  {
  struct ggml_init_params params = { 0, NULL, false };
  struct ggml_context * ctx = ggml_init(params);
@@ -3476,9 +3516,10 @@ void ggml_cpu_init(void) {
  uint16_t u16;
  ggml_fp16_t fp16;
  } u = {i};
- float f = GGML_FP16_TO_FP32(u.fp16);
- ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
- ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
+ float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
+ ggml_table_f32_f16[i] = f;
+ ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
+ ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
  }

  const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
@@ -1,8 +1,8 @@
  #include "ggml-backend.h"
  #include "ggml-backend-impl.h"
  #include "ggml-cpu.h"
- #include "ggml-cpu-aarch64.h"
- #include "ggml-cpu-traits.h"
+ #include "repack.h"
+ #include "traits.h"
  #include "ggml-impl.h"
  #include "amx/amx.h"

@@ -11,7 +11,7 @@
  #include <vector>

  #ifdef GGML_USE_CPU_HBM
- # include "ggml-cpu-hbm.h"
+ # include "hbm.h"
  #endif

  #ifdef GGML_USE_CPU_KLEIDIAI
@@ -51,9 +51,9 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
  }
  #endif

- #ifdef GGML_USE_CPU_AARCH64
- if (ggml_backend_cpu_aarch64_buffer_type()) {
- bufts.push_back(ggml_backend_cpu_aarch64_buffer_type());
+ #ifdef GGML_USE_CPU_REPACK
+ if (ggml_backend_cpu_repack_buffer_type()) {
+ bufts.push_back(ggml_backend_cpu_repack_buffer_type());
  }
  #endif

@@ -416,6 +416,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st

  switch (op->op) {
  case GGML_OP_CPY:
+ case GGML_OP_SET_ROWS:
  return
  op->type != GGML_TYPE_IQ3_XXS &&
  op->type != GGML_TYPE_IQ3_S &&
@@ -578,6 +579,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
  if (ggml_cpu_has_vxe()) {
  features.push_back({ "VXE", "1" });
  }
+ if (ggml_cpu_has_nnpa()) {
+ features.push_back({ "NNPA", "1" });
+ }
  if (ggml_cpu_has_wasm_simd()) {
  features.push_back({ "WASM_SIMD", "1" });
  }
@@ -596,8 +600,8 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
  #ifdef GGML_USE_CPU_KLEIDIAI
  features.push_back({ "KLEIDIAI", "1" });
  #endif
- #ifdef GGML_USE_CPU_AARCH64
- features.push_back({ "AARCH64_REPACK", "1" });
+ #ifdef GGML_USE_CPU_REPACK
+ features.push_back({ "REPACK", "1" });
  #endif

  features.push_back({ nullptr, nullptr });
@@ -5,7 +5,7 @@
  #include "ggml-cpu.h"
  #include "ggml-impl.h"

- #include "ggml-cpu-hbm.h"
+ #include "hbm.h"

  // buffer type HBM

@@ -26,7 +26,7 @@
  #include "ggml-impl.h"
  #include "ggml-backend-impl.h"
  #include "ggml-threading.h"
- #include "ggml-cpu-traits.h"
+ #include "traits.h"

  #include "kernels.h"