@novastera-oss/llamarn 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. package/android/src/main/cpp/include/llama.h +141 -38
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  10. package/cpp/LlamaCppModel.cpp +58 -24
  11. package/cpp/LlamaCppModel.h +3 -3
  12. package/cpp/PureCppImpl.cpp +1 -1
  13. package/cpp/PureCppImpl.h +2 -2
  14. package/cpp/build-info.cpp +2 -2
  15. package/cpp/llama.cpp/CMakeLists.txt +15 -4
  16. package/cpp/llama.cpp/Makefile +2 -2
  17. package/cpp/llama.cpp/README.md +32 -13
  18. package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
  19. package/cpp/llama.cpp/common/arg.cpp +37 -6
  20. package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
  21. package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
  22. package/cpp/llama.cpp/common/chat-parser.h +2 -0
  23. package/cpp/llama.cpp/common/chat.cpp +12 -9
  24. package/cpp/llama.cpp/common/chat.h +1 -1
  25. package/cpp/llama.cpp/common/common.cpp +53 -40
  26. package/cpp/llama.cpp/common/common.h +6 -2
  27. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  28. package/cpp/llama.cpp/common/speculative.cpp +6 -4
  29. package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
  30. package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
  31. package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
  32. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  33. package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
  34. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
  35. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
  36. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  38. package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
  39. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  67. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
  70. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
  72. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  88. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
  90. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  91. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
  93. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
  94. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
  97. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
  105. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
  112. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
  113. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  115. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
  117. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
  138. package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
  139. package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
  140. package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
  141. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
  142. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
  143. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
  144. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  145. package/cpp/llama.cpp/include/llama.h +141 -38
  146. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  147. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
  148. package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
  149. package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
  150. package/cpp/llama.cpp/src/llama-arch.h +25 -1
  151. package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
  152. package/cpp/llama.cpp/src/llama-batch.h +110 -57
  153. package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
  154. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  155. package/cpp/llama.cpp/src/llama-context.cpp +360 -266
  156. package/cpp/llama.cpp/src/llama-context.h +27 -23
  157. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
  158. package/cpp/llama.cpp/src/llama-cparams.h +1 -1
  159. package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
  160. package/cpp/llama.cpp/src/llama-graph.h +126 -58
  161. package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
  162. package/cpp/llama.cpp/src/llama-hparams.h +16 -2
  163. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
  164. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
  165. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
  166. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
  167. package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
  168. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  169. package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
  170. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
  171. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
  172. package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
  173. package/cpp/llama.cpp/src/llama-memory.h +73 -36
  174. package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
  175. package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
  176. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  177. package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
  178. package/cpp/llama.cpp/src/llama-model.h +26 -0
  179. package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
  180. package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
  181. package/cpp/llama.cpp/src/llama-vocab.h +1 -0
  182. package/cpp/llama.cpp/src/llama.cpp +11 -7
  183. package/cpp/llama.cpp/src/unicode.cpp +5 -0
  184. package/cpp/rn-completion.cpp +2 -2
  185. package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
  186. package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
  187. package/ios/include/chat.h +1 -1
  188. package/ios/include/common.h +6 -2
  189. package/ios/include/llama.h +141 -38
  190. package/ios/libs/llama.xcframework/Info.plist +15 -15
  191. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  192. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  193. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  194. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
  195. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
  196. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  197. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  198. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  199. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  200. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  201. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  202. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  203. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  204. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  205. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  206. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
  207. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  208. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
  209. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
  210. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  211. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  219. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  220. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  221. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  222. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
  223. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
  224. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  225. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  226. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  227. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  228. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  231. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  232. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  233. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
  234. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  235. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
  236. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
  237. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  238. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  239. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
  240. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
  241. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  242. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  243. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  244. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  245. package/package.json +1 -2
  246. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
  247. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  248. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
  249. package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
  250. package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
  251. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  252. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  253. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -0,0 +1,184 @@
1
+ #pragma once
2
+
3
+ // Rename `_generic` functions if no native implementation is available.
4
+ // This effectively selects the generic implementation.
5
+
6
+ #if defined(GGML_CPU_GENERIC)
7
+ // quants.c
8
+ #define quantize_row_q8_0_generic quantize_row_q8_0
9
+ #define quantize_row_q8_1_generic quantize_row_q8_1
10
+ #define quantize_row_q8_K_generic quantize_row_q8_K
11
+ #define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0
12
+ #define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
13
+ #define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
14
+ #define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
15
+ #define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0
16
+ #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
17
+ #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
18
+ #define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
19
+ #define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K
20
+ #define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K
21
+ #define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K
22
+ #define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K
23
+ #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
24
+ #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
25
+ #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
26
+ #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
27
+ #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
28
+ #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
29
+ #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
30
+ #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
31
+ #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
32
+ // repack.cpp
33
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
34
+ #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
35
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
36
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
37
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
38
+ #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
39
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
40
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
41
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
42
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
43
+ #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
44
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
45
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
46
+ #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
47
+ // repack.cpp
48
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
49
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
50
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
51
+ #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
52
+ // repack.cpp
53
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
54
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
55
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
56
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
57
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
58
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
59
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
60
+ #elif defined(__POWERPC__) || defined(__powerpc__)
61
+ // ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
62
+ // quants.c
63
+ #define quantize_row_q8_K_generic quantize_row_q8_K
64
+ #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
65
+ #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
66
+ #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
67
+ // repack.cpp
68
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
69
+ #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
70
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
71
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
72
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
73
+ #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
74
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
75
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
76
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
77
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
78
+ #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
79
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
80
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
81
+ #elif defined(__loongarch64)
82
+ // quants.c
83
+ #define quantize_row_q8_K_generic quantize_row_q8_K
84
+ #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
85
+ #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
86
+ #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
87
+ // repack.cpp
88
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
89
+ #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
90
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
91
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
92
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
93
+ #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
94
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
95
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
96
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
97
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
98
+ #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
99
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
100
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
101
+ #elif defined(__riscv)
102
+ // quants.c
103
+ #define quantize_row_q8_K_generic quantize_row_q8_K
104
+ #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
105
+ #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
106
+ #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
107
+ #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
108
+ #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
109
+ #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
110
+ #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
111
+ #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
112
+ #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
113
+ #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
114
+ #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
115
+ // repack.cpp
116
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
117
+ #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
118
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
119
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
120
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
121
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
122
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
123
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
124
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
125
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
126
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
127
+ #elif defined(__s390x__)
128
+ // quants.c
129
+ #define quantize_row_q8_K_generic quantize_row_q8_K
130
+ #define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
131
+ #define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
132
+ #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
133
+ #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
134
+ #define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
135
+ #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
136
+ #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
137
+ #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
138
+ #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
139
+ #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
140
+ #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
141
+ #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
142
+ // repack.cpp
143
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
144
+ #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
145
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
146
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
147
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
148
+ #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
149
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
150
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
151
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
152
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
153
+ #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
154
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
155
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
156
+ #elif defined(__wasm__)
157
+ // quants.c
158
+ #define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
159
+ #define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
160
+ #define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
161
+ #define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
162
+ #define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
163
+ #define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
164
+ #define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
165
+ #define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
166
+ #define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
167
+ #define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
168
+ #define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
169
+ #define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
170
+ // repack.cpp
171
+ #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
172
+ #define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
173
+ #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
174
+ #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
175
+ #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
176
+ #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
177
+ #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
178
+ #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
179
+ #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
180
+ #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
181
+ #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
182
+ #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
183
+ #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
184
+ #endif
@@ -1,9 +1,10 @@
1
1
  #pragma once
2
2
 
3
3
  #include "ggml.h"
4
- #include "ggml-cpu-traits.h"
4
+ #include "traits.h"
5
5
  #include "ggml-cpu-impl.h"
6
6
  #include "ggml-impl.h"
7
+ #include "simd-mappings.h"
7
8
 
8
9
  #ifdef __cplusplus
9
10
 
@@ -12,11 +13,11 @@
12
13
  // convenience functions/macros for use in template calls
13
14
  // note: these won't be required after the 'traits' lookup table is used.
14
15
  static inline ggml_fp16_t f32_to_f16(float x) {
15
- return GGML_FP32_TO_FP16(x);
16
+ return GGML_CPU_FP32_TO_FP16(x);
16
17
  }
17
18
 
18
19
  static inline float f16_to_f32(ggml_fp16_t x) {
19
- return GGML_FP16_TO_FP32(x);
20
+ return GGML_CPU_FP16_TO_FP32(x);
20
21
  }
21
22
 
22
23
  static inline ggml_bf16_t f32_to_bf16(float x) {
@@ -62,11 +62,17 @@ struct ggml_compute_params {
62
62
  #if defined(__s390x__) && defined(__VEC__)
63
63
  #ifndef __VXE__
64
64
  #define __VXE__
65
- #endif
65
+ #endif // __VXE__
66
66
  #ifndef __VXE2__
67
67
  #define __VXE2__
68
- #endif
69
- #endif
68
+ #endif // __VXE2__
69
+ #endif // __s390x__ && __VEC__
70
+
71
+ #if defined(__s390x__) && defined(GGML_NNPA)
72
+ #ifndef __NNPA__
73
+ #define __NNPA__
74
+ #endif // __NNPA__
75
+ #endif // __s390x__ && GGML_NNPA
70
76
 
71
77
  #if defined(__ARM_FEATURE_SVE)
72
78
  #include <sys/prctl.h>
@@ -371,7 +377,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
371
377
  #define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
372
378
  #endif
373
379
 
374
- typedef signed char char8x16_t __attribute__((vector_size(16)));
380
+ typedef signed char char8x16_t __attribute__((vector_size(16)));
375
381
  typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
376
382
 
377
383
  typedef int8_t int8x16_t __attribute__((vector_size(16)));
@@ -382,10 +388,10 @@ typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
382
388
  typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
383
389
  typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
384
390
 
385
- typedef float float32x4_t __attribute__((vector_size(16)));
386
- typedef double double64x2_t __attribute((vector_size(16)));
391
+ typedef float float32x4_t __attribute__((vector_size(16)));
392
+ typedef double double64x2_t __attribute__((vector_size(16)));
387
393
 
388
- typedef signed long long long64x2_t __attribute((vector_size(16)));
394
+ typedef signed long long long64x2_t __attribute__((vector_size(16)));
389
395
  typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
390
396
 
391
397
  typedef struct ggml_uint8x16x2_t {
@@ -503,6 +509,9 @@ static __m256 __lasx_xvreplfr2vr_s(const float val) {
503
509
  // TODO: move to ggml-threading
504
510
  void ggml_barrier(struct ggml_threadpool * tp);
505
511
 
512
+ void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value);
513
+ int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value);
514
+
506
515
  #ifdef __cplusplus
507
516
  }
508
517
  #endif
@@ -3,11 +3,11 @@
3
3
 
4
4
  #include "ggml-backend-impl.h"
5
5
  #include "ggml-backend.h"
6
- #include "ggml-cpu-traits.h"
6
+ #include "traits.h"
7
7
  #include "ggml-cpu-impl.h"
8
8
  #include "ggml-cpu.h"
9
9
  #include "ggml-impl.h"
10
- #include "ggml-cpu-quants.h"
10
+ #include "quants.h"
11
11
  #include "ggml-threading.h"
12
12
  #include "unary-ops.h"
13
13
  #include "binary-ops.h"
@@ -72,15 +72,13 @@
72
72
  #define UNUSED GGML_UNUSED
73
73
  #define SWAP(x, y, T) do { T SWAP = x; (x) = y; (y) = SWAP; } while (0)
74
74
 
75
+ // precomputed f32 table for f16 (256 KB) (simd-mappings.h)
76
+ float ggml_table_f32_f16[1 << 16];
77
+
75
78
  #if defined(__ARM_ARCH)
76
79
  struct ggml_arm_arch_features_type {
77
- int has_neon;
78
- int has_dotprod;
79
- int has_i8mm;
80
- int has_sve;
81
80
  int sve_cnt;
82
- int has_sme;
83
- } ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
81
+ } ggml_arm_arch_features = { 0 };
84
82
  #endif
85
83
 
86
84
 
@@ -197,6 +195,7 @@ typedef pthread_t ggml_thread_t;
197
195
 
198
196
  static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
199
197
  [GGML_TYPE_F32] = {
198
+ .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
200
199
  .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
201
200
  .vec_dot_type = GGML_TYPE_F32,
202
201
  .nrows = 1,
@@ -559,6 +558,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
559
558
  #endif
560
559
  }
561
560
 
561
+ void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) {
562
+ atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed);
563
+ }
564
+
565
+ int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) {
566
+ return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed);
567
+ }
568
+
562
569
  #if defined(__gnu_linux__)
563
570
  static cpu_set_t ggml_get_numa_affinity(void) {
564
571
  cpu_set_t cpuset;
@@ -670,87 +677,15 @@ bool ggml_is_numa(void) {
670
677
 
671
678
  #if defined(__linux__) && defined(__aarch64__)
672
679
  #include <sys/auxv.h>
673
- #elif defined(__APPLE__)
674
- #include <sys/sysctl.h>
675
- #endif
676
-
677
- #if !defined(HWCAP2_I8MM)
678
- #define HWCAP2_I8MM (1 << 13)
679
- #endif
680
-
681
- #if !defined(HWCAP2_SME)
682
- #define HWCAP2_SME (1 << 23)
683
680
  #endif
684
681
 
685
682
  static void ggml_init_arm_arch_features(void) {
686
- #if defined(__linux__) && defined(__aarch64__)
687
- uint32_t hwcap = getauxval(AT_HWCAP);
688
- uint32_t hwcap2 = getauxval(AT_HWCAP2);
689
-
690
- ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
691
- ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
692
- ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
693
- ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
694
- ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);
695
-
696
- #if defined(__ARM_FEATURE_SVE)
683
+ #if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
697
684
  ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
698
685
  #endif
699
- #elif defined(__APPLE__)
700
- int oldp = 0;
701
- size_t size = sizeof(oldp);
702
- if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
703
- oldp = 0;
704
- }
705
- ggml_arm_arch_features.has_neon = oldp;
706
-
707
- if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
708
- oldp = 0;
709
- }
710
- ggml_arm_arch_features.has_dotprod = oldp;
711
-
712
- if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
713
- oldp = 0;
714
- }
715
- ggml_arm_arch_features.has_i8mm = oldp;
716
-
717
- if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
718
- oldp = 0;
719
- }
720
- ggml_arm_arch_features.has_sme = oldp;
721
-
722
- ggml_arm_arch_features.has_sve = 0;
723
- ggml_arm_arch_features.sve_cnt = 0;
724
- #else
725
- // Run-time CPU feature detection not implemented for this platform, fallback to compile time
726
- #if defined(__ARM_NEON)
727
- ggml_arm_arch_features.has_neon = 1;
728
- #else
729
- ggml_arm_arch_features.has_neon = 0;
730
- #endif
731
-
732
- #if defined(__ARM_FEATURE_MATMUL_INT8)
733
- ggml_arm_arch_features.has_i8mm = 1;
734
- #else
735
- ggml_arm_arch_features.has_i8mm = 0;
736
- #endif
737
-
738
- #if defined(__ARM_FEATURE_SVE)
739
- ggml_arm_arch_features.has_sve = 1;
740
- ggml_arm_arch_features.sve_cnt = 16;
741
- #else
742
- ggml_arm_arch_features.has_sve = 0;
743
- ggml_arm_arch_features.sve_cnt = 0;
744
- #endif
745
-
746
- #if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
747
- ggml_arm_arch_features.has_sme = 1;
748
- #else
749
- ggml_arm_arch_features.has_sme = 0;
750
- #endif
751
- #endif
752
686
  }
753
- #endif
687
+
688
+ #endif // __ARM_ARCH
754
689
 
755
690
  struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
756
691
  GGML_ASSERT(!ggml_get_no_alloc(ctx));
@@ -805,7 +740,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
805
740
  {
806
741
  assert(tensor->nb[0] == sizeof(ggml_fp16_t));
807
742
  for (int i = 0; i < n; i++) {
808
- ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_FP32_TO_FP16(value));
743
+ ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
809
744
  }
810
745
  } break;
811
746
  case GGML_TYPE_BF16:
@@ -864,7 +799,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
864
799
  {
865
800
  assert(tensor->nb[0] == sizeof(ggml_fp16_t));
866
801
  for (int i = 0; i < n; i++) {
867
- ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_FP32_TO_FP16(value));
802
+ ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
868
803
  }
869
804
  } break;
870
805
  case GGML_TYPE_BF16:
@@ -915,7 +850,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
915
850
  case GGML_TYPE_F16:
916
851
  {
917
852
  GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
918
- return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
853
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
919
854
  }
920
855
  case GGML_TYPE_BF16:
921
856
  {
@@ -960,7 +895,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
960
895
  case GGML_TYPE_F16:
961
896
  {
962
897
  GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
963
- ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
898
+ ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
964
899
  } break;
965
900
  case GGML_TYPE_BF16:
966
901
  {
@@ -989,7 +924,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
989
924
  case GGML_TYPE_I32:
990
925
  return ((int32_t *) data)[0];
991
926
  case GGML_TYPE_F16:
992
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
927
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
993
928
  case GGML_TYPE_BF16:
994
929
  return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
995
930
  case GGML_TYPE_F32:
@@ -1016,7 +951,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
1016
951
  } break;
1017
952
  case GGML_TYPE_F16:
1018
953
  {
1019
- ((ggml_fp16_t *)(data))[0] = GGML_FP32_TO_FP16(value);
954
+ ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
1020
955
  } break;
1021
956
  case GGML_TYPE_BF16:
1022
957
  {
@@ -1054,7 +989,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
1054
989
  }
1055
990
  case GGML_TYPE_F16:
1056
991
  {
1057
- return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
992
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
1058
993
  }
1059
994
  case GGML_TYPE_BF16:
1060
995
  {
@@ -1093,7 +1028,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
1093
1028
  } break;
1094
1029
  case GGML_TYPE_F16:
1095
1030
  {
1096
- ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
1031
+ ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
1097
1032
  } break;
1098
1033
  case GGML_TYPE_BF16:
1099
1034
  {
@@ -1120,7 +1055,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
1120
1055
  case GGML_TYPE_I32:
1121
1056
  return ((int32_t *) data)[0];
1122
1057
  case GGML_TYPE_F16:
1123
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
1058
+ return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
1124
1059
  case GGML_TYPE_BF16:
1125
1060
  return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
1126
1061
  case GGML_TYPE_F32:
@@ -1147,7 +1082,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
1147
1082
  } break;
1148
1083
  case GGML_TYPE_F16:
1149
1084
  {
1150
- ((ggml_fp16_t *)(data))[0] = GGML_FP32_TO_FP16(value);
1085
+ ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
1151
1086
  } break;
1152
1087
  case GGML_TYPE_BF16:
1153
1088
  {
@@ -1883,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
1883
1818
  {
1884
1819
  ggml_compute_forward_get_rows_back(params, tensor);
1885
1820
  } break;
1821
+ case GGML_OP_SET_ROWS:
1822
+ {
1823
+ ggml_compute_forward_set_rows(params, tensor);
1824
+ } break;
1886
1825
  case GGML_OP_DIAG:
1887
1826
  {
1888
1827
  ggml_compute_forward_diag(params, tensor);
@@ -1959,6 +1898,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
1959
1898
  {
1960
1899
  ggml_compute_forward_pad_reflect_1d(params, tensor);
1961
1900
  } break;
1901
+ case GGML_OP_ROLL:
1902
+ {
1903
+ ggml_compute_forward_roll(params, tensor);
1904
+ } break;
1962
1905
  case GGML_OP_ARANGE:
1963
1906
  {
1964
1907
  ggml_compute_forward_arange(params, tensor);
@@ -2232,6 +2175,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
2232
2175
  n_tasks = n_threads;
2233
2176
  } break;
2234
2177
  case GGML_OP_GET_ROWS:
2178
+ case GGML_OP_SET_ROWS:
2235
2179
  {
2236
2180
  // FIXME: get_rows can use additional threads, but the cost of launching additional threads
2237
2181
  // decreases performance with GPU offloading
@@ -2283,6 +2227,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
2283
2227
  case GGML_OP_UPSCALE:
2284
2228
  case GGML_OP_PAD:
2285
2229
  case GGML_OP_PAD_REFLECT_1D:
2230
+ case GGML_OP_ROLL:
2286
2231
  case GGML_OP_ARANGE:
2287
2232
  case GGML_OP_TIMESTEP_EMBEDDING:
2288
2233
  case GGML_OP_ARGSORT:
@@ -3185,6 +3130,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
3185
3130
  return ggml_graph_compute(cgraph, &cplan);
3186
3131
  }
3187
3132
 
3133
+ void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
3134
+ memcpy(y, x, n * sizeof(float));
3135
+ }
3136
+
3188
3137
  void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
3189
3138
  int64_t i = 0;
3190
3139
  #if defined(__F16C__)
@@ -3205,9 +3154,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
3205
3154
  __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
3206
3155
  _mm_storel_epi64((__m128i *)(y + i), y_vec);
3207
3156
  }
3157
+ #elif defined(__NNPA__)
3158
+ for (; i + 7 < n; i += 8) {
3159
+ float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
3160
+ float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
3161
+ uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
3162
+ uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3163
+ vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
3164
+ }
3165
+ for (; i + 3 < n; i += 4) {
3166
+ float32x4_t v_x = vec_xl(0, (const float *)(x + i));
3167
+ float32x4_t v_zero = vec_splats(0.0f);
3168
+ uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
3169
+ uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3170
+ vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
3171
+ }
3208
3172
  #endif
3209
3173
  for (; i < n; ++i) {
3210
- y[i] = GGML_FP32_TO_FP16(x[i]);
3174
+ y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
3211
3175
  }
3212
3176
  }
3213
3177
 
@@ -3231,9 +3195,25 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
3231
3195
  __m128 y_vec = _mm_cvtph_ps(x_vec);
3232
3196
  _mm_storeu_ps(y + i, y_vec);
3233
3197
  }
3198
+ #elif defined(__NNPA__)
3199
+ for (; i + 7 < n; i += 8) {
3200
+ uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
3201
+ uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
3202
+ float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
3203
+ float32x4_t v_yl = vec_extend_to_fp32_lo(v_yd, 0);
3204
+ vec_xst(v_yh, 0, (float *)(y + i + 0));
3205
+ vec_xst(v_yl, 0, (float *)(y + i + 4));
3206
+ }
3207
+ for (; i + 3 < n; i += 4) {
3208
+ uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
3209
+ uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
3210
+ float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
3211
+ vec_xst(v_yh, 0, (float *)(y + i));
3212
+ }
3234
3213
  #endif
3214
+
3235
3215
  for (; i < n; ++i) {
3236
- y[i] = GGML_FP16_TO_FP32(x[i]);
3216
+ y[i] = GGML_CPU_FP16_TO_FP32(x[i]);
3237
3217
  }
3238
3218
  }
3239
3219
 
@@ -3433,9 +3413,17 @@ int ggml_cpu_has_vxe(void) {
3433
3413
  #endif
3434
3414
  }
3435
3415
 
3416
+ int ggml_cpu_has_nnpa(void) {
3417
+ #if defined(GGML_NNPA)
3418
+ return 1;
3419
+ #else
3420
+ return 0;
3421
+ #endif
3422
+ }
3423
+
3436
3424
  int ggml_cpu_has_neon(void) {
3437
3425
  #if defined(__ARM_ARCH) && defined(__ARM_NEON)
3438
- return ggml_arm_arch_features.has_neon;
3426
+ return 1;
3439
3427
  #else
3440
3428
  return 0;
3441
3429
  #endif
@@ -3443,7 +3431,7 @@ int ggml_cpu_has_neon(void) {
3443
3431
 
3444
3432
  int ggml_cpu_has_dotprod(void) {
3445
3433
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
3446
- return ggml_arm_arch_features.has_dotprod;
3434
+ return 1;
3447
3435
  #else
3448
3436
  return 0;
3449
3437
  #endif
@@ -3451,7 +3439,7 @@ int ggml_cpu_has_dotprod(void) {
3451
3439
 
3452
3440
  int ggml_cpu_has_sve(void) {
3453
3441
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
3454
- return ggml_arm_arch_features.has_sve;
3442
+ return 1;
3455
3443
  #else
3456
3444
  return 0;
3457
3445
  #endif
@@ -3459,7 +3447,7 @@ int ggml_cpu_has_sve(void) {
3459
3447
 
3460
3448
  int ggml_cpu_has_matmul_int8(void) {
3461
3449
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
3462
- return ggml_arm_arch_features.has_i8mm;
3450
+ return 1;
3463
3451
  #else
3464
3452
  return 0;
3465
3453
  #endif
@@ -3475,14 +3463,14 @@ int ggml_cpu_get_sve_cnt(void) {
3475
3463
 
3476
3464
  int ggml_cpu_has_sme(void) {
3477
3465
  #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
3478
- return ggml_arm_arch_features.has_sme;
3466
+ return 1;
3479
3467
  #else
3480
3468
  return 0;
3481
3469
  #endif
3482
3470
  }
3483
3471
 
3484
3472
  void ggml_cpu_init(void) {
3485
- // needed to initialize f16 tables
3473
+ // needed to initialize ggml_time
3486
3474
  {
3487
3475
  struct ggml_init_params params = { 0, NULL, false };
3488
3476
  struct ggml_context * ctx = ggml_init(params);
@@ -3503,9 +3491,10 @@ void ggml_cpu_init(void) {
3503
3491
  uint16_t u16;
3504
3492
  ggml_fp16_t fp16;
3505
3493
  } u = {i};
3506
- float f = GGML_FP16_TO_FP32(u.fp16);
3507
- ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
3508
- ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
3494
+ float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
3495
+ ggml_table_f32_f16[i] = f;
3496
+ ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
3497
+ ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
3509
3498
  }
3510
3499
 
3511
3500
  const uint64_t t_end = ggml_time_us(); UNUSED(t_end);