cui-llama.rn 1.7.4 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
  4. package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/ggml-cpu/vec.h CHANGED
@@ -5,6 +5,7 @@
 #include "ggml-impl.h"
 #include "simd-mappings.h"
 #include "ggml.h"
+#include "ggml-cpu.h"
 
 #if defined(LM_GGML_USE_ACCELERATE)
 #include <Accelerate/Accelerate.h>
@@ -57,7 +58,7 @@ inline static void lm_ggml_vec_set_bf16(const int n, lm_ggml_bf16_t * x, const l
 inline static void lm_ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }
 inline static void lm_ggml_vec_add_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
-        z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) + LM_GGML_FP16_TO_FP32(y[i]));
+        z[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(x[i]) + LM_GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
 inline static void lm_ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; }
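Note on the change above, which repeats throughout this file: the generic LM_GGML_FP16_TO_FP32 / LM_GGML_FP32_TO_FP16 macros become CPU-backend-local LM_GGML_CPU_* variants, pulled in through the new "ggml-cpu.h" include, so the CPU backend can select its own conversion path. A minimal sketch of the widen-compute-narrow shape all of these f16 kernels share, assuming a compiler with _Float16 support; h2f/f2h are illustrative stand-ins, not the package's macros:

#include <stdint.h>
#include <string.h>

typedef uint16_t fp16_bits_t;              // raw IEEE-754 binary16 bits

static inline float h2f(fp16_bits_t h) {   // stand-in for LM_GGML_CPU_FP16_TO_FP32
    _Float16 f; memcpy(&f, &h, sizeof f); return (float) f;
}

static inline fp16_bits_t f2h(float f) {   // stand-in for LM_GGML_CPU_FP32_TO_FP16
    _Float16 h = (_Float16) f; fp16_bits_t b; memcpy(&b, &h, sizeof b); return b;
}

// every f16 kernel here widens to f32, computes, and narrows back,
// since there is no portable native f16 arithmetic
static inline void vec_add_f16_sketch(int n, fp16_bits_t * z,
                                      const fp16_bits_t * x, const fp16_bits_t * y) {
    for (int i = 0; i < n; ++i) {
        z[i] = f2h(h2f(x[i]) + h2f(y[i]));
    }
}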
@@ -66,7 +67,7 @@ inline static void lm_ggml_vec_acc1_f32(const int n, float * y, const float v)
 inline static void lm_ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; }
 inline static void lm_ggml_vec_sub_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
-        z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) - LM_GGML_FP16_TO_FP32(y[i]));
+        z[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(x[i]) - LM_GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
 inline static void lm_ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; }
@@ -74,20 +75,20 @@ inline static void lm_ggml_vec_cpy_f32 (const int n, float * y, const float * x)
 inline static void lm_ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; }
 inline static void lm_ggml_vec_neg_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(-LM_GGML_FP16_TO_FP32(x[i]));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(-LM_GGML_CPU_FP16_TO_FP32(x[i]));
     }
 }
 
 inline static void lm_ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
 inline static void lm_ggml_vec_mul_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
-        z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) * LM_GGML_FP16_TO_FP32(y[i]));
+        z[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(x[i]) * LM_GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
 inline static void lm_ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }
 inline static void lm_ggml_vec_div_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
-        z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) / LM_GGML_FP16_TO_FP32(y[i]));
+        z[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(x[i]) / LM_GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
 
@@ -130,13 +131,13 @@ inline static void lm_ggml_vec_dot_f16_unroll(const int n, const int xs, float *
     // leftovers
     for (int i = np; i < n; ++i) {
         for (int j = 0; j < LM_GGML_VEC_DOT_UNROLL; ++j) {
-            sumf[j] += (lm_ggml_float)(LM_GGML_FP16_TO_FP32(x[j][i])*LM_GGML_FP16_TO_FP32(y[i]));
+            sumf[j] += (lm_ggml_float)(LM_GGML_CPU_FP16_TO_FP32(x[j][i])*LM_GGML_CPU_FP16_TO_FP32(y[i]));
         }
     }
 #else
     for (int i = 0; i < n; ++i) {
         for (int j = 0; j < LM_GGML_VEC_DOT_UNROLL; ++j) {
-            sumf[j] += (lm_ggml_float)(LM_GGML_FP16_TO_FP32(x[j][i])*LM_GGML_FP16_TO_FP32(y[i]));
+            sumf[j] += (lm_ggml_float)(LM_GGML_CPU_FP16_TO_FP32(x[j][i])*LM_GGML_CPU_FP16_TO_FP32(y[i]));
         }
     }
 #endif
@@ -148,27 +149,108 @@ inline static void lm_ggml_vec_dot_f16_unroll(const int n, const int xs, float *
 
 inline static void lm_ggml_vec_mad_f32(const int n, float * LM_GGML_RESTRICT y, const float * LM_GGML_RESTRICT x, const float v) {
 #if defined(LM_GGML_SIMD)
-    const int np = (n & ~(LM_GGML_F32_STEP - 1));
+    #if defined(__ARM_FEATURE_SVE)
 
-    LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
+        const int sve_register_length = lm_ggml_cpu_get_sve_cnt() * 8;
+        const int lm_ggml_f32_epr = sve_register_length / 32;//8;//svcntw(); // SVE128:4, SVE256:8, SVE512:16
+        const int lm_ggml_f32_step = 8 * lm_ggml_f32_epr; // choose 8 SVE registers
+        LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
 
-    LM_GGML_F32_VEC ax[LM_GGML_F32_ARR];
-    LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
+        const int np = (n & ~(lm_ggml_f32_step - 1));
+        svfloat32_t ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8;
+        svfloat32_t ay1, ay2, ay3, ay4, ay5, ay6, ay7, ay8;
+        for (int i = 0; i < np; i += lm_ggml_f32_step) {
 
-    for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
-        for (int j = 0; j < LM_GGML_F32_ARR; j++) {
-            ax[j] = LM_GGML_F32_VEC_LOAD(x + i + j*LM_GGML_F32_EPR);
-            ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
-            ay[j] = LM_GGML_F32_VEC_FMA(ay[j], ax[j], vx);
+            ax1 = LM_GGML_F32_VEC_LOAD(x + i);
+            ay1 = LM_GGML_F32_VEC_LOAD(y + i);
+            ay1 = LM_GGML_F32_VEC_FMA(ax1, vx, ay1);
 
-            LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
+            LM_GGML_F32_VEC_STORE(y + i, ay1);
+
+            ax2 = LM_GGML_F32_VEC_LOAD(x + i + 1*lm_ggml_f32_epr);
+            ay2 = LM_GGML_F32_VEC_LOAD(y + i + 1*lm_ggml_f32_epr);
+            ay2 = LM_GGML_F32_VEC_FMA(ax2, vx, ay2);
+
+            LM_GGML_F32_VEC_STORE(y + i + 1*lm_ggml_f32_epr, ay2);
+
+            ax3 = LM_GGML_F32_VEC_LOAD(x + i + 2*lm_ggml_f32_epr);
+            ay3 = LM_GGML_F32_VEC_LOAD(y + i + 2*lm_ggml_f32_epr);
+            ay3 = LM_GGML_F32_VEC_FMA(ax3, vx, ay3);
+
+            LM_GGML_F32_VEC_STORE(y + i + 2*lm_ggml_f32_epr, ay3);
+
+            ax4 = LM_GGML_F32_VEC_LOAD(x + i + 3*lm_ggml_f32_epr);
+            ay4 = LM_GGML_F32_VEC_LOAD(y + i + 3*lm_ggml_f32_epr);
+            ay4 = LM_GGML_F32_VEC_FMA(ax4, vx, ay4);
+
+            LM_GGML_F32_VEC_STORE(y + i + 3*lm_ggml_f32_epr, ay4);
+
+            ax5 = LM_GGML_F32_VEC_LOAD(x + i + 4*lm_ggml_f32_epr);
+            ay5 = LM_GGML_F32_VEC_LOAD(y + i + 4*lm_ggml_f32_epr);
+            ay5 = LM_GGML_F32_VEC_FMA(ax5, vx, ay5);
+
+            LM_GGML_F32_VEC_STORE(y + i + 4*lm_ggml_f32_epr, ay5);
+
+            ax6 = LM_GGML_F32_VEC_LOAD(x + i + 5*lm_ggml_f32_epr);
+            ay6 = LM_GGML_F32_VEC_LOAD(y + i + 5*lm_ggml_f32_epr);
+            ay6 = LM_GGML_F32_VEC_FMA(ax6, vx, ay6);
+
+            LM_GGML_F32_VEC_STORE(y + i + 5*lm_ggml_f32_epr, ay6);
+
+            ax7 = LM_GGML_F32_VEC_LOAD(x + i + 6*lm_ggml_f32_epr);
+            ay7 = LM_GGML_F32_VEC_LOAD(y + i + 6*lm_ggml_f32_epr);
+            ay7 = LM_GGML_F32_VEC_FMA(ax7, vx, ay7);
+
+            LM_GGML_F32_VEC_STORE(y + i + 6*lm_ggml_f32_epr, ay7);
+
+            ax8 = LM_GGML_F32_VEC_LOAD(x + i + 7*lm_ggml_f32_epr);
+            ay8 = LM_GGML_F32_VEC_LOAD(y + i + 7*lm_ggml_f32_epr);
+            ay8 = LM_GGML_F32_VEC_FMA(ax8, vx, ay8);
+
+            LM_GGML_F32_VEC_STORE(y + i + 7*lm_ggml_f32_epr, ay8);
         }
-    }
+        // leftovers
+        // Since 8 unrolls are done in above loop, leftovers lie in range [0, lm_ggml_f32_step] which is handled in below loop
+        const int np2 = (n & ~(lm_ggml_f32_epr - 1));
+        for (int i = np; i < np2; i += lm_ggml_f32_epr) {
+            ax1 = LM_GGML_F32_VEC_LOAD(x + i);
+            ay1 = LM_GGML_F32_VEC_LOAD(y + i);
+            ay1 = LM_GGML_F32_VEC_FMA(ax1, vx, ay1);
+
+            LM_GGML_F32_VEC_STORE(y + i, ay1);
+        }
+        // maximum number of leftover elements will be less that lm_ggml_f32_epr. Apply predicated svmad on available elements only
+        if (np2 < n) {
+            svbool_t pg =svwhilelt_b32(np2, n);
+            ax1 = svld1_f32(pg, x + np2);
+            ay1 = svld1_f32(pg, y + np2);
+            ay1 = svmad_f32_m(pg, ax1, vx, ay1);
+
+            svst1_f32(pg, y + np2, ay1);
+        }
+    #else
+        const int np = (n & ~(LM_GGML_F32_STEP - 1));
 
-    // leftovers
-    for (int i = np; i < n; ++i) {
-        y[i] += x[i]*v;
-    }
+        LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
+
+        LM_GGML_F32_VEC ax[LM_GGML_F32_ARR];
+        LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
+
+        for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
+            for (int j = 0; j < LM_GGML_F32_ARR; j++) {
+                ax[j] = LM_GGML_F32_VEC_LOAD(x + i + j*LM_GGML_F32_EPR);
+                ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
+                ay[j] = LM_GGML_F32_VEC_FMA(ay[j], ax[j], vx);
+
+                LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
+            }
+        }
+
+        // leftovers
+        for (int i = np; i < n; ++i) {
+            y[i] += x[i]*v;
+        }
+    #endif
 #else
     // scalar
     for (int i = 0; i < n; ++i) {
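The new SVE branch above queries the runtime vector length (lm_ggml_cpu_get_sve_cnt() * 8 bytes), unrolls eight vector registers per iteration, drains the remaining full vectors one at a time, and finishes with a single predicated svmad so no scalar tail loop remains. A condensed sketch of that tail-predication idiom, assuming an SVE-enabled toolchain and <arm_sve.h>; it illustrates the pattern, not the package's kernel:

#include <arm_sve.h>

// y[i] += x[i] * v, with the ragged tail handled by one predicated pass
void saxpy_sve_sketch(int n, float * y, const float * x, float v) {
    const int epr = (int) svcntw();            // f32 lanes per SVE register
    int i = 0;
    for (; i + epr <= n; i += epr) {           // full vectors: all-true predicate
        svbool_t pg = svptrue_b32();
        svfloat32_t ax = svld1_f32(pg, x + i);
        svfloat32_t ay = svld1_f32(pg, y + i);
        ay = svmla_n_f32_m(pg, ay, ax, v);     // ay += ax * v
        svst1_f32(pg, y + i, ay);
    }
    if (i < n) {                               // tail: predicate masks off lanes >= n
        svbool_t pg = svwhilelt_b32((int32_t) i, (int32_t) n);
        svfloat32_t ax = svld1_f32(pg, x + i);
        svfloat32_t ay = svld1_f32(pg, y + i);
        ay = svmla_n_f32_m(pg, ay, ax, v);
        svst1_f32(pg, y + i, ay);
    }
}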
@@ -198,12 +280,12 @@ inline static void lm_ggml_vec_mad_f16(const int n, lm_ggml_fp16_t * LM_GGML_RES
 
     // leftovers
     for (int i = np; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i]) + LM_GGML_FP16_TO_FP32(x[i])*v);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(y[i]) + LM_GGML_CPU_FP16_TO_FP32(x[i])*v);
     }
 #else
     // scalar
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i]) + LM_GGML_FP16_TO_FP32(x[i])*v);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(y[i]) + LM_GGML_CPU_FP16_TO_FP32(x[i])*v);
     }
 #endif
 }
@@ -220,36 +302,45 @@ inline static void lm_ggml_vec_mad_f32_unroll(const int n, const int xs, const i
 }
 
 #if defined(LM_GGML_SIMD)
-    const int np = (n & ~(LM_GGML_F32_STEP - 1));
+    #if defined(__ARM_FEATURE_SVE)
+        // scalar Route to scalar implementation //TODO: Write SVE code
+        for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
+            for (int i = 0; i < n; ++i) {
+                y[i] += x[k][i]*v[k][0];
+            }
+        }
+    #else
+        const int np = (n & ~(LM_GGML_F32_STEP - 1));
 
-    LM_GGML_F32_VEC vx[LM_GGML_VEC_MAD_UNROLL];
+        LM_GGML_F32_VEC vx[LM_GGML_VEC_MAD_UNROLL];
 
-    for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-        vx[k] = LM_GGML_F32_VEC_SET1(v[k][0]);
-    }
+        for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
+            vx[k] = LM_GGML_F32_VEC_SET1(v[k][0]);
+        }
 
-    LM_GGML_F32_VEC ax[LM_GGML_VEC_MAD_UNROLL][LM_GGML_F32_ARR];
-    LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
+        LM_GGML_F32_VEC ax[LM_GGML_VEC_MAD_UNROLL][LM_GGML_F32_ARR];
+        LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
 
-    for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
-        for (int j = 0; j < LM_GGML_F32_ARR; j++) {
-            ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
+        for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
+            for (int j = 0; j < LM_GGML_F32_ARR; j++) {
+                ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
 
-            for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-                ax[k][j] = LM_GGML_F32_VEC_LOAD(x[k] + i + j*LM_GGML_F32_EPR);
-                ay[j] = LM_GGML_F32_VEC_FMA(ay[j], ax[k][j], vx[k]);
-            }
+                for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
+                    ax[k][j] = LM_GGML_F32_VEC_LOAD(x[k] + i + j*LM_GGML_F32_EPR);
+                    ay[j] = LM_GGML_F32_VEC_FMA(ay[j], ax[k][j], vx[k]);
+                }
 
-            LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
+                LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
+            }
         }
-    }
 
-    // leftovers
-    for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-        for (int i = np; i < n; ++i) {
-            y[i] += x[k][i]*v[k][0];
+        // leftovers
+        for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
+            for (int i = np; i < n; ++i) {
+                y[i] += x[k][i]*v[k][0];
+            }
         }
-    }
+    #endif
 #else
     // scalar
     for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
@@ -265,25 +356,53 @@ inline static void lm_ggml_vec_scale_f32(const int n, float * y, const float v
 #if defined(LM_GGML_USE_ACCELERATE)
     vDSP_vsmul(y, 1, &v, y, 1, n);
 #elif defined(LM_GGML_SIMD)
-    const int np = (n & ~(LM_GGML_F32_STEP - 1));
+    #if defined(__ARM_FEATURE_SVE)
+        const int sve_register_length = lm_ggml_cpu_get_sve_cnt() * 8;
+        const int lm_ggml_f32_epr = sve_register_length / 32;//8;//svcntw(); // SVE128:4, SVE256:8, SVE512:16
+        const int lm_ggml_f32_step = 2 * lm_ggml_f32_epr;
+
+        LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
+        const int np = (n & ~(lm_ggml_f32_step - 1));
+        svfloat32_t ay1;
+        svfloat32_t ay2;
+        for (int i = 0; i < np; i += lm_ggml_f32_step) {
+            ay1 = LM_GGML_F32_VEC_LOAD(y + i);
+            ay1 = LM_GGML_F32_VEC_MUL(ay1, vx);
+            LM_GGML_F32_VEC_STORE(y + i, ay1);
+
+            ay2 = LM_GGML_F32_VEC_LOAD(y + i + 1*lm_ggml_f32_epr);
+            ay2 = LM_GGML_F32_VEC_MUL(ay2, vx);
+            LM_GGML_F32_VEC_STORE(y + i + 1*lm_ggml_f32_epr, ay2);
+        }
+        // leftovers
+        // maximum number of leftover elements will be less that lm_ggml_f32_epr. Apply predicated svmad on available elements only
+        if (np < n) {
+            svbool_t pg = svwhilelt_b32(np, n);
+            ay1 = svld1_f32(pg, y + np);
+            ay1 = svmul_f32_m(pg, ay1, vx);
+            svst1_f32(pg, y + np, ay1);
+        }
+    #else
+        const int np = (n & ~(LM_GGML_F32_STEP - 1));
 
-    LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
+        LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
 
-    LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
+        LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
 
-    for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
-        for (int j = 0; j < LM_GGML_F32_ARR; j++) {
-            ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
-            ay[j] = LM_GGML_F32_VEC_MUL(ay[j], vx);
+        for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
+            for (int j = 0; j < LM_GGML_F32_ARR; j++) {
+                ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
+                ay[j] = LM_GGML_F32_VEC_MUL(ay[j], vx);
 
-            LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
+                LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
+            }
         }
-    }
 
-    // leftovers
-    for (int i = np; i < n; ++i) {
-        y[i] *= v;
-    }
+        // leftovers
+        for (int i = np; i < n; ++i) {
+            y[i] *= v;
+        }
+    #endif
 #else
     // scalar
     for (int i = 0; i < n; ++i) {
@@ -311,12 +430,12 @@ inline static void lm_ggml_vec_scale_f16(const int n, lm_ggml_fp16_t * y, const
 
     // leftovers
     for (int i = np; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i])*v);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(y[i])*v);
     }
 #else
     // scalar
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i])*v);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(y[i])*v);
     }
 #endif
 }
@@ -325,103 +444,103 @@ inline static void lm_ggml_vec_norm_f32 (const int n, float * s, const float * x
 inline static void lm_ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
 inline static void lm_ggml_vec_sqr_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        float v = LM_GGML_FP16_TO_FP32(x[i]);
-        y[i] = LM_GGML_FP32_TO_FP16(v*v);
+        float v = LM_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(v*v);
     }
 }
 inline static void lm_ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
 inline static void lm_ggml_vec_sqrt_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(sqrtf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(sqrtf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
 inline static void lm_ggml_vec_log_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(logf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(logf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_sin_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sinf(x[i]); }
 inline static void lm_ggml_vec_sin_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(sinf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(sinf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_cos_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = cosf(x[i]); }
 inline static void lm_ggml_vec_cos_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(cosf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(cosf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
 inline static void lm_ggml_vec_abs_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(fabsf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(fabsf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
 inline static void lm_ggml_vec_sgn_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        float v = LM_GGML_FP16_TO_FP32(x[i]);
-        y[i] = LM_GGML_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f));
+        float v = LM_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f));
     }
 }
 inline static void lm_ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
 inline static void lm_ggml_vec_step_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16((LM_GGML_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16((LM_GGML_CPU_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f);
     }
 }
 inline static void lm_ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
 inline static void lm_ggml_vec_tanh_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(tanhf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(tanhf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
 inline static void lm_ggml_vec_elu_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(expm1f(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(expm1f(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 inline static void lm_ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
 inline static void lm_ggml_vec_relu_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        float v = LM_GGML_FP16_TO_FP32(x[i]);
-        y[i] = LM_GGML_FP32_TO_FP16((v > 0.f) ? v : 0.f);
+        float v = LM_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16((v > 0.f) ? v : 0.f);
     }
 }
 inline static void lm_ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
 inline static void lm_ggml_vec_leaky_relu_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x, const float ns) {
     for (int i = 0; i < n; ++i) {
-        float v = LM_GGML_FP16_TO_FP32(x[i]);
-        y[i] = LM_GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
+        float v = LM_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
     }
 }
 inline static void lm_ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
 inline static void lm_ggml_vec_sigmoid_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(1.f / (1.f + expf(-LM_GGML_FP16_TO_FP32(x[i]))));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(1.f / (1.f + expf(-LM_GGML_CPU_FP16_TO_FP32(x[i]))));
     }
 }
 // TODO: optimize performance
 inline static void lm_ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
 inline static void lm_ggml_vec_hardswish_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        float v = LM_GGML_FP16_TO_FP32(x[i]);
-        y[i] = LM_GGML_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f)));
+        float v = LM_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f)));
     }
 }
 inline static void lm_ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
 inline static void lm_ggml_vec_hardsigmoid_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (LM_GGML_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f)));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (LM_GGML_CPU_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f)));
     }
 }
 inline static void lm_ggml_vec_exp_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = expf(x[i]); }
 inline static void lm_ggml_vec_exp_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        y[i] = LM_GGML_FP32_TO_FP16(expf(LM_GGML_FP16_TO_FP32(x[i])));
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(expf(LM_GGML_CPU_FP16_TO_FP32(x[i])));
     }
 }
 
@@ -443,9 +562,9 @@ inline static void lm_ggml_vec_gelu_f16(const int n, lm_ggml_fp16_t * y, const l
 
 inline static void lm_ggml_vec_gelu_erf_f16(const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        float xi = LM_GGML_FP16_TO_FP32(x[i]);
+        float xi = LM_GGML_CPU_FP16_TO_FP32(x[i]);
         float res = 0.5f*xi*(1.0f + erff(xi*SQRT_2_INV));
-        y[i] = LM_GGML_FP32_TO_FP16(res);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(res);
     }
 }
 
@@ -458,9 +577,9 @@ inline static void lm_ggml_vec_gelu_f32(const int n, float * y, const float * x)
         } else if (x[i] >= 10.0f) {
             y[i] = x[i];
         } else {
-            lm_ggml_fp16_t fp16 = LM_GGML_FP32_TO_FP16(x[i]);
+            lm_ggml_fp16_t fp16 = LM_GGML_CPU_FP32_TO_FP16(x[i]);
             memcpy(&t, &fp16, sizeof(uint16_t));
-            y[i] = LM_GGML_FP16_TO_FP32(lm_ggml_table_gelu_f16[t]);
+            y[i] = LM_GGML_CPU_FP16_TO_FP32(lm_ggml_table_gelu_f16[t]);
         }
     }
 }
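In the slow path above, lm_ggml_vec_gelu_f32 converts x to its binary16 bit pattern and uses those 16 bits to index the precomputed lm_ggml_table_gelu_f16 table, trading the full GELU evaluation for one conversion plus one load; the gelu_quick hunk below uses the same trick. A self-contained sketch of the idea, with hypothetical names, the erf form of GELU, and _Float16 assumed available:

#include <stdint.h>
#include <string.h>
#include <math.h>

static uint16_t gelu_table[1 << 16];   // gelu(x) in f16, indexed by the f16 bits of x

static uint16_t f32_to_h(float f) { _Float16 h = (_Float16) f; uint16_t b; memcpy(&b, &h, 2); return b; }
static float    h_to_f32(uint16_t b) { _Float16 h; memcpy(&h, &b, 2); return (float) h; }

static void init_gelu_table(void) {    // fill once at startup
    for (uint32_t t = 0; t < (1u << 16); ++t) {
        float x = h_to_f32((uint16_t) t);
        gelu_table[t] = f32_to_h(0.5f * x * (1.0f + erff(x * 0.70710678f)));
    }
}

static float gelu_via_table(float x) { // one conversion + one table lookup
    return h_to_f32(gelu_table[f32_to_h(x)]);
}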
@@ -494,9 +613,9 @@ inline static float lm_ggml_gelu_quick_f32(float x) {
 inline static void lm_ggml_vec_gelu_quick_f32(const int n, float * y, const float * x) {
     uint16_t t;
     for (int i = 0; i < n; ++i) {
-        lm_ggml_fp16_t fp16 = LM_GGML_FP32_TO_FP16(x[i]);
+        lm_ggml_fp16_t fp16 = LM_GGML_CPU_FP32_TO_FP16(x[i]);
         memcpy(&t, &fp16, sizeof(uint16_t));
-        y[i] = LM_GGML_FP16_TO_FP32(lm_ggml_table_gelu_quick_f16[t]);
+        y[i] = LM_GGML_CPU_FP16_TO_FP32(lm_ggml_table_gelu_quick_f16[t]);
     }
 }
 #else
@@ -509,8 +628,8 @@ inline static void lm_ggml_vec_gelu_quick_f32(const int n, float * y, const floa
 
 inline static void lm_ggml_vec_gelu_quick_f16(const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
-        float v = LM_GGML_FP16_TO_FP32(x[i]);
-        y[i] = LM_GGML_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))));
+        float v = LM_GGML_CPU_FP16_TO_FP32(x[i]);
+        y[i] = LM_GGML_CPU_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))));
     }
 }
 
@@ -519,8 +638,8 @@ inline static float lm_ggml_silu_f32(float x) {
     return x/(1.0f + expf(-x));
 }
 inline static lm_ggml_fp16_t lm_ggml_silu_f16(lm_ggml_fp16_t x) {
-    float v = LM_GGML_FP16_TO_FP32(x);
-    return LM_GGML_FP32_TO_FP16(v/(1.0f + expf(-v)));
+    float v = LM_GGML_CPU_FP16_TO_FP32(x);
+    return LM_GGML_CPU_FP32_TO_FP16(v/(1.0f + expf(-v)));
 }
 
 #if __FINITE_MATH_ONLY__
@@ -528,6 +647,42 @@ inline static lm_ggml_fp16_t lm_ggml_silu_f16(lm_ggml_fp16_t x) {
 #error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461"
 #endif
 
+/* Below function was borrowed from the GitHub repository:
+https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp */
+#if defined(__ARM_FEATURE_SVE) && defined(__aarch64__)
+    inline static svfloat32_t exp_ps_sve(svbool_t pg, svfloat32_t src) {
+        // Constants
+        const svfloat32_t log2_e = svdup_n_f32(1.4426950409f);
+        const svfloat32_t ln2 = svdup_n_f32(0.6931473921f);
+        const svfloat32_t half_ln2_sq = svdup_n_f32(0.2413862043f);
+        const svuint32_t not_mask17 = svdup_n_u32(~((1u << 17) - 1));
+        const svfloat32_t one = svdup_n_f32(1.0f);
+        const svfloat32_t inactive1 = svdup_n_f32(0.0f);
+        const svint32_t inactive2 = svdup_n_s32(0);
+
+        // Algorithm starts here
+        svfloat32_t t0 = svmul_f32_m(pg, src, log2_e);  // y = x * log2(e)
+        svfloat32_t t1 = svrintm_f32_m(inactive1, pg, t0);  // rount to int (float)
+        svint32_t t2 = svcvt_s32_f32_m(inactive2, pg, t1);  // n
+
+        t1 = svsub_f32_m(pg, t0, t1);  // a = y - floor(y)
+        t1 = svadd_f32_m(pg, t1, one);  // b = a + 1
+
+        svuint32_t t3 = svlsr_n_u32_m(pg, svreinterpret_u32_f32(t1), 17);  // v = b >> 17 (u32)
+        svfloat32_t t4 = svexpa_f32(t3);  // c = fexpa(v)
+        t4 = svscale_f32_m(pg, t4, t2);  // fexpa(v) * 2^(n)
+
+        // and_(t2.d, t1.d, not_mask17.d)
+        svfloat32_t t5 = svreinterpret_f32_u32(svand_u32_m(pg, svreinterpret_u32_f32(t1), not_mask17));
+        t5 = svsub_f32_m(pg, t1, t5);  // z
+        t0 = svmla_f32_m(pg, ln2, t5, half_ln2_sq);  // ln2 + half_ln2_sq * z
+        t0 = svmla_f32_m(pg, one, t5, t0);  // 1 + (ln2 * z) + (half_ln2_sq * z * z)
+        t0 = svmul_f32_m(pg, t0, t4);  // Final result
+
+        return t0;
+    }
+#endif
+
 
 #if defined(__ARM_NEON) && defined(__aarch64__)
 // adapted from arm limited optimized routine
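The borrowed exp_ps_sve routine evaluates expf across a whole SVE vector by splitting y = x * log2(e) into an integer part n and a fraction a, producing 2^a from the FEXPA table instruction plus a short polynomial correction, then scaling by 2^n. A scalar model of that decomposition (illustrative only; exp2f and ldexpf stand in for the FEXPA lookup and the svscale step):

#include <math.h>

static float exp_decomposed_sketch(float x) {
    float y = x * 1.4426950409f;   // y = x * log2(e), so exp(x) = 2^y
    float t = floorf(y);           // integer part n (as a float)
    int   n = (int) t;
    float a = y - t;               // fractional part, in [0, 1)
    return ldexpf(exp2f(a), n);    // 2^a * 2^n == 2^y
}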
@@ -733,9 +888,9 @@ inline static float lm_ggml_silu_backward_f32(float x, float dy) {
 }
 
 inline static lm_ggml_fp16_t lm_ggml_silu_backward_f16(lm_ggml_fp16_t x, lm_ggml_fp16_t dy) {
-    const float v = LM_GGML_FP16_TO_FP32(x);
+    const float v = LM_GGML_CPU_FP16_TO_FP32(x);
     const float s = 1.0f/(1.0f + expf(-v));
-    return LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s)));
+    return LM_GGML_CPU_FP32_TO_FP16(LM_GGML_CPU_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s)));
 }
 
 inline static void lm_ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) {
@@ -773,7 +928,7 @@ inline static void lm_ggml_vec_sum_f32_ggf(const int n, lm_ggml_float * s, const
 inline static void lm_ggml_vec_sum_f16_ggf(const int n, float * s, const lm_ggml_fp16_t * x) {
     float sum = 0.0f;
     for (int i = 0; i < n; ++i) {
-        sum += LM_GGML_FP16_TO_FP32(x[i]);
+        sum += LM_GGML_CPU_FP16_TO_FP32(x[i]);
     }
     *s = sum;
 }
package/cpp/ggml-cpu.h CHANGED
@@ -101,6 +101,7 @@ extern "C" {
     LM_GGML_BACKEND_API int lm_ggml_cpu_has_riscv_v (void);
     LM_GGML_BACKEND_API int lm_ggml_cpu_has_vsx (void);
     LM_GGML_BACKEND_API int lm_ggml_cpu_has_vxe (void);
+    LM_GGML_BACKEND_API int lm_ggml_cpu_has_nnpa (void);
     LM_GGML_BACKEND_API int lm_ggml_cpu_has_wasm_simd (void);
     LM_GGML_BACKEND_API int lm_ggml_cpu_has_llamafile (void);
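The one-line addition to ggml-cpu.h is a runtime feature probe for IBM z's NNPA facility, alongside the existing VSX/VXE checks; like its siblings it returns nonzero when the feature is available. A small usage sketch, assuming the header is on the include path:

#include <stdio.h>
#include "ggml-cpu.h"

// print which SIMD facilities this build/host reports (nonzero = available)
static void report_simd_features(void) {
    printf("vsx:  %d\n", lm_ggml_cpu_has_vsx());
    printf("vxe:  %d\n", lm_ggml_cpu_has_vxe());
    printf("nnpa: %d\n", lm_ggml_cpu_has_nnpa());  // added in 1.7.6
}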