cui-llama.rn 1.6.0 → 1.7.0

This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
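
The hunk expanded below is the deletion of package/cpp/vec.h (entry 235, +0 -802). Judging by the rename entries above (41 and 42), these vector helpers were relocated into the new package/cpp/ggml-cpu/ subtree rather than removed from the package outright.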
@@ -1,802 +0,0 @@
- // Vectorized functions for fundamental operations
-
- #pragma once
-
- #include "ggml-impl.h"
- #include "simd-mappings.h"
- #include "ggml.h"
-
- #if defined(LM_GGML_USE_ACCELERATE)
- #include <Accelerate/Accelerate.h>
- #endif
-
- // floating point type used to accumulate sums
- typedef double lm_ggml_float;
-
- #define LM_GGML_GELU_FP16
- #define LM_GGML_GELU_QUICK_FP16
-
- #define LM_GGML_SOFT_MAX_UNROLL 4
- #define LM_GGML_VEC_DOT_UNROLL 2
- #define LM_GGML_VEC_MAD_UNROLL 32
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- //
- // global data
- //
-
- // precomputed gelu table for f16 (128 KB)
- extern lm_ggml_fp16_t lm_ggml_table_gelu_f16[1 << 16];
-
- // precomputed quick gelu table for f16 (128 KB)
- extern lm_ggml_fp16_t lm_ggml_table_gelu_quick_f16[1 << 16];
-
- //
- // fundamental operations
- //
-
- void lm_ggml_vec_dot_f32(int n, float * LM_GGML_RESTRICT s, size_t bs, const float * LM_GGML_RESTRICT x, size_t bx, const float * LM_GGML_RESTRICT y, size_t by, int nrc);
- void lm_ggml_vec_dot_bf16(int n, float * LM_GGML_RESTRICT s, size_t bs, lm_ggml_bf16_t * LM_GGML_RESTRICT x, size_t bx, lm_ggml_bf16_t * LM_GGML_RESTRICT y, size_t by, int nrc);
- void lm_ggml_vec_dot_f16(int n, float * LM_GGML_RESTRICT s, size_t bs, lm_ggml_fp16_t * LM_GGML_RESTRICT x, size_t bx, lm_ggml_fp16_t * LM_GGML_RESTRICT y, size_t by, int nrc);
-
- void lm_ggml_vec_silu_f32(const int n, float * y, const float * x);
- lm_ggml_float lm_ggml_vec_soft_max_f32(const int n, float * y, const float * x, float max);
- lm_ggml_float lm_ggml_vec_log_soft_max_f32(const int n, float * y, const float * x, float max);
-
- inline static void lm_ggml_vec_set_i8(const int n, int8_t * x, const int8_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
- inline static void lm_ggml_vec_set_i16(const int n, int16_t * x, const int16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
-
- inline static void lm_ggml_vec_set_i32(const int n, int32_t * x, const int32_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
- inline static void lm_ggml_vec_cpy_i32(const int n, int32_t * y, const int32_t * x) { for (int i = 0; i < n; ++i) y[i] = x[i]; }
-
- inline static void lm_ggml_vec_set_f16(const int n, lm_ggml_fp16_t * x, const lm_ggml_fp16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
- inline static void lm_ggml_vec_set_bf16(const int n, lm_ggml_bf16_t * x, const lm_ggml_bf16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
- inline static void lm_ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }
- inline static void lm_ggml_vec_add_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
-     for (int i = 0; i < n; ++i) {
-         z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) + LM_GGML_FP16_TO_FP32(y[i]));
-     }
- }
- inline static void lm_ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; }
- inline static void lm_ggml_vec_acc_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] += x[i]; }
- inline static void lm_ggml_vec_acc1_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] += v; }
- inline static void lm_ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; }
- inline static void lm_ggml_vec_sub_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
-     for (int i = 0; i < n; ++i) {
-         z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) - LM_GGML_FP16_TO_FP32(y[i]));
-     }
- }
- inline static void lm_ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; }
- inline static void lm_ggml_vec_cpy_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]; }
- inline static void lm_ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; }
- inline static void lm_ggml_vec_neg_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(-LM_GGML_FP16_TO_FP32(x[i]));
-     }
- }
-
- inline static void lm_ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
- inline static void lm_ggml_vec_mul_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
-     for (int i = 0; i < n; ++i) {
-         z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) * LM_GGML_FP16_TO_FP32(y[i]));
-     }
- }
- inline static void lm_ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }
- inline static void lm_ggml_vec_div_f16 (const int n, lm_ggml_fp16_t * z, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * y) {
-     for (int i = 0; i < n; ++i) {
-         z[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(x[i]) / LM_GGML_FP16_TO_FP32(y[i]));
-     }
- }
-
- // compute LM_GGML_VEC_DOT_UNROLL dot products at once
- // xs - x row stride in bytes
- inline static void lm_ggml_vec_dot_f16_unroll(const int n, const int xs, float * LM_GGML_RESTRICT s, void * LM_GGML_RESTRICT xv, lm_ggml_fp16_t * LM_GGML_RESTRICT y) {
-     lm_ggml_float sumf[LM_GGML_VEC_DOT_UNROLL] = { 0.0 };
-
-     lm_ggml_fp16_t * LM_GGML_RESTRICT x[LM_GGML_VEC_DOT_UNROLL];
-
-     for (int i = 0; i < LM_GGML_VEC_DOT_UNROLL; ++i) {
-         x[i] = (lm_ggml_fp16_t *) ((char *) xv + i*xs);
-     }
-
- #if defined(LM_GGML_SIMD)
-     const int np = (n & ~(LM_GGML_F16_STEP - 1));
-
-     LM_GGML_F16_VEC sum[LM_GGML_VEC_DOT_UNROLL][LM_GGML_F16_ARR] = { { LM_GGML_F16_VEC_ZERO } };
-
-     LM_GGML_F16_VEC ax[LM_GGML_F16_ARR];
-     LM_GGML_F16_VEC ay[LM_GGML_F16_ARR];
-
-     for (int i = 0; i < np; i += LM_GGML_F16_STEP) {
-         for (int j = 0; j < LM_GGML_F16_ARR; j++) {
-             ay[j] = LM_GGML_F16_VEC_LOAD(y + i + j*LM_GGML_F16_EPR, j);
-
-             for (int k = 0; k < LM_GGML_VEC_DOT_UNROLL; ++k) {
-                 ax[j] = LM_GGML_F16_VEC_LOAD(x[k] + i + j*LM_GGML_F16_EPR, j);
-
-                 sum[k][j] = LM_GGML_F16_VEC_FMA(sum[k][j], ax[j], ay[j]);
-             }
-         }
-     }
-
-     // reduce sum0..sum3 to sum0
-     for (int k = 0; k < LM_GGML_VEC_DOT_UNROLL; ++k) {
-         LM_GGML_F16_VEC_REDUCE(sumf[k], sum[k]);
-     }
-
-     // leftovers
-     for (int i = np; i < n; ++i) {
-         for (int j = 0; j < LM_GGML_VEC_DOT_UNROLL; ++j) {
-             sumf[j] += (lm_ggml_float)(LM_GGML_FP16_TO_FP32(x[j][i])*LM_GGML_FP16_TO_FP32(y[i]));
-         }
-     }
- #else
-     for (int i = 0; i < n; ++i) {
-         for (int j = 0; j < LM_GGML_VEC_DOT_UNROLL; ++j) {
-             sumf[j] += (lm_ggml_float)(LM_GGML_FP16_TO_FP32(x[j][i])*LM_GGML_FP16_TO_FP32(y[i]));
-         }
-     }
- #endif
-
-     for (int i = 0; i < LM_GGML_VEC_DOT_UNROLL; ++i) {
-         s[i] = (float)sumf[i];
-     }
- }
-
- inline static void lm_ggml_vec_mad_f32(const int n, float * LM_GGML_RESTRICT y, const float * LM_GGML_RESTRICT x, const float v) {
- #if defined(LM_GGML_SIMD)
-     const int np = (n & ~(LM_GGML_F32_STEP - 1));
-
-     LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
-
-     LM_GGML_F32_VEC ax[LM_GGML_F32_ARR];
-     LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
-
-     for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
-         for (int j = 0; j < LM_GGML_F32_ARR; j++) {
-             ax[j] = LM_GGML_F32_VEC_LOAD(x + i + j*LM_GGML_F32_EPR);
-             ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
-             ay[j] = LM_GGML_F32_VEC_FMA(ay[j], ax[j], vx);
-
-             LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
-         }
-     }
-
-     // leftovers
-     for (int i = np; i < n; ++i) {
-         y[i] += x[i]*v;
-     }
- #else
-     // scalar
-     for (int i = 0; i < n; ++i) {
-         y[i] += x[i]*v;
-     }
- #endif
- }
-
- inline static void lm_ggml_vec_mad_f16(const int n, lm_ggml_fp16_t * LM_GGML_RESTRICT y, const lm_ggml_fp16_t * LM_GGML_RESTRICT x, const float v) {
- #if defined(LM_GGML_SIMD)
-     const int np = (n & ~(LM_GGML_F16_STEP - 1));
-
-     LM_GGML_F16_VEC vx = LM_GGML_F16_VEC_SET1(v);
-
-     LM_GGML_F16_VEC ax[LM_GGML_F16_ARR];
-     LM_GGML_F16_VEC ay[LM_GGML_F16_ARR];
-
-     for (int i = 0; i < np; i += LM_GGML_F16_STEP) {
-         for (int j = 0; j < LM_GGML_F16_ARR; j++) {
-             ax[j] = LM_GGML_F16_VEC_LOAD(x + i + j*LM_GGML_F16_EPR, j);
-             ay[j] = LM_GGML_F16_VEC_LOAD(y + i + j*LM_GGML_F16_EPR, j);
-             ay[j] = LM_GGML_F16_VEC_FMA(ay[j], ax[j], vx);
-
-             LM_GGML_F16_VEC_STORE(y + i + j*LM_GGML_F16_EPR, ay, j);
-         }
-     }
-
-     // leftovers
-     for (int i = np; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i]) + LM_GGML_FP16_TO_FP32(x[i])*v);
-     }
- #else
-     // scalar
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i]) + LM_GGML_FP16_TO_FP32(x[i])*v);
-     }
- #endif
- }
-
- // xs and vs are byte strides of x and v
- inline static void lm_ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * LM_GGML_RESTRICT y, const float * LM_GGML_RESTRICT xv, const float * LM_GGML_RESTRICT vv) {
-
-     const float * LM_GGML_RESTRICT x[LM_GGML_VEC_MAD_UNROLL];
-     const float * LM_GGML_RESTRICT v[LM_GGML_VEC_MAD_UNROLL];
-
-     for (int i = 0; i < LM_GGML_VEC_MAD_UNROLL; ++i) {
-         x[i] = (const float *) ((const char *) xv + i*xs);
-         v[i] = (const float *) ((const char *) vv + i*vs);
-     }
-
- #if defined(LM_GGML_SIMD)
-     const int np = (n & ~(LM_GGML_F32_STEP - 1));
-
-     LM_GGML_F32_VEC vx[LM_GGML_VEC_MAD_UNROLL];
-
-     for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-         vx[k] = LM_GGML_F32_VEC_SET1(v[k][0]);
-     }
-
-     LM_GGML_F32_VEC ax[LM_GGML_VEC_MAD_UNROLL][LM_GGML_F32_ARR];
-     LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
-
-     for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
-         for (int j = 0; j < LM_GGML_F32_ARR; j++) {
-             ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
-
-             for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-                 ax[k][j] = LM_GGML_F32_VEC_LOAD(x[k] + i + j*LM_GGML_F32_EPR);
-                 ay[j] = LM_GGML_F32_VEC_FMA(ay[j], ax[k][j], vx[k]);
-             }
-
-             LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
-         }
-     }
-
-     // leftovers
-     for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-         for (int i = np; i < n; ++i) {
-             y[i] += x[k][i]*v[k][0];
-         }
-     }
- #else
-     // scalar
-     for (int k = 0; k < LM_GGML_VEC_MAD_UNROLL; ++k) {
-         for (int i = 0; i < n; ++i) {
-             y[i] += x[k][i]*v[k][0];
-         }
-     }
- #endif
- }
-
- //inline static void lm_ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
- inline static void lm_ggml_vec_scale_f32(const int n, float * y, const float v) {
- #if defined(LM_GGML_USE_ACCELERATE)
-     vDSP_vsmul(y, 1, &v, y, 1, n);
- #elif defined(LM_GGML_SIMD)
-     const int np = (n & ~(LM_GGML_F32_STEP - 1));
-
-     LM_GGML_F32_VEC vx = LM_GGML_F32_VEC_SET1(v);
-
-     LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
-
-     for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
-         for (int j = 0; j < LM_GGML_F32_ARR; j++) {
-             ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
-             ay[j] = LM_GGML_F32_VEC_MUL(ay[j], vx);
-
-             LM_GGML_F32_VEC_STORE(y + i + j*LM_GGML_F32_EPR, ay[j]);
-         }
-     }
-
-     // leftovers
-     for (int i = np; i < n; ++i) {
-         y[i] *= v;
-     }
- #else
-     // scalar
-     for (int i = 0; i < n; ++i) {
-         y[i] *= v;
-     }
- #endif
- }
-
- inline static void lm_ggml_vec_scale_f16(const int n, lm_ggml_fp16_t * y, const float v) {
- #if defined(LM_GGML_SIMD)
-     const int np = (n & ~(LM_GGML_F16_STEP - 1));
-
-     LM_GGML_F16_VEC vx = LM_GGML_F16_VEC_SET1(v);
-
-     LM_GGML_F16_VEC ay[LM_GGML_F16_ARR];
-
-     for (int i = 0; i < np; i += LM_GGML_F16_STEP) {
-         for (int j = 0; j < LM_GGML_F16_ARR; j++) {
-             ay[j] = LM_GGML_F16_VEC_LOAD(y + i + j*LM_GGML_F16_EPR, j);
-             ay[j] = LM_GGML_F16_VEC_MUL(ay[j], vx);
-
-             LM_GGML_F16_VEC_STORE(y + i + j*LM_GGML_F16_EPR, ay, j);
-         }
-     }
-
-     // leftovers
-     for (int i = np; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i])*v);
-     }
- #else
-     // scalar
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(y[i])*v);
-     }
- #endif
- }
-
- inline static void lm_ggml_vec_norm_f32 (const int n, float * s, const float * x) { lm_ggml_vec_dot_f32(n, s, 0, x, 0, x, 0, 1); *s = sqrtf(*s); }
- inline static void lm_ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; }
- inline static void lm_ggml_vec_sqr_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         float v = LM_GGML_FP16_TO_FP32(x[i]);
-         y[i] = LM_GGML_FP32_TO_FP16(v*v);
-     }
- }
- inline static void lm_ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
- inline static void lm_ggml_vec_sqrt_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(sqrtf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); }
- inline static void lm_ggml_vec_log_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(logf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_sin_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sinf(x[i]); }
- inline static void lm_ggml_vec_sin_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(sinf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_cos_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = cosf(x[i]); }
- inline static void lm_ggml_vec_cos_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(cosf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
- inline static void lm_ggml_vec_abs_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(fabsf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
- inline static void lm_ggml_vec_sgn_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         float v = LM_GGML_FP16_TO_FP32(x[i]);
-         y[i] = LM_GGML_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f));
-     }
- }
- inline static void lm_ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
- inline static void lm_ggml_vec_step_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16((LM_GGML_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f);
-     }
- }
- inline static void lm_ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
- inline static void lm_ggml_vec_tanh_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(tanhf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
- inline static void lm_ggml_vec_elu_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(expm1f(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
- inline static void lm_ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
- inline static void lm_ggml_vec_relu_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         float v = LM_GGML_FP16_TO_FP32(x[i]);
-         y[i] = LM_GGML_FP32_TO_FP16((v > 0.f) ? v : 0.f);
-     }
- }
- inline static void lm_ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
- inline static void lm_ggml_vec_leaky_relu_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x, const float ns) {
-     for (int i = 0; i < n; ++i) {
-         float v = LM_GGML_FP16_TO_FP32(x[i]);
-         y[i] = LM_GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
-     }
- }
- inline static void lm_ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
- inline static void lm_ggml_vec_sigmoid_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(1.f / (1.f + expf(-LM_GGML_FP16_TO_FP32(x[i]))));
-     }
- }
- // TODO: optimize performance
- inline static void lm_ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
- inline static void lm_ggml_vec_hardswish_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         float v = LM_GGML_FP16_TO_FP32(x[i]);
-         y[i] = LM_GGML_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f)));
-     }
- }
- inline static void lm_ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
- inline static void lm_ggml_vec_hardsigmoid_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (LM_GGML_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f)));
-     }
- }
- inline static void lm_ggml_vec_exp_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = expf(x[i]); }
- inline static void lm_ggml_vec_exp_f16 (const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = LM_GGML_FP32_TO_FP16(expf(LM_GGML_FP16_TO_FP32(x[i])));
-     }
- }
-
- static const float GELU_COEF_A = 0.044715f;
- static const float GELU_QUICK_COEF = -1.702f;
- static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
-
- inline static float lm_ggml_gelu_f32(float x) {
-     return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
- }
-
- inline static void lm_ggml_vec_gelu_f16(const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     const uint16_t * i16 = (const uint16_t *) x;
-     for (int i = 0; i < n; ++i) {
-         y[i] = lm_ggml_table_gelu_f16[i16[i]];
-     }
- }
-
- #ifdef LM_GGML_GELU_FP16
- inline static void lm_ggml_vec_gelu_f32(const int n, float * y, const float * x) {
-     uint16_t t;
-     for (int i = 0; i < n; ++i) {
-         if (x[i] <= -10.0f) {
-             y[i] = 0.0f;
-         } else if (x[i] >= 10.0f) {
-             y[i] = x[i];
-         } else {
-             lm_ggml_fp16_t fp16 = LM_GGML_FP32_TO_FP16(x[i]);
-             memcpy(&t, &fp16, sizeof(uint16_t));
-             y[i] = LM_GGML_FP16_TO_FP32(lm_ggml_table_gelu_f16[t]);
-         }
-     }
- }
- #else
- inline static void lm_ggml_vec_gelu_f32(const int n, float * y, const float * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = lm_ggml_gelu_f32(x[i]);
-     }
- }
- #endif
-
- inline static float lm_ggml_gelu_quick_f32(float x) {
-     return x*(1.0f/(1.0f+expf(GELU_QUICK_COEF*x)));
- }
-
- //inline static void lm_ggml_vec_gelu_quick_f16(const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
- //    const uint16_t * i16 = (const uint16_t *) x;
- //    for (int i = 0; i < n; ++i) {
- //        y[i] = lm_ggml_table_gelu_quick_f16[i16[i]];
- //    }
- //}
-
- #ifdef LM_GGML_GELU_QUICK_FP16
- inline static void lm_ggml_vec_gelu_quick_f32(const int n, float * y, const float * x) {
-     uint16_t t;
-     for (int i = 0; i < n; ++i) {
-         lm_ggml_fp16_t fp16 = LM_GGML_FP32_TO_FP16(x[i]);
-         memcpy(&t, &fp16, sizeof(uint16_t));
-         y[i] = LM_GGML_FP16_TO_FP32(lm_ggml_table_gelu_quick_f16[t]);
-     }
- }
- #else
- inline static void lm_ggml_vec_gelu_quick_f32(const int n, float * y, const float * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = lm_ggml_gelu_quick_f32(x[i]);
-     }
- }
- #endif
-
- inline static void lm_ggml_vec_gelu_quick_f16(const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         float v = LM_GGML_FP16_TO_FP32(x[i]);
-         y[i] = LM_GGML_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v))));
-     }
- }
-
- // Sigmoid Linear Unit (SiLU) function
- inline static float lm_ggml_silu_f32(float x) {
-     return x/(1.0f + expf(-x));
- }
- inline static lm_ggml_fp16_t lm_ggml_silu_f16(lm_ggml_fp16_t x) {
-     float v = LM_GGML_FP16_TO_FP32(x);
-     return LM_GGML_FP32_TO_FP16(v/(1.0f + expf(-v)));
- }
-
- #if __FINITE_MATH_ONLY__
- #error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
- #error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461"
- #endif
-
- #if defined(__ARM_NEON) && defined(__aarch64__)
-
- // adapted from arm limited optimized routine
- // the maximum error is 1.45358 plus 0.5 ulps
- // numbers above 88.38 will flush to infinity
- // numbers beneath -103.97 will flush to zero
- inline static float32x4_t lm_ggml_v_expf(float32x4_t x) {
-     const float32x4_t r = vdupq_n_f32(0x1.8p23f);
-     const float32x4_t z = vfmaq_f32(r, x, vdupq_n_f32(0x1.715476p+0f));
-     const float32x4_t n = vsubq_f32(z, r);
-     const float32x4_t b = vfmsq_f32(vfmsq_f32(x, n, vdupq_n_f32(0x1.62e4p-1f)), n,
-                                     vdupq_n_f32(0x1.7f7d1cp-20f));
-     const uint32x4_t e = vshlq_n_u32(vreinterpretq_u32_f32(z), 23);
-     const float32x4_t k = vreinterpretq_f32_u32(vaddq_u32(e, vreinterpretq_u32_f32(vdupq_n_f32(1))));
-     const uint32x4_t c = vcagtq_f32(n, vdupq_n_f32(126));
-     const float32x4_t u = vmulq_f32(b, b);
-     const float32x4_t j = vfmaq_f32(
-         vmulq_f32(vdupq_n_f32(0x1.ffffecp-1f), b),
-         vfmaq_f32(vfmaq_f32(vdupq_n_f32(0x1.fffdb6p-2f), vdupq_n_f32(0x1.555e66p-3f), b),
-                   vfmaq_f32(vdupq_n_f32(0x1.573e2ep-5f), vdupq_n_f32(0x1.0e4020p-7f), b), u), u);
-     if (!vpaddd_u64(vreinterpretq_u64_u32(c)))
-         return vfmaq_f32(k, j, k);
-     const uint32x4_t d = vandq_u32(vclezq_f32(n), vdupq_n_u32(0x82000000));
-     const float32x4_t s1 = vreinterpretq_f32_u32(vaddq_u32(d, vdupq_n_u32(0x7f000000)));
-     const float32x4_t s2 = vreinterpretq_f32_u32(vsubq_u32(e, d));
-     return vbslq_f32(vcagtq_f32(n, vdupq_n_f32(192)), vmulq_f32(s1, s1),
-                      vbslq_f32(c, vmulq_f32(vfmaq_f32(s2, s2, j), s1), vfmaq_f32(k, k, j)));
- }
-
- // computes silu x/(1+exp(-x)) in single precision vector
- inline static float32x4_t lm_ggml_v_silu(float32x4_t x) {
-     const float32x4_t one = vdupq_n_f32(1.0f);
-     const float32x4_t zero = vdupq_n_f32(0.0f);
-     const float32x4_t neg_x = vsubq_f32(zero, x);
-     const float32x4_t exp_neg_x = lm_ggml_v_expf(neg_x);
-     const float32x4_t one_plus_exp_neg_x = vaddq_f32(one, exp_neg_x);
-     return vdivq_f32(x, one_plus_exp_neg_x);
- }
-
- #elif defined(__AVX512F__) && defined(__AVX512DQ__)
-
- // adapted from arm limited optimized routine
- // the maximum error is 1.45358 plus 0.5 ulps
- // numbers above 88.38 will flush to infinity
- // numbers beneath -103.97 will flush to zero
- inline static __m512 lm_ggml_v_expf(__m512 x) {
-     const __m512 r = _mm512_set1_ps(0x1.8p23f);
-     const __m512 z = _mm512_fmadd_ps(x, _mm512_set1_ps(0x1.715476p+0f), r);
-     const __m512 n = _mm512_sub_ps(z, r);
-     const __m512 b =
-         _mm512_fnmadd_ps(n, _mm512_set1_ps(0x1.7f7d1cp-20f),
-                          _mm512_fnmadd_ps(n, _mm512_set1_ps(0x1.62e4p-1f), x));
-     const __mmask16 d =
-         _mm512_cmp_ps_mask(_mm512_abs_ps(n), _mm512_set1_ps(192), _CMP_GT_OQ);
-     const __m512 u = _mm512_mul_ps(b, b);
-     const __m512 j = _mm512_fmadd_ps(
-         _mm512_fmadd_ps(_mm512_fmadd_ps(_mm512_set1_ps(0x1.0e4020p-7f), b,
-                                         _mm512_set1_ps(0x1.573e2ep-5f)),
-                         u,
-                         _mm512_fmadd_ps(_mm512_set1_ps(0x1.555e66p-3f), b,
-                                         _mm512_set1_ps(0x1.fffdb6p-2f))),
-         u,
-         _mm512_fmadd_ps(_mm512_set1_ps(0x1.ffffecp-1f), b, _mm512_set1_ps(1.0F)));
-     const __m512 res = _mm512_scalef_ps(j, n);
-     if (_mm512_kortestz(d, d))
-         return res;
-     const __m512 zero = _mm512_setzero_ps();
-     const __m512 alt = _mm512_mask_blend_ps(
-         _mm512_cmp_ps_mask(n, zero, _CMP_LE_OQ), _mm512_set1_ps(INFINITY), zero);
-     return _mm512_mask_blend_ps(d, res, alt);
- }
-
- // computes silu x/(1+exp(-x)) in single precision vector
- inline static __m512 lm_ggml_v_silu(__m512 x) {
-     const __m512 one = _mm512_set1_ps(1);
-     const __m512 zero = _mm512_setzero_ps();
-     const __m512 neg_x = _mm512_sub_ps(zero, x);
-     const __m512 exp_neg_x = lm_ggml_v_expf(neg_x);
-     const __m512 one_plus_exp_neg_x = _mm512_add_ps(one, exp_neg_x);
-     return _mm512_div_ps(x, one_plus_exp_neg_x);
- }
-
- #elif defined(__AVX2__) && defined(__FMA__)
-
- // adapted from arm limited optimized routine
- // the maximum error is 1.45358 plus 0.5 ulps
- // numbers above 88.38 will flush to infinity
- // numbers beneath -103.97 will flush to zero
- inline static __m256 lm_ggml_v_expf(__m256 x) {
-     const __m256 r = _mm256_set1_ps(0x1.8p23f);
-     const __m256 z = _mm256_fmadd_ps(x, _mm256_set1_ps(0x1.715476p+0f), r);
-     const __m256 n = _mm256_sub_ps(z, r);
-     const __m256 b = _mm256_fnmadd_ps(n, _mm256_set1_ps(0x1.7f7d1cp-20f),
-                                       _mm256_fnmadd_ps(n, _mm256_set1_ps(0x1.62e4p-1f), x));
-     const __m256i e = _mm256_slli_epi32(_mm256_castps_si256(z), 23);
-     const __m256 k = _mm256_castsi256_ps(
-         _mm256_add_epi32(e, _mm256_castps_si256(_mm256_set1_ps(1))));
-     const __m256i c = _mm256_castps_si256(
-         _mm256_cmp_ps(_mm256_andnot_ps(_mm256_set1_ps(-0.f), n),
-                       _mm256_set1_ps(126), _CMP_GT_OQ));
-     const __m256 u = _mm256_mul_ps(b, b);
-     const __m256 j = _mm256_fmadd_ps(_mm256_fmadd_ps(_mm256_fmadd_ps(_mm256_set1_ps(0x1.0e4020p-7f), b,
-                                                                      _mm256_set1_ps(0x1.573e2ep-5f)), u,
-                                                      _mm256_fmadd_ps(_mm256_set1_ps(0x1.555e66p-3f), b,
-                                                                      _mm256_set1_ps(0x1.fffdb6p-2f))),
-                                      u, _mm256_mul_ps(_mm256_set1_ps(0x1.ffffecp-1f), b));
-     if (!_mm256_movemask_ps(_mm256_castsi256_ps(c)))
-         return _mm256_fmadd_ps(j, k, k);
-     const __m256i g = _mm256_and_si256(
-         _mm256_castps_si256(_mm256_cmp_ps(n, _mm256_setzero_ps(), _CMP_LE_OQ)),
-         _mm256_set1_epi32(0x82000000u));
-     const __m256 s1 =
-         _mm256_castsi256_ps(_mm256_add_epi32(g, _mm256_set1_epi32(0x7f000000u)));
-     const __m256 s2 = _mm256_castsi256_ps(_mm256_sub_epi32(e, g));
-     const __m256i d = _mm256_castps_si256(
-         _mm256_cmp_ps(_mm256_andnot_ps(_mm256_set1_ps(-0.f), n),
-                       _mm256_set1_ps(192), _CMP_GT_OQ));
-     return _mm256_or_ps(
-         _mm256_and_ps(_mm256_castsi256_ps(d), _mm256_mul_ps(s1, s1)),
-         _mm256_andnot_ps(
-             _mm256_castsi256_ps(d),
-             _mm256_or_ps(
-                 _mm256_and_ps(_mm256_castsi256_ps(c),
-                               _mm256_mul_ps(_mm256_fmadd_ps(s2, j, s2), s1)),
-                 _mm256_andnot_ps(_mm256_castsi256_ps(c), _mm256_fmadd_ps(k, j, k)))));
- }
-
- // computes silu x/(1+exp(-x)) in single precision vector
- inline static __m256 lm_ggml_v_silu(__m256 x) {
-     const __m256 one = _mm256_set1_ps(1);
-     const __m256 zero = _mm256_setzero_ps();
-     const __m256 neg_x = _mm256_sub_ps(zero, x);
-     const __m256 exp_neg_x = lm_ggml_v_expf(neg_x);
-     const __m256 one_plus_exp_neg_x = _mm256_add_ps(one, exp_neg_x);
-     return _mm256_div_ps(x, one_plus_exp_neg_x);
- }
-
- #elif defined(__SSE2__) // __AVX2__ / __ARM_NEON
-
- #if defined(__FMA__)
- #define MADD128(x, y, z) _mm_fmadd_ps(x, y, z)
- #define NMADD128(x, y, z) _mm_fnmadd_ps(x, y, z)
- #else
- #define MADD128(x, y, z) _mm_add_ps(_mm_mul_ps(x, y), z)
- #define NMADD128(x, y, z) _mm_sub_ps(z, _mm_mul_ps(x, y))
- #endif
-
- // adapted from arm limited optimized routine
- // the maximum error is 1.45358 plus 0.5 ulps
- // numbers above 88.38 will flush to infinity
- // numbers beneath -103.97 will flush to zero
- inline static __m128 lm_ggml_v_expf(__m128 x) {
-     const __m128 r = _mm_set1_ps(0x1.8p23f);
-     const __m128 z = MADD128(x, _mm_set1_ps(0x1.715476p+0f), r);
-     const __m128 n = _mm_sub_ps(z, r);
-     const __m128 b =
-         NMADD128(n, _mm_set1_ps(0x1.7f7d1cp-20f), NMADD128(n, _mm_set1_ps(0x1.62e4p-1f), x));
-     const __m128i e = _mm_slli_epi32(_mm_castps_si128(z), 23);
-     const __m128 k = _mm_castsi128_ps(_mm_add_epi32(e, _mm_castps_si128(_mm_set1_ps(1))));
-     const __m128i c =
-         _mm_castps_si128(_mm_cmpgt_ps(_mm_andnot_ps(_mm_set1_ps(-0.f), n), _mm_set1_ps(126)));
-     const __m128 u = _mm_mul_ps(b, b);
-     const __m128 j =
-         MADD128(MADD128(MADD128(_mm_set1_ps(0x1.0e4020p-7f), b, _mm_set1_ps(0x1.573e2ep-5f)), u,
-                         MADD128(_mm_set1_ps(0x1.555e66p-3f), b, _mm_set1_ps(0x1.fffdb6p-2f))),
-                 u, _mm_mul_ps(_mm_set1_ps(0x1.ffffecp-1f), b));
-     if (!_mm_movemask_epi8(c))
-         return MADD128(j, k, k);
-     const __m128i g = _mm_and_si128(_mm_castps_si128(_mm_cmple_ps(n, _mm_setzero_ps())),
-                                     _mm_set1_epi32(0x82000000u));
-     const __m128 s1 = _mm_castsi128_ps(_mm_add_epi32(g, _mm_set1_epi32(0x7f000000u)));
-     const __m128 s2 = _mm_castsi128_ps(_mm_sub_epi32(e, g));
-     const __m128i d =
-         _mm_castps_si128(_mm_cmpgt_ps(_mm_andnot_ps(_mm_set1_ps(-0.f), n), _mm_set1_ps(192)));
-     return _mm_or_ps(
-         _mm_and_ps(_mm_castsi128_ps(d), _mm_mul_ps(s1, s1)),
-         _mm_andnot_ps(_mm_castsi128_ps(d),
-                       _mm_or_ps(_mm_and_ps(_mm_castsi128_ps(c), _mm_mul_ps(MADD128(s2, j, s2), s1)),
-                                 _mm_andnot_ps(_mm_castsi128_ps(c), MADD128(k, j, k)))));
- }
-
- // computes silu x/(1+exp(-x)) in single precision vector
- inline static __m128 lm_ggml_v_silu(__m128 x) {
-     const __m128 one = _mm_set1_ps(1);
-     const __m128 zero = _mm_setzero_ps();
-     const __m128 neg_x = _mm_sub_ps(zero, x);
-     const __m128 exp_neg_x = lm_ggml_v_expf(neg_x);
-     const __m128 one_plus_exp_neg_x = _mm_add_ps(one, exp_neg_x);
-     return _mm_div_ps(x, one_plus_exp_neg_x);
- }
-
- #endif // __ARM_NEON / __AVX2__ / __SSE2__
-
- inline static void lm_ggml_vec_silu_f16(const int n, lm_ggml_fp16_t * y, const lm_ggml_fp16_t * x) {
-     for (int i = 0; i < n; ++i) {
-         y[i] = lm_ggml_silu_f16(x[i]);
-     }
- }
-
- inline static float lm_ggml_silu_backward_f32(float x, float dy) {
-     const float s = 1.0f/(1.0f + expf(-x));
-     return dy*s*(1.0f + x*(1.0f - s));
- }
-
- inline static lm_ggml_fp16_t lm_ggml_silu_backward_f16(lm_ggml_fp16_t x, lm_ggml_fp16_t dy) {
-     const float v = LM_GGML_FP16_TO_FP32(x);
-     const float s = 1.0f/(1.0f + expf(-v));
-     return LM_GGML_FP32_TO_FP16(LM_GGML_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s)));
- }
-
- inline static void lm_ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) {
-     for (int i = 0; i < n; ++i) {
-         dx[i] = lm_ggml_silu_backward_f32(x[i], dy[i]);
-     }
- }
-
- inline static void lm_ggml_vec_silu_backward_f16(const int n, lm_ggml_fp16_t * dx, const lm_ggml_fp16_t * x, const lm_ggml_fp16_t * dy) {
-     for (int i = 0; i < n; ++i) {
-         dx[i] = lm_ggml_silu_backward_f16(x[i], dy[i]);
-     }
- }
-
- inline static void lm_ggml_vec_sum_f32(const int n, float * s, const float * x) {
- #ifndef LM_GGML_USE_ACCELERATE
-     lm_ggml_float sum = 0.0;
-     for (int i = 0; i < n; ++i) {
-         sum += (lm_ggml_float)x[i];
-     }
-     *s = (float)sum;
- #else
-     vDSP_sve(x, 1, s, n);
- #endif
- }
-
- inline static void lm_ggml_vec_sum_f32_ggf(const int n, lm_ggml_float * s, const float * x) {
-     lm_ggml_float sum = 0.0;
-     for (int i = 0; i < n; ++i) {
-         sum += (lm_ggml_float)x[i];
-     }
-     *s = sum;
- }
-
- inline static void lm_ggml_vec_sum_f16_ggf(const int n, float * s, const lm_ggml_fp16_t * x) {
-     float sum = 0.0f;
-     for (int i = 0; i < n; ++i) {
-         sum += LM_GGML_FP16_TO_FP32(x[i]);
-     }
-     *s = sum;
- }
-
- inline static void lm_ggml_vec_sum_bf16_ggf(const int n, float * s, const lm_ggml_bf16_t * x) {
-     float sum = 0.0f;
-     for (int i = 0; i < n; ++i) {
-         sum += LM_GGML_BF16_TO_FP32(x[i]);
-     }
-     *s = sum;
- }
-
- inline static void lm_ggml_vec_max_f32(const int n, float * s, const float * x) {
- #ifndef LM_GGML_USE_ACCELERATE
-     float max = -INFINITY;
-     for (int i = 0; i < n; ++i) {
-         max = MAX(max, x[i]);
-     }
-     *s = max;
- #else
-     vDSP_maxv(x, 1, s, n);
- #endif
- }
-
- inline static void lm_ggml_vec_norm_inv_f32(const int n, float * s, const float * x) {
-     lm_ggml_vec_norm_f32(n, s, x);
-     *s = 1.f/(*s);
- }
-
- inline static void lm_ggml_vec_argmax_f32(const int n, int * s, const float * x) {
-     float max = -INFINITY;
-     int idx = 0;
-     for (int i = 0; i < n; ++i) {
-         max = MAX(max, x[i]);
-         if (max == x[i]) { idx = i; }
-     }
-     *s = idx;
- }
-
- #ifdef __cplusplus
- }
- #endif
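
As a sanity check on the relocation rather than a removal: a consumer that previously included vec.h from the cpp/ root would now reach the same declarations under ggml-cpu/. A minimal sketch, assuming the include root is still package/cpp and the moved header kept the lm_-prefixed declarations shown above (neither is verified against the 1.7.0 build scripts):

    // hypothetical consumer; header path taken from rename entries 41-42 above
    #include "ggml-cpu/vec.h"   // formerly "vec.h" at the cpp/ root
    #include <stdio.h>

    int main(void) {
        float y[4] = {1.0f, 2.0f, 3.0f, 4.0f};
        lm_ggml_vec_scale_f32(4, y, 0.5f);  // in-place scale: y[i] *= 0.5f
        printf("%.1f %.1f %.1f %.1f\n", y[0], y[1], y[2], y[3]);
        return 0;
    }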