@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -84,6 +84,15 @@ static void gelu_quick(const T *x, T *dst, int k,
84
84
  dst[i] = x[i] * (static_cast<T>(1.0f) / (static_cast<T>(1.0f) + sycl::native::exp(GELU_QUICK_COEF * x[i])));
85
85
  }
86
86
 
87
+ template<typename T>
88
+ static void gelu_erf(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
89
+ const T SQRT_2_INV = static_cast<T>(0.70710678118654752440084436210484f);
90
+ for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) {
91
+ auto x_i = x[i];
92
+ dst[i] = static_cast<T>(0.5f) * x_i * (static_cast<T>(1.0f) + sycl::erf(x_i * SQRT_2_INV));
93
+ }
94
+ }
95
+
87
96
  template<typename T>
88
97
  static void tanh(const T *x, T *dst, int k,
89
98
  const sycl::nd_item<3> &item_ct1) {
@@ -400,6 +409,20 @@ static void gelu_quick_sycl(const T *x, T *dst, const int k,
400
409
  });
401
410
  }
402
411
 
412
+
413
+ template<typename T>
414
+ static void gelu_erf_sycl(const T *x, T *dst, const int k,
415
+ queue_ptr stream) {
416
+ const int num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
417
+ stream->parallel_for(
418
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) *
419
+ sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
420
+ sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
421
+ [=](sycl::nd_item<3> item_ct1) {
422
+ gelu_erf(x, dst, k, item_ct1);
423
+ });
424
+ }
425
+
403
426
  template<typename T>
404
427
  static void tanh_sycl(const T *x, T *dst, const int k,
405
428
  queue_ptr stream) {
@@ -655,7 +678,6 @@ inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
655
678
  }
656
679
  default:
657
680
  GGML_ABORT("GGML tensor type not supported!\n");
658
- break;
659
681
  }
660
682
  }
661
683
 
@@ -688,7 +710,6 @@ inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
688
710
  }
689
711
  default:
690
712
  GGML_ABORT("GGML tensor type not supported!\n");
691
- break;
692
713
  }
693
714
  }
694
715
 
@@ -722,7 +743,6 @@ inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
722
743
  }
723
744
  default:
724
745
  GGML_ABORT("GGML tensor type not supported!\n");
725
- break;
726
746
  }
727
747
  }
728
748
 
@@ -754,7 +774,6 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
754
774
  }
755
775
  default:
756
776
  GGML_ABORT("GGML tensor type not supported!\n");
757
- break;
758
777
  }
759
778
  }
760
779
 
@@ -786,7 +805,6 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
786
805
  }
787
806
  default:
788
807
  GGML_ABORT("GGML tensor type not supported!\n");
789
- break;
790
808
  }
791
809
  }
792
810
 
@@ -818,10 +836,41 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor
818
836
  }
819
837
  default:
820
838
  GGML_ABORT("GGML tensor type not supported!\n");
821
- break;
822
839
  }
823
840
  }
824
841
 
842
+ inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
843
+ #if defined (GGML_SYCL_F16)
844
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
845
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
846
+ #else
847
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
848
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
849
+ #endif
850
+ GGML_ASSERT(dst->src[0]->type == dst->type);
851
+ dpct::queue_ptr main_stream = ctx.stream();
852
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
853
+ switch (dst->type) {
854
+ #if defined (GGML_SYCL_F16)
855
+ case GGML_TYPE_F16:
856
+ {
857
+ auto data_pts = cast_data<sycl::half>(dst);
858
+ gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
859
+ break;
860
+ }
861
+ #endif
862
+ case GGML_TYPE_F32:
863
+ {
864
+ auto data_pts = cast_data<float>(dst);
865
+ gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
866
+ break;
867
+ }
868
+ default:
869
+ GGML_ABORT("GGML tensor type not supported!\n");
870
+ }
871
+ }
872
+
873
+
825
874
  inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
826
875
  #if defined (GGML_SYCL_F16)
827
876
  GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
@@ -850,7 +899,6 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst
850
899
  }
851
900
  default:
852
901
  GGML_ABORT("GGML tensor type not supported!\n");
853
- break;
854
902
  }
855
903
  }
856
904
 
@@ -883,7 +931,6 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst
883
931
  }
884
932
  default:
885
933
  GGML_ABORT("GGML tensor type not supported!\n");
886
- break;
887
934
  }
888
935
  }
889
936
 
@@ -917,7 +964,6 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tenso
917
964
  }
918
965
  default:
919
966
  GGML_ABORT("GGML tensor type not supported!\n");
920
- break;
921
967
  }
922
968
  }
923
969
 
@@ -949,7 +995,6 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor
949
995
  }
950
996
  default:
951
997
  GGML_ABORT("GGML tensor type not supported!\n");
952
- break;
953
998
  }
954
999
  }
955
1000
 
@@ -981,7 +1026,6 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
981
1026
  }
982
1027
  default:
983
1028
  GGML_ABORT("GGML tensor type not supported!\n");
984
- break;
985
1029
  }
986
1030
  }
987
1031
 
@@ -1013,7 +1057,6 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
1013
1057
  }
1014
1058
  default:
1015
1059
  GGML_ABORT("GGML tensor type not supported!\n");
1016
- break;
1017
1060
  }
1018
1061
  }
1019
1062
 
@@ -1045,7 +1088,6 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *
1045
1088
  }
1046
1089
  default:
1047
1090
  GGML_ABORT("GGML tensor type not supported!\n");
1048
- break;
1049
1091
  }
1050
1092
  }
1051
1093
 
@@ -1078,7 +1120,6 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst
1078
1120
  }
1079
1121
  default:
1080
1122
  GGML_ABORT("GGML tensor type not supported!\n");
1081
- break;
1082
1123
  }
1083
1124
  }
1084
1125
 
@@ -1110,7 +1151,6 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
1110
1151
  }
1111
1152
  default:
1112
1153
  GGML_ABORT("GGML tensor type not supported!\n");
1113
- break;
1114
1154
  }
1115
1155
  }
1116
1156
 
@@ -1142,7 +1182,6 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
1142
1182
  }
1143
1183
  default:
1144
1184
  GGML_ABORT("GGML tensor type not supported!\n");
1145
- break;
1146
1185
  }
1147
1186
  }
1148
1187
 
@@ -1174,7 +1213,6 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst
1174
1213
  }
1175
1214
  default:
1176
1215
  GGML_ABORT("GGML tensor type not supported!\n");
1177
- break;
1178
1216
  }
1179
1217
  }
1180
1218
 
@@ -1206,7 +1244,6 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
1206
1244
  }
1207
1245
  default:
1208
1246
  GGML_ABORT("GGML tensor type not supported!\n");
1209
- break;
1210
1247
  }
1211
1248
  }
1212
1249
 
@@ -1241,7 +1278,6 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor
1241
1278
  }
1242
1279
  default:
1243
1280
  GGML_ABORT("GGML tensor type not supported!\n");
1244
- break;
1245
1281
  }
1246
1282
  }
1247
1283
 
@@ -1273,7 +1309,6 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
1273
1309
  }
1274
1310
  default:
1275
1311
  GGML_ABORT("GGML tensor type not supported!\n");
1276
- break;
1277
1312
  }
1278
1313
  }
1279
1314
 
@@ -1315,7 +1350,6 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor *
1315
1350
  }
1316
1351
  default:
1317
1352
  GGML_ABORT("GGML tensor type not supported!\n");
1318
- break;
1319
1353
  }
1320
1354
  }
1321
1355
 
@@ -1350,7 +1384,6 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
1350
1384
  }
1351
1385
  default:
1352
1386
  GGML_ABORT("GGML tensor type not supported!\n");
1353
- break;
1354
1387
  }
1355
1388
  }
1356
1389
 
@@ -1388,7 +1421,6 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * ds
1388
1421
  }
1389
1422
  default:
1390
1423
  GGML_ABORT("GGML tensor type not supported!\n");
1391
- break;
1392
1424
  }
1393
1425
  }
1394
1426
 
@@ -1414,146 +1446,126 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
1414
1446
 
1415
1447
 
1416
1448
  void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1417
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1449
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1418
1450
  ggml_sycl_op_sqrt(ctx, dst);
1419
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1420
1451
  }
1421
1452
 
1422
1453
  void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1423
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1454
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1424
1455
  ggml_sycl_op_sin(ctx, dst);
1425
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1426
1456
  }
1427
1457
 
1428
1458
  void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1429
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1459
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1430
1460
  ggml_sycl_op_cos(ctx, dst);
1431
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1432
1461
  }
1433
1462
 
1434
1463
  void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1435
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1464
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
1436
1465
  ggml_sycl_op_acc(ctx, dst);
1437
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1438
1466
  }
1439
1467
 
1440
1468
  void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1441
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1469
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1442
1470
  ggml_sycl_op_gelu(ctx, dst);
1443
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1444
1471
  }
1445
1472
 
1446
1473
  void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1447
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1474
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1448
1475
  ggml_sycl_op_silu(ctx, dst);
1449
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1450
1476
  }
1451
1477
 
1452
1478
  void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1453
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1479
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1454
1480
  ggml_sycl_op_gelu_quick(ctx, dst);
1455
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1481
+ }
1482
+
1483
+ void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1484
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1485
+ ggml_sycl_op_gelu_erf(ctx, dst);
1456
1486
  }
1457
1487
 
1458
1488
  void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1459
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1489
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1460
1490
  ggml_sycl_op_tanh(ctx, dst);
1461
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1462
1491
  }
1463
1492
 
1464
1493
  void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1465
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1494
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1466
1495
  ggml_sycl_op_relu(ctx, dst);
1467
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1468
1496
  }
1469
1497
 
1470
1498
  void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1471
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1499
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1472
1500
  ggml_sycl_op_sigmoid(ctx, dst);
1473
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1474
1501
  }
1475
1502
 
1476
1503
  void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1477
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1504
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1478
1505
  ggml_sycl_op_hardsigmoid(ctx, dst);
1479
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1480
1506
  }
1481
1507
 
1482
1508
  void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1483
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1509
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1484
1510
  ggml_sycl_op_hardswish(ctx, dst);
1485
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1486
1511
  }
1487
1512
 
1488
-
1489
1513
  void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1490
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1514
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1491
1515
  ggml_sycl_op_exp(ctx, dst);
1492
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1493
1516
  }
1494
1517
 
1495
1518
  void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1496
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1519
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1497
1520
  ggml_sycl_op_log(ctx, dst);
1498
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1499
1521
  }
1500
1522
 
1501
1523
  void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1502
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1524
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1503
1525
  ggml_sycl_op_neg(ctx, dst);
1504
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1505
1526
  }
1506
1527
 
1507
1528
  void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1508
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1529
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1509
1530
  ggml_sycl_op_step(ctx, dst);
1510
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1511
1531
  }
1512
1532
 
1513
1533
  void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1514
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1534
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1515
1535
  ggml_sycl_op_leaky_relu(ctx, dst);
1516
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1517
1536
  }
1518
1537
 
1519
1538
  void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1520
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1539
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1521
1540
  ggml_sycl_op_sqr(ctx, dst);
1522
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1523
1541
  }
1524
1542
 
1525
1543
  void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1526
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1544
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1527
1545
  ggml_sycl_op_upscale(ctx, dst);
1528
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1529
1546
  }
1530
1547
 
1531
1548
  void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1532
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1549
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1533
1550
  ggml_sycl_op_pad(ctx, dst);
1534
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1535
1551
  }
1536
1552
 
1537
1553
  void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1538
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1554
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1539
1555
  ggml_sycl_op_clamp(ctx, dst);
1540
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1541
1556
  }
1542
1557
 
1543
1558
  void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1544
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1559
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1545
1560
  ggml_sycl_op_sgn(ctx, dst);
1546
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1547
1561
  }
1548
1562
 
1549
1563
  void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1550
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1564
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1551
1565
  ggml_sycl_op_abs(ctx, dst);
1552
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1553
1566
  }
1554
1567
 
1555
1568
  void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1556
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
1569
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1557
1570
  ggml_sycl_op_elu(ctx, dst);
1558
- GGML_SYCL_DEBUG("call %s done\n", __func__);
1559
1571
  }
@@ -38,6 +38,8 @@ void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
38
38
 
39
39
  void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
40
40
 
41
+ void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
42
+
41
43
  void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
42
44
 
43
45
  void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
@@ -32,16 +32,36 @@ public:
32
32
  else static_assert(0);
33
33
  }
34
34
 
35
- static inline void row_gemm(ggml_backend_sycl_context & ctx, bool a_trans, bool b_trans, int m, int n, int k,
36
- const void * a, dt at, const void * b, dt bt, void * c, dt ct, const queue_ptr & q) {
35
+ // matrix A has m rows, k columns
36
+ // matrix B has k rows, n columns
37
+ // nra - number of elements to skip when moving into next row in A
38
+ // nrb - number of elements to skip when moving into next row in B
39
+ // nca - number of elements to skip when moving into next column in A
40
+ // ncb - number of elements to skip when moving into next column in B
41
+ // stride_a - number of elements to skip when moving to next A matrix
42
+ // stride_b - number of elements to skip when moving to next B matrix
43
+ // batches_a - number of A matrices
44
+ // batches_b - number of B matrices
45
+ static void gemm(ggml_backend_sycl_context & ctx, int m, int n, int k,
46
+ const void * a, dt at, dnnl_dim_t nra, dnnl_dim_t nca, dnnl_dim_t stride_a,
47
+ const void * b, dt bt, dnnl_dim_t nrb, dnnl_dim_t ncb, dnnl_dim_t stride_b,
48
+ void * c, dt ct, const queue_ptr & q, dnnl_dim_t batches_a, dnnl_dim_t batches_b) {
49
+
37
50
  auto stream = ctx.stream_dnnl(q);
38
51
  auto eng = ctx.engine_dnnl(q);
39
- dnnl::memory::dims a_dims = { m, k };
40
- dnnl::memory::dims b_dims = { k, n };
41
- dnnl::memory::dims c_dims = { m, n };
42
- const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
43
- const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
44
- const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
52
+
53
+ // { # strides, # rows, # columns }
54
+ dnnl::memory::dims a_dims = { batches_a, m, k };
55
+ dnnl::memory::dims b_dims = { batches_b, k, n };
56
+ dnnl::memory::dims c_dims = { std::max(batches_a, batches_b), m, n };
57
+
58
+ // { # elements to skip to next stride, # elements to skip to next row, # elements to skip to next column }
59
+ dnnl::memory::dims a_strides = { stride_a, nra, nca };
60
+ dnnl::memory::dims b_strides = { stride_b, nrb, ncb };
61
+
62
+ const auto a_in_md = dnnl::memory::desc(a_dims, at, a_strides);
63
+ const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_strides);
64
+ const auto c_md = dnnl::memory::desc(c_dims, ct, tag::abc);
45
65
 
46
66
  dnnl::primitive_attr primitive_attr;
47
67
  primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
@@ -63,6 +83,15 @@ public:
63
83
 
64
84
  matmul_prim.execute(stream, matmul_args);
65
85
  }
86
+
87
+ // matrices A and B are column major, both having k rows
88
+ // matrix A has m column, matrix B has n columns
89
+ // output: column major matrix C = A transposed * B
90
+ static void row_gemm(ggml_backend_sycl_context & ctx, int m, int n, int k,
91
+ const void * a, dt at, const void * b, dt bt, void * c, dt ct, const queue_ptr & q) {
92
+
93
+ gemm(ctx, m, n, k, a, at, k, 1, k * m, b, bt, 1, k, n * k, c, ct, q, 1, 1);
94
+ }
66
95
  };
67
96
 
68
97
  #endif
@@ -257,8 +257,7 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
257
257
  GGML_UNUSED(ctx);
258
258
  }
259
259
 
260
- void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
261
-
260
+ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
262
261
  GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
263
262
  GGML_ASSERT(dst->type == GGML_TYPE_F32);
264
263
 
@@ -308,4 +307,3 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
308
307
  GGML_ABORT("fatal error");
309
308
  }
310
309
  }
311
-