@novastera-oss/llamarn 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/android/src/main/cpp/include/llama.h +141 -38
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  10. package/cpp/LlamaCppModel.cpp +58 -24
  11. package/cpp/LlamaCppModel.h +3 -3
  12. package/cpp/PureCppImpl.cpp +1 -1
  13. package/cpp/PureCppImpl.h +2 -2
  14. package/cpp/build-info.cpp +2 -2
  15. package/cpp/llama.cpp/CMakeLists.txt +15 -4
  16. package/cpp/llama.cpp/Makefile +2 -2
  17. package/cpp/llama.cpp/README.md +32 -13
  18. package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
  19. package/cpp/llama.cpp/common/arg.cpp +37 -6
  20. package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
  21. package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
  22. package/cpp/llama.cpp/common/chat-parser.h +2 -0
  23. package/cpp/llama.cpp/common/chat.cpp +12 -9
  24. package/cpp/llama.cpp/common/chat.h +1 -1
  25. package/cpp/llama.cpp/common/common.cpp +53 -40
  26. package/cpp/llama.cpp/common/common.h +6 -2
  27. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  28. package/cpp/llama.cpp/common/speculative.cpp +6 -4
  29. package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
  30. package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
  31. package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
  32. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  33. package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
  34. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
  35. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
  36. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  38. package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
  39. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  67. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
  70. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
  72. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  88. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
  90. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  91. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
  93. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
  94. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
  97. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
  105. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
  112. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
  113. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  115. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
  117. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
  138. package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
  139. package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
  140. package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
  141. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
  142. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
  143. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
  144. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  145. package/cpp/llama.cpp/include/llama.h +141 -38
  146. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  147. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
  148. package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
  149. package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
  150. package/cpp/llama.cpp/src/llama-arch.h +25 -1
  151. package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
  152. package/cpp/llama.cpp/src/llama-batch.h +110 -57
  153. package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
  154. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  155. package/cpp/llama.cpp/src/llama-context.cpp +360 -266
  156. package/cpp/llama.cpp/src/llama-context.h +27 -23
  157. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
  158. package/cpp/llama.cpp/src/llama-cparams.h +1 -1
  159. package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
  160. package/cpp/llama.cpp/src/llama-graph.h +126 -58
  161. package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
  162. package/cpp/llama.cpp/src/llama-hparams.h +16 -2
  163. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
  164. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
  165. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
  166. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
  167. package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
  168. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  169. package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
  170. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
  171. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
  172. package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
  173. package/cpp/llama.cpp/src/llama-memory.h +73 -36
  174. package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
  175. package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
  176. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  177. package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
  178. package/cpp/llama.cpp/src/llama-model.h +26 -0
  179. package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
  180. package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
  181. package/cpp/llama.cpp/src/llama-vocab.h +1 -0
  182. package/cpp/llama.cpp/src/llama.cpp +11 -7
  183. package/cpp/llama.cpp/src/unicode.cpp +5 -0
  184. package/cpp/rn-completion.cpp +2 -2
  185. package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
  186. package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
  187. package/ios/include/chat.h +1 -1
  188. package/ios/include/common.h +6 -2
  189. package/ios/include/llama.h +141 -38
  190. package/ios/libs/llama.xcframework/Info.plist +15 -15
  191. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  192. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  193. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  194. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
  195. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
  196. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  197. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  198. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  199. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  200. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  201. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  202. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  203. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  204. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  205. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  206. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
  207. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  208. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
  209. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
  210. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  211. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  219. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  220. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  221. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  222. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
  223. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
  224. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  225. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  226. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  227. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  228. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  231. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  232. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  233. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
  234. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  235. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
  236. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
  237. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  238. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  239. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
  240. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
  241. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  242. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  243. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  244. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  245. package/package.json +1 -2
  246. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
  247. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  248. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
  249. package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
  250. package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
  251. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  252. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  253. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -2425,8 +2425,6 @@ void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_REST
2425
2425
  }
2426
2426
  }
2427
2427
 
2428
- static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
2429
-
2430
2428
  void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
2431
2429
  assert(k % QK4_NL == 0);
2432
2430
  const int64_t nb = k / QK4_NL;
@@ -53,6 +53,9 @@ struct socket_t {
53
53
  }
54
54
  };
55
55
 
56
+ // macro for nicer error messages on server crash
57
+ #define RPC_STATUS_ASSERT(x) if (!(x)) GGML_ABORT("Remote RPC server crashed or returned malformed response")
58
+
56
59
  // all RPC structures must be packed
57
60
  #pragma pack(push, 1)
58
61
  // ggml_tensor is serialized into rpc_tensor
@@ -425,7 +428,7 @@ static bool send_rpc_cmd(const std::shared_ptr<socket_t> & sock, enum rpc_cmd cm
425
428
  static bool check_server_version(const std::shared_ptr<socket_t> & sock) {
426
429
  rpc_msg_hello_rsp response;
427
430
  bool status = send_rpc_cmd(sock, RPC_CMD_HELLO, nullptr, 0, &response, sizeof(response));
428
- GGML_ASSERT(status);
431
+ RPC_STATUS_ASSERT(status);
429
432
  if (response.major != RPC_PROTO_MAJOR_VERSION || response.minor > RPC_PROTO_MINOR_VERSION) {
430
433
  fprintf(stderr, "RPC server version mismatch: %d.%d.%d\n", response.major, response.minor, response.patch);
431
434
  return false;
@@ -481,7 +484,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
481
484
  ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
482
485
  rpc_msg_free_buffer_req request = {ctx->remote_ptr};
483
486
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_FREE_BUFFER, &request, sizeof(request), nullptr, 0);
484
- GGML_ASSERT(status);
487
+ RPC_STATUS_ASSERT(status);
485
488
  delete ctx;
486
489
  }
487
490
 
@@ -493,7 +496,7 @@ static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
493
496
  rpc_msg_buffer_get_base_req request = {ctx->remote_ptr};
494
497
  rpc_msg_buffer_get_base_rsp response;
495
498
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_BUFFER_GET_BASE, &request, sizeof(request), &response, sizeof(response));
496
- GGML_ASSERT(status);
499
+ RPC_STATUS_ASSERT(status);
497
500
  ctx->base_ptr = reinterpret_cast<void *>(response.base_ptr);
498
501
  return ctx->base_ptr;
499
502
  }
@@ -545,7 +548,7 @@ static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_
545
548
  request.tensor = serialize_tensor(tensor);
546
549
 
547
550
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
548
- GGML_ASSERT(status);
551
+ RPC_STATUS_ASSERT(status);
549
552
  }
550
553
  return GGML_STATUS_SUCCESS;
551
554
  }
@@ -560,7 +563,7 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
560
563
  request.hash = fnv_hash((const uint8_t*)data, size);
561
564
  rpc_msg_set_tensor_hash_rsp response;
562
565
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
563
- GGML_ASSERT(status);
566
+ RPC_STATUS_ASSERT(status);
564
567
  if (response.result) {
565
568
  // the server has the same data, no need to send it
566
569
  return;
@@ -573,7 +576,7 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
573
576
  memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
574
577
  memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), data, size);
575
578
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR, input.data(), input.size());
576
- GGML_ASSERT(status);
579
+ RPC_STATUS_ASSERT(status);
577
580
  }
578
581
 
579
582
  static void ggml_backend_rpc_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
@@ -583,7 +586,7 @@ static void ggml_backend_rpc_buffer_get_tensor(ggml_backend_buffer_t buffer, con
583
586
  request.offset = offset;
584
587
  request.size = size;
585
588
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_GET_TENSOR, &request, sizeof(request), data, size);
586
- GGML_ASSERT(status);
589
+ RPC_STATUS_ASSERT(status);
587
590
  }
588
591
 
589
592
  static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
@@ -601,7 +604,7 @@ static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, con
601
604
  request.dst = serialize_tensor(dst);
602
605
  rpc_msg_copy_tensor_rsp response;
603
606
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_COPY_TENSOR, &request, sizeof(request), &response, sizeof(response));
604
- GGML_ASSERT(status);
607
+ RPC_STATUS_ASSERT(status);
605
608
  return response.result;
606
609
  }
607
610
 
@@ -609,7 +612,7 @@ static void ggml_backend_rpc_buffer_clear(ggml_backend_buffer_t buffer, uint8_t
609
612
  ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
610
613
  rpc_msg_buffer_clear_req request = {ctx->remote_ptr, value};
611
614
  bool status = send_rpc_cmd(ctx->sock, RPC_CMD_BUFFER_CLEAR, &request, sizeof(request), nullptr, 0);
612
- GGML_ASSERT(status);
615
+ RPC_STATUS_ASSERT(status);
613
616
  }
614
617
 
615
618
  static ggml_backend_buffer_i ggml_backend_rpc_buffer_interface = {
@@ -635,7 +638,7 @@ static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer(ggml_back
635
638
  rpc_msg_alloc_buffer_rsp response;
636
639
  auto sock = get_socket(buft_ctx->endpoint);
637
640
  bool status = send_rpc_cmd(sock, RPC_CMD_ALLOC_BUFFER, &request, sizeof(request), &response, sizeof(response));
638
- GGML_ASSERT(status);
641
+ RPC_STATUS_ASSERT(status);
639
642
  if (response.remote_ptr != 0) {
640
643
  ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
641
644
  ggml_backend_rpc_buffer_interface,
@@ -650,7 +653,7 @@ static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer(ggml_back
650
653
  static size_t get_alignment(const std::shared_ptr<socket_t> & sock) {
651
654
  rpc_msg_get_alignment_rsp response;
652
655
  bool status = send_rpc_cmd(sock, RPC_CMD_GET_ALIGNMENT, nullptr, 0, &response, sizeof(response));
653
- GGML_ASSERT(status);
656
+ RPC_STATUS_ASSERT(status);
654
657
  return response.alignment;
655
658
  }
656
659
 
@@ -662,7 +665,7 @@ static size_t ggml_backend_rpc_buffer_type_get_alignment(ggml_backend_buffer_typ
662
665
  static size_t get_max_size(const std::shared_ptr<socket_t> & sock) {
663
666
  rpc_msg_get_max_size_rsp response;
664
667
  bool status = send_rpc_cmd(sock, RPC_CMD_GET_MAX_SIZE, nullptr, 0, &response, sizeof(response));
665
- GGML_ASSERT(status);
668
+ RPC_STATUS_ASSERT(status);
666
669
  return response.max_size;
667
670
  }
668
671
 
@@ -683,7 +686,7 @@ static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_ty
683
686
 
684
687
  rpc_msg_get_alloc_size_rsp response;
685
688
  bool status = send_rpc_cmd(sock, RPC_CMD_GET_ALLOC_SIZE, &request, sizeof(request), &response, sizeof(response));
686
- GGML_ASSERT(status);
689
+ RPC_STATUS_ASSERT(status);
687
690
 
688
691
  return response.alloc_size;
689
692
  } else {
@@ -761,7 +764,7 @@ static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, g
761
764
  rpc_msg_graph_compute_rsp response;
762
765
  auto sock = get_socket(rpc_ctx->endpoint);
763
766
  bool status = send_rpc_cmd(sock, RPC_CMD_GRAPH_COMPUTE, input.data(), input.size(), &response, sizeof(response));
764
- GGML_ASSERT(status);
767
+ RPC_STATUS_ASSERT(status);
765
768
  return (enum ggml_status)response.result;
766
769
  }
767
770
 
@@ -835,7 +838,7 @@ bool ggml_backend_is_rpc(ggml_backend_t backend) {
835
838
  static void get_device_memory(const std::shared_ptr<socket_t> & sock, size_t * free, size_t * total) {
836
839
  rpc_msg_get_device_memory_rsp response;
837
840
  bool status = send_rpc_cmd(sock, RPC_CMD_GET_DEVICE_MEMORY, nullptr, 0, &response, sizeof(response));
838
- GGML_ASSERT(status);
841
+ RPC_STATUS_ASSERT(status);
839
842
  *free = response.free_mem;
840
843
  *total = response.total_mem;
841
844
  }
@@ -142,7 +142,7 @@ else()
142
142
  FetchContent_Declare(
143
143
  ONEMATH
144
144
  GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
145
- GIT_TAG c255b1b4c41e2ee3059455c1f96a965d6a62568a
145
+ GIT_TAG 8efe85f5aaebb37f1d8c503b7af66315feabf142
146
146
  )
147
147
  FetchContent_MakeAvailable(ONEMATH)
148
148
  # Create alias to match with find_package targets name
@@ -225,9 +225,9 @@ struct bin_bcast_sycl {
225
225
  dpct::has_capability_or_fail(stream->get_device(),
226
226
  {sycl::aspect::fp16});
227
227
 
228
- stream->parallel_for(
229
- sycl::nd_range<3>(sycl::range<3>(1, 1, block_num) *
230
- sycl::range<3>(1, 1, block_size),
228
+ sycl_parallel_for(
229
+ stream,
230
+ sycl::nd_range<3>(sycl::range<3>(1, 1, block_num) * sycl::range<3>(1, 1, block_size),
231
231
  sycl::range<3>(1, 1, block_size)),
232
232
  [=](sycl::nd_item<3> item_ct1) {
233
233
  k_bin_bcast_unravel<bin_op>(
@@ -246,9 +246,8 @@ struct bin_bcast_sycl {
246
246
  dpct::has_capability_or_fail(stream->get_device(),
247
247
  {sycl::aspect::fp16});
248
248
 
249
- stream->parallel_for(
250
- sycl::nd_range<3>(block_nums * block_dims, block_dims),
251
- [=](sycl::nd_item<3> item_ct1) {
249
+ sycl_parallel_for(
250
+ stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
252
251
  k_bin_bcast<bin_op>(src0_dd, src1_dd, dst_dd, ne0, ne1,
253
252
  ne2, ne3, ne10, ne11, ne12, ne13,
254
253
  s1, s2, s3, s01, s02, s03, s11, s12, s13,
@@ -149,8 +149,6 @@ typedef sycl::float2 dfloat2;
149
149
 
150
150
  #define MMVQ_MAX_BATCH_SIZE 8
151
151
 
152
- static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
153
-
154
152
  static int g_all_sycl_device_count = -1;
155
153
  static bool g_ggml_backend_sycl_buffer_type_initialized = false;
156
154
 
@@ -201,7 +199,7 @@ struct sycl_device_info {
201
199
  // size_t smpb; // max. shared memory per block
202
200
  bool vmm; // virtual memory support
203
201
  size_t total_vram;
204
- sycl_hw_info hw_info;
202
+ //sycl_hw_info hw_info; \\ device id and aarch, currently not used
205
203
  optimize_feature opt_feature;
206
204
  };
207
205
 
@@ -288,29 +286,6 @@ struct ggml_tensor_extra_gpu {
288
286
 
289
287
  void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
290
288
 
291
- inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
292
- optimize_feature opt;
293
-
294
- opt.reorder =
295
- (arch == syclex::architecture::intel_gpu_dg1 ||
296
- arch == syclex::architecture::intel_gpu_acm_g10 ||
297
- arch == syclex::architecture::intel_gpu_acm_g11 ||
298
- arch == syclex::architecture::intel_gpu_acm_g12 ||
299
- arch == syclex::architecture::intel_gpu_pvc ||
300
- arch == syclex::architecture::intel_gpu_pvc_vg ||
301
- arch == syclex::architecture::intel_gpu_mtl_u ||
302
- arch == syclex::architecture::intel_gpu_mtl_s ||
303
- arch == syclex::architecture::intel_gpu_mtl_h ||
304
- arch == syclex::architecture::intel_gpu_arl_u ||
305
- arch == syclex::architecture::intel_gpu_arl_s ||
306
- arch == syclex::architecture::intel_gpu_arl_h ||
307
- arch == syclex::architecture::intel_gpu_bmg_g21 ||
308
- arch == syclex::architecture::intel_gpu_lnl_m
309
- );
310
-
311
- return opt;
312
- }
313
-
314
289
  namespace sycl_ex = sycl::ext::oneapi::experimental;
315
290
  struct ggml_backend_sycl_context {
316
291
  int device;
@@ -515,9 +490,9 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
515
490
 
516
491
  bool gpu_has_xmx(sycl::device &dev);
517
492
 
518
- template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) {
493
+ template <int N, class T> std::string debug_get_array_str(const std::string & prefix, const T array[N]) {
519
494
  if (LIKELY(!g_ggml_sycl_debug)) {
520
- return;
495
+ return "";
521
496
  }
522
497
  std::stringstream ss;
523
498
  ss << prefix << "=[";
@@ -528,29 +503,26 @@ template <int N, class T> void debug_print_array(const std::string & prefix, con
528
503
  ss << array[N - 1];
529
504
  }
530
505
  ss << "]";
531
- GGML_SYCL_DEBUG("%s", ss.str().c_str());
506
+ return ss.str();
532
507
  }
533
508
 
534
- inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor,
535
- const std::string & suffix = "") {
536
- if (LIKELY(!g_ggml_sycl_debug)) {
537
- return;
538
- }
539
- GGML_SYCL_DEBUG("%s=", prefix.c_str());
509
+ inline std::string debug_get_tensor_str(const std::string &prefix,
510
+ const ggml_tensor *tensor, const std::string &suffix = "") {
511
+ std::stringstream ss;
512
+ if (LIKELY(!g_ggml_sycl_debug)) { return ss.str(); }
513
+ ss << prefix.c_str() << "=";
540
514
  if (tensor) {
541
- GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
542
- debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne);
543
- debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb);
544
- if (!ggml_is_contiguous(tensor)) {
545
- GGML_SYCL_DEBUG(";strided");
546
- }
547
- if (ggml_is_permuted(tensor)) {
548
- GGML_SYCL_DEBUG(";permuted");
549
- }
515
+ ss << "'" << tensor->name << "':type=" << ggml_type_name(tensor->type);
516
+ ss << debug_get_array_str<GGML_MAX_DIMS>(";ne", tensor->ne);
517
+ ss << debug_get_array_str<GGML_MAX_DIMS>(";nb", tensor->nb);
518
+
519
+ if (!ggml_is_contiguous(tensor)) { ss << ";strided"; }
520
+ if (ggml_is_permuted(tensor)) { ss << ";permuted"; }
550
521
  } else {
551
- GGML_SYCL_DEBUG("nullptr");
522
+ ss << "nullptr";
552
523
  }
553
- GGML_SYCL_DEBUG("%s", suffix.c_str());
524
+ ss << suffix;
525
+ return ss.str();
554
526
  }
555
527
 
556
528
  // Use scope_op_debug_print to log operations coming from running a model
@@ -566,10 +538,10 @@ struct scope_op_debug_print {
566
538
  return;
567
539
  }
568
540
  GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
569
- debug_print_tensor(" dst", dst);
541
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" dst", dst).c_str());
570
542
  if (dst) {
571
543
  for (std::size_t i = 0; i < num_src; ++i) {
572
- debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
544
+ GGML_SYCL_DEBUG("%s", debug_get_tensor_str("\tsrc" + std::to_string(i), dst->src[i]).c_str());
573
545
  }
574
546
  }
575
547
  GGML_SYCL_DEBUG("%s\n", suffix.data());
@@ -89,33 +89,24 @@ static void concat_f32_sycl(const float *x, const float *y, float *dst,
89
89
  sycl::range<3> gridDim(ne2, ne1, num_blocks);
90
90
  switch (dim) {
91
91
  case 0:
92
- stream->parallel_for(
93
- sycl::nd_range<3>(gridDim *
94
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
95
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
96
- [=](sycl::nd_item<3> item_ct1) {
97
- concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1);
98
- });
99
- break;
92
+ sycl_parallel_for(stream,
93
+ sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
94
+ sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
95
+ [=](sycl::nd_item<3> item_ct1) { concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1); });
96
+ break;
100
97
  case 1:
101
- stream->parallel_for(
102
- sycl::nd_range<3>(gridDim *
103
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
104
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
105
- [=](sycl::nd_item<3> item_ct1) {
106
- concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1);
107
- });
108
- break;
98
+ sycl_parallel_for(stream,
99
+ sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
100
+ sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
101
+ [=](sycl::nd_item<3> item_ct1) { concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1); });
102
+ break;
109
103
  // dim >=2 will be dispatched to the default path
110
104
  default:
111
- stream->parallel_for(
112
- sycl::nd_range<3>(gridDim *
113
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
114
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
115
- [=](sycl::nd_item<3> item_ct1) {
116
- concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1);
117
- });
118
- break;
105
+ sycl_parallel_for(stream,
106
+ sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
107
+ sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
108
+ [=](sycl::nd_item<3> item_ct1) { concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1); });
109
+ break;
119
110
  }
120
111
  }
121
112
 
@@ -129,33 +120,29 @@ static void concat_f32_sycl_non_cont(
129
120
  int64_t ne2, int64_t ne3, uint64_t nb0, uint64_t nb1, uint64_t nb2,
130
121
  uint64_t nb3, int32_t dim) {
131
122
  sycl::range<3> gridDim(ne3, ne2, ne1);
132
- stream->parallel_for(
133
- sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)),
134
- [=](sycl::nd_item<3> item_ct1) {
135
- int64_t i3 = item_ct1.get_group(0);
136
- int64_t i2 = item_ct1.get_group(1);
137
- int64_t i1 = item_ct1.get_group(2);
123
+ sycl_parallel_for(stream, sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) {
124
+ int64_t i3 = item_ct1.get_group(0);
125
+ int64_t i2 = item_ct1.get_group(1);
126
+ int64_t i1 = item_ct1.get_group(2);
138
127
 
139
- int64_t o[4] = {0, 0, 0, 0};
140
- o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
128
+ int64_t o[4] = { 0, 0, 0, 0 };
129
+ o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
141
130
 
142
- const float *x;
131
+ const float * x;
143
132
 
144
- for (int i0 = item_ct1.get_local_id(2); i0 < ne0;
145
- i0 += item_ct1.get_local_range(2)) {
133
+ for (int i0 = item_ct1.get_local_id(2); i0 < ne0; i0 += item_ct1.get_local_range(2)) {
146
134
  if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
147
- x = (const float *)(src0 + (i3)*nb03 + (i2)*nb02 + (i1)*nb01 +
148
- (i0)*nb00);
135
+ x = (const float *) (src0 + (i3) *nb03 + (i2) *nb02 + (i1) *nb01 + (i0) *nb00);
149
136
  } else {
150
- x = (const float *)(src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 +
151
- (i1 - o[1]) * nb11 + (i0 - o[0]) * nb10);
137
+ x = (const float *) (src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 + (i1 - o[1]) * nb11 +
138
+ (i0 - o[0]) * nb10);
152
139
  }
153
140
 
154
141
  float *y = (float *)(dst + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0);
155
142
 
156
143
  *y = *x;
157
- }
158
- });
144
+ }
145
+ });
159
146
  }
160
147
 
161
148
  void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
@@ -59,16 +59,10 @@ static void conv_transpose_1d_f32_f32_sycl(
59
59
  const int num_blocks = (output_size + SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE - 1) / SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE;
60
60
  const sycl::range<3> block_dims(1, 1, SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE);
61
61
  const sycl::range<3> block_nums(1, 1, num_blocks);
62
- stream->parallel_for(
63
- sycl::nd_range<3>(
64
- block_nums * block_dims, block_dims),
65
- [=](sycl::nd_item<3> item_ct1) {
66
- conv_transpose_1d_kernel(
67
- s0, output_size,
68
- src0_ne0, src0_ne1, src0_ne2,
69
- src1_ne0, dst_ne0,
70
- src0, src1, dst, item_ct1);
71
- });
62
+ sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
63
+ conv_transpose_1d_kernel(s0, output_size, src0_ne0, src0_ne1, src0_ne2, src1_ne0, dst_ne0, src0, src1, dst,
64
+ item_ct1);
65
+ });
72
66
  }
73
67
 
74
68
  void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {