@fugood/llama.node 0.6.2 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -10
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/patches/node-api-headers+1.1.0.patch +0 -26
  27. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  28. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  29. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  30. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  31. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  32. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  33. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  37. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  38. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  39. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  40. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  41. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  42. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  43. package/src/llama.cpp/cmake/common.cmake +0 -35
  44. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  45. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  46. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  47. package/src/llama.cpp/docs/build.md +0 -561
  48. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  49. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  50. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  51. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  53. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  54. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  55. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  56. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  57. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  58. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  59. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  61. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  62. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  63. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  64. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  68. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  70. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  71. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  72. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  73. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  74. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  75. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  76. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  77. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  79. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  80. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  81. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  82. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  83. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  84. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  85. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  86. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  87. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  88. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  89. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  90. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  91. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  92. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  93. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  94. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  95. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  96. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  97. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  98. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  99. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  100. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  101. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  102. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  103. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  104. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  105. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  106. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  107. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  108. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  109. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  110. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  111. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  112. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  113. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  114. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  115. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  116. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  117. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  118. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  119. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  120. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  121. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  122. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  123. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  125. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  127. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  128. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  129. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  130. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  131. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  132. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  135. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  136. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  137. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  138. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  139. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  140. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  141. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  142. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  143. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  144. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  145. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  146. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  147. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  148. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  149. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  150. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  151. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  153. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  155. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  157. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  159. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  161. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  163. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  167. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  169. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  170. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  172. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  173. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  175. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  177. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  181. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  183. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  185. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  186. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  187. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  189. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  191. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  193. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  195. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  196. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  198. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  199. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  200. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  204. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  205. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  206. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  207. package/src/llama.cpp/models/.editorconfig +0 -1
  208. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  212. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  214. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  217. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  228. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  231. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  233. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  234. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  242. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  250. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  253. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  256. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  258. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  261. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  262. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  263. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  264. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  265. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  266. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  267. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  268. package/src/llama.cpp/prompts/assistant.txt +0 -31
  269. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  270. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  271. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  273. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  274. package/src/llama.cpp/prompts/chat.txt +0 -28
  275. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  276. package/src/llama.cpp/prompts/dan.txt +0 -1
  277. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  278. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  279. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  280. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  281. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  284. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  285. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  286. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  287. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  289. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  290. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  291. package/src/llama.cpp/requirements.txt +0 -13
  292. package/src/llama.cpp/scripts/build-info.sh +0 -30
  293. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  294. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  295. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  296. package/src/llama.cpp/tests/get-model.cpp +0 -21
  297. package/src/llama.cpp/tests/get-model.h +0 -2
  298. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  299. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  300. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  301. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  302. package/src/llama.cpp/tests/test-c.c +0 -7
  303. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  304. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  305. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  306. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  307. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  308. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  309. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  310. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  311. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  312. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  313. package/src/llama.cpp/tests/test-log.cpp +0 -39
  314. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  315. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  316. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  317. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  318. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  319. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  320. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  321. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  322. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  323. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  324. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  325. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  326. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  327. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  328. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  329. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  330. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  331. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  332. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  333. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  334. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  335. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  336. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  337. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  338. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  339. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  340. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  341. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  342. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  343. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  344. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  345. package/src/llama.cpp/tools/main/main.cpp +0 -977
  346. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  347. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  348. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  349. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  350. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  351. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  353. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  354. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  355. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  356. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  357. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  358. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  360. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  361. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  362. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  363. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  364. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  365. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  367. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  368. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  369. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  370. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  371. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  372. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  373. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  374. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  375. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  376. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  377. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  378. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,189 +0,0 @@
1
- message(STATUS "GGML_SYCL_TARGET=${GGML_SYCL_TARGET}")
2
-
3
- if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
4
- message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
5
- endif()
6
-
7
- check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)
8
-
9
- if (DEFINED ENV{ONEAPI_ROOT}) # pick the SYCL compiler flavour: oneAPI release, open-source clang++, or abort
10
- message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
11
- elseif(SUPPORTS_SYCL)
12
- message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
13
- If you expected the oneAPI Release compiler, please install oneAPI & source it, like:
14
- source /opt/intel/oneapi/setvars.sh")
15
- else()
16
- message(FATAL_ERROR "C++ compiler lacks SYCL support.") # no comma after the mode keyword: "FATAL_ERROR," is not a mode, so the message would be a plain notice and configuration would continue
17
- endif()
18
- message(STATUS "SYCL found")
19
- #todo: AOT
20
-
21
- ggml_add_backend_library(ggml-sycl
22
- ggml-sycl.cpp
23
- ../../include/ggml-sycl.h
24
- )
25
-
26
- file(GLOB GGML_HEADERS_SYCL "*.hpp")
27
- file(GLOB GGML_SOURCES_SYCL "*.cpp")
28
- target_sources(ggml-sycl PRIVATE ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL})
29
-
30
- if (WIN32)
31
- # To generate a Visual Studio solution, using Intel C++ Compiler for ggml-sycl is mandatory
32
- if ("${CMAKE_GENERATOR}" MATCHES "Visual Studio" AND NOT ("${CMAKE_GENERATOR_TOOLSET}" MATCHES "Intel C")) # operands are quoted so an empty toolset still yields a well-formed condition
33
- set_target_properties(ggml-sycl PROPERTIES VS_PLATFORM_TOOLSET "Intel C++ Compiler 2025")
34
- set(CMAKE_CXX_COMPILER "icx")
35
- set(CMAKE_CXX_COMPILER_ID "IntelLLVM")
36
- endif()
37
- endif()
38
-
39
- find_package(IntelSYCL)
40
- if (IntelSYCL_FOUND)
41
- # Use oneAPI CMake when possible
42
- target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
43
- else()
44
- # Fallback to the simplest way of enabling SYCL when using intel/llvm nightly for instance
45
- target_compile_options(ggml-sycl PRIVATE "-fsycl")
46
- target_link_options(ggml-sycl PRIVATE "-fsycl")
47
- endif()
48
-
49
- target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")
50
-
51
- # Link against oneDNN
52
- set(GGML_SYCL_DNNL 0)
53
- if(GGML_SYCL_DNN)
54
- find_package(DNNL)
55
- if(DNNL_FOUND)
56
- if (NOT DEFINED DNNL_GPU_VENDOR)
57
- # default to intel target
58
- set(DNNL_GPU_VENDOR "INTEL")
59
- if(NOT "${GGML_SYCL_TARGET}" STREQUAL "INTEL")
60
- message(WARNING "oneDNN builds bundled with oneapi release only support INTEL target")
61
- endif()
62
- endif()
63
-
64
- # Verify oneDNN was compiled for the same target as llama
65
- if("${GGML_SYCL_TARGET}" STREQUAL "${DNNL_GPU_VENDOR}")
66
- target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
67
- set(GGML_SYCL_DNNL 1)
68
- get_target_property(CONFIGS DNNL::dnnl IMPORTED_CONFIGURATIONS)
69
- foreach(CONFIG ${CONFIGS})
70
- get_target_property(DNNL_LIB DNNL::dnnl IMPORTED_LOCATION_${CONFIG})
71
- message(STATUS "Found oneDNN: ${DNNL_LIB}")
72
- endforeach()
73
- else()
74
- message(WARNING
75
- "oneDNN must be compiled for the same target as llama.cpp.
76
- llama.cpp: ${GGML_SYCL_TARGET}, oneDNN: ${DNNL_GPU_VENDOR}.
77
- Disabling oneDNN support.")
78
- endif()
79
- else()
80
- message(STATUS "oneDNN not found, disabling oneDNN support")
81
- endif()
82
- else()
83
- message(STATUS "oneDNN support disabled by the user")
84
- endif()
85
- target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_DNNL=${GGML_SYCL_DNNL})
86
-
87
- if (GGML_SYCL_F16)
88
- if (GGML_SYCL_TARGET STREQUAL "AMD")
89
- message(WARNING "AMD target does not entirely support FP16 in the SYCL backend.")
90
- endif()
91
- add_compile_definitions(GGML_SYCL_F16)
92
- endif()
93
-
94
- if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
95
- add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
96
- elseif (GGML_SYCL_TARGET STREQUAL "AMD")
97
- # INFO: Allowed Sub_group_sizes are not consistent through all
98
- # hip targets. For example, 64 is used for certain models, but the backend
99
- # does not support it.
100
- # Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
101
- add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
102
- else()
103
- add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
104
- endif()
105
-
106
- if (GGML_SYCL_GRAPH)
107
- target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GRAPH)
108
- endif()
109
-
110
- # Link against Intel oneMKL or oneMath
111
- if (GGML_SYCL_TARGET STREQUAL "INTEL")
112
- # Intel devices use Intel oneMKL directly instead of oneMath to avoid the limitation of linking Intel oneMKL statically
113
- # See https://github.com/uxlfoundation/oneMath/issues/654
114
- if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
115
- set(SYCL_COMPILER ON)
116
- endif()
117
- find_package(MKL REQUIRED)
118
- target_link_libraries(ggml-sycl PRIVATE MKL::MKL_SYCL::BLAS)
119
- target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_USE_INTEL_ONEMKL)
120
- else()
121
- find_package(oneMath QUIET)
122
- if (NOT oneMath_FOUND)
123
- message(STATUS "oneMath not found: oneMath will be automatically downloaded")
124
- # Use FetchContent to automatically pull and build oneMath
125
- include(FetchContent)
126
- set(BUILD_FUNCTIONAL_TESTS False)
127
- set(BUILD_EXAMPLES False)
128
- set(TARGET_DOMAINS blas)
129
- if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
130
- set(ENABLE_MKLCPU_BACKEND False)
131
- set(ENABLE_MKLGPU_BACKEND False)
132
- set(ENABLE_CUBLAS_BACKEND True)
133
- elseif (GGML_SYCL_TARGET STREQUAL "AMD")
134
- set(ENABLE_MKLCPU_BACKEND False)
135
- set(ENABLE_MKLGPU_BACKEND False)
136
- set(ENABLE_ROCBLAS_BACKEND True)
137
- # Ensure setting a string variable here is not overridden by oneMath CACHE variables
138
- cmake_policy(SET CMP0126 NEW)
139
- # Setting the device architecture is only needed and useful for AMD devices in oneMath
140
- set(HIP_TARGETS ${GGML_SYCL_DEVICE_ARCH} CACHE STRING "oneMath HIP target" FORCE)
141
- endif()
142
- FetchContent_Declare(
143
- ONEMATH
144
- GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
145
- GIT_TAG c255b1b4c41e2ee3059455c1f96a965d6a62568a
146
- )
147
- FetchContent_MakeAvailable(ONEMATH)
148
- # Create alias to match with find_package targets name
149
- function(onemath_alias target)
150
- if (TARGET ${target}_obj)
151
- # Silence verbose warnings from external libraries
152
- target_compile_options(${target}_obj PRIVATE -w)
153
- endif()
154
- if (TARGET ${target})
155
- add_library(ONEMATH::${target} ALIAS ${target})
156
- endif()
157
- endfunction()
158
- onemath_alias(onemath)
159
- onemath_alias(onemath_blas_mklcpu)
160
- onemath_alias(onemath_blas_mklgpu)
161
- onemath_alias(onemath_blas_cublas)
162
- onemath_alias(onemath_blas_rocblas)
163
- endif()
164
-
165
- # Below oneMath compile-time dispatching is used for better performance
166
- if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
167
- target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
168
- target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
169
- target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
170
- target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
171
- elseif (GGML_SYCL_TARGET STREQUAL "AMD")
172
- if (NOT GGML_SYCL_DEVICE_ARCH) # the AMD (HIP) path cannot be configured without a device architecture
173
- message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.") # "ERROR" is not a message() mode; FATAL_ERROR actually stops configuration
174
- endif()
175
- target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
176
- target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
177
- target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
178
- target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
179
- else()
180
- # Fallback to oneMath runtime dispatcher
181
- target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
182
- target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
183
- endif()
184
- endif()
185
-
186
- if (GGML_SYCL_DEVICE_ARCH)
187
- target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
188
- target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH})
189
- endif()
@@ -1,37 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2024 Intel Corporation
4
- // SPDX-License-Identifier: MIT
5
- //
6
-
7
- //
8
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9
- // See https://llvm.org/LICENSE.txt for license information.
10
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11
- //
12
-
13
- #ifndef GGML_SYCL_BACKEND_HPP
14
- #define GGML_SYCL_BACKEND_HPP
15
-
16
- #include "binbcast.hpp"
17
- #include "common.hpp"
18
- #include "concat.hpp"
19
- #include "conv.hpp"
20
- #include "convert.hpp"
21
- #include "cpy.hpp"
22
- #include "dequantize.hpp"
23
- #include "dmmv.hpp"
24
- #include "element_wise.hpp"
25
- #include "gla.hpp"
26
- #include "im2col.hpp"
27
- #include "mmq.hpp"
28
- #include "mmvq.hpp"
29
- #include "norm.hpp"
30
- #include "outprod.hpp"
31
- #include "quants.hpp"
32
- #include "rope.hpp"
33
- #include "softmax.hpp"
34
- #include "tsembd.hpp"
35
- #include "wkv.hpp"
36
-
37
- #endif // GGML_SYCL_BACKEND_HPP
@@ -1,239 +0,0 @@
1
- #include "binbcast.hpp"
2
-
3
- #include <array>
4
- #include <cstddef>
5
- #include <cstdint>
6
- #include <sycl/sycl.hpp>
7
-
8
- #include "dpct/helper.hpp"
9
- #include "ggml.h"
10
-
11
- template <float (*bin_op)(const float, const float), typename src0_t, typename src1_t, typename dst_t>
12
- static __dpct_inline__ void k_bin_bcast_contiguous(const src0_t * __restrict__ src0, const src1_t * __restrict__ src1,
13
- dst_t * dst, std::size_t num_elements, const sycl::nd_item<1> & it) {
14
- auto element_id = it.get_global_id(0);
15
- auto global_range = it.get_global_range(0);
16
- for (; element_id < num_elements; element_id += global_range) {
17
- auto src0_float_val = sycl::vec(src0[element_id]).template convert<float, sycl::rounding_mode::rte>();
18
- auto src1_float_val = sycl::vec(src1[element_id]).template convert<float, sycl::rounding_mode::rte>();
19
- float dst_val = bin_op(src0_float_val[0], src1_float_val[0]);
20
- auto val_to_store = sycl::vec(dst_val).template convert<dst_t, sycl::rounding_mode::rte>();
21
- dst[element_id] = val_to_store;
22
- }
23
- }
24
-
25
- template <float (*bin_op)(const float, const float), typename src0_t, typename src1_t, typename dst_t>
26
- static __dpct_inline__ void k_bin_bcast(const src0_t * __restrict__ src0, const src1_t * __restrict__ src1, dst_t * dst,
27
- int ne0, int ne1, int ne2, int ne3, int ne10, int ne11, int ne12, int ne13,
28
- int s0, int s1, int s2, int s3, int s00, int s01, int s02, int s03, int s10,
29
- int s11, int s12, int s13, std::size_t num_dst_elements,
30
- const sycl::nd_item<1> & item_ct1) {
31
- auto calculate_logical_index =
32
- [](const std::array<int, 4> & dims, std::size_t element_id) __attribute__((always_inline))->std::array<int, 4> {
33
- std::array<int, 4> logical_index;
34
- #pragma unroll(4)
35
- for (int i = 3; i >= 0; i--) {
36
- logical_index[i] = element_id % dims[i];
37
- element_id /= dims[i];
38
- }
39
- return logical_index;
40
- };
41
-
42
- auto calculate_index = [](const std::array<int, 4> & dims, const std::array<int, 4> & strides,
43
- const std::array<int, 4> & indices) __attribute__((always_inline))
44
- ->std::size_t {
45
- std::size_t index = 0;
46
- #pragma unroll(4)
47
- for (int i = 0; i < 4; i++) {
48
- auto index_i = indices[i];
49
- if (indices[i] >= dims[i]) {
50
- index_i = indices[i] % dims[i];
51
- }
52
- index += strides[i] * index_i;
53
- }
54
- return index;
55
- };
56
-
57
- auto element_id = item_ct1.get_global_id(0);
58
- for (; element_id < num_dst_elements; element_id += item_ct1.get_global_range(0)) {
59
- auto logical_index = calculate_logical_index({ ne3, ne2, ne1, ne0 }, element_id);
60
- auto src_0_index = calculate_index({ ne3, ne2, ne1, ne0 }, { s03, s02, s01, s00 }, logical_index);
61
- auto src_1_index = calculate_index({ ne13, ne12, ne11, ne10 }, { s13, s12, s11, s10 }, logical_index);
62
- auto dst_index = calculate_index({ ne3, ne2, ne1, ne0 }, { s3, s2, s1, s0 }, logical_index);
63
- auto src0_float_val = sycl::vec(src0[src_0_index]).template convert<float, sycl::rounding_mode::rte>();
64
- auto src1_float_val = sycl::vec(src1[src_1_index]).template convert<float, sycl::rounding_mode::rte>();
65
- float dst_val = bin_op(src0_float_val[0], src1_float_val[0]);
66
- auto val_to_store = sycl::vec(dst_val).template convert<dst_t, sycl::rounding_mode::rte>();
67
- dst[dst_index] = val_to_store;
68
- }
69
- }
70
-
71
- template <float (*bin_op)(const float, const float)> struct bin_bcast_sycl {
72
- template <typename src0_t, typename src1_t, typename dst_t>
73
- void operator()(const src0_t * src0_dd, const src1_t * src1_dd, dst_t * dst_dd, const int64_t ne00,
74
- const int64_t ne01, const int64_t ne02, const int64_t ne03, const int64_t ne10, const int64_t ne11,
75
- const int64_t ne12, const int64_t ne13, const int64_t ne0, const int64_t ne1, const int64_t ne2,
76
- const int64_t ne3, const size_t nb00, const size_t nb01, const size_t nb02, const size_t nb03,
77
- const size_t nb10, const size_t nb11, const size_t nb12, const size_t nb13, const size_t nb0,
78
- const size_t nb1, const size_t nb2, const size_t nb3, const bool src0_is_contiguous,
79
- const bool src1_is_contiguous, const bool dst_is_contiguous, queue_ptr stream) {
80
- auto check_bcast_required = [](const std::array<int64_t, 4> & src_dims,
81
- const std::array<int64_t, 4> & dst_dims) -> bool {
82
- for (int i = 0; i < 4; i++) {
83
- if (dst_dims[i] > src_dims[i]) {
84
- return true;
85
- }
86
- }
87
- return false;
88
- };
89
-
90
- dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 });
91
-
92
- GGML_ASSERT(nb0 % sizeof(dst_t) == 0);
93
- GGML_ASSERT(nb1 % sizeof(dst_t) == 0);
94
- GGML_ASSERT(nb2 % sizeof(dst_t) == 0);
95
- GGML_ASSERT(nb3 % sizeof(dst_t) == 0);
96
-
97
- GGML_ASSERT(nb00 % sizeof(src0_t) == 0);
98
- GGML_ASSERT(nb01 % sizeof(src0_t) == 0);
99
- GGML_ASSERT(nb02 % sizeof(src0_t) == 0);
100
- GGML_ASSERT(nb03 % sizeof(src0_t) == 0);
101
-
102
- GGML_ASSERT(nb10 % sizeof(src1_t) == 0);
103
- GGML_ASSERT(nb11 % sizeof(src1_t) == 0);
104
- GGML_ASSERT(nb12 % sizeof(src1_t) == 0);
105
- GGML_ASSERT(nb13 % sizeof(src1_t) == 0);
106
-
107
- // dst strides in number of elements
108
- size_t s0 = nb0 / sizeof(dst_t);
109
- size_t s1 = nb1 / sizeof(dst_t);
110
- size_t s2 = nb2 / sizeof(dst_t);
111
- size_t s3 = nb3 / sizeof(dst_t);
112
-
113
- // src1 strides in number of elements
114
- size_t s10 = nb10 / sizeof(src0_t);
115
- size_t s11 = nb11 / sizeof(src1_t);
116
- size_t s12 = nb12 / sizeof(src1_t);
117
- size_t s13 = nb13 / sizeof(src1_t);
118
-
119
- // src0 strides in number of elements
120
- size_t s00 = nb00 / sizeof(src0_t);
121
- size_t s01 = nb01 / sizeof(src0_t);
122
- size_t s02 = nb02 / sizeof(src0_t);
123
- size_t s03 = nb03 / sizeof(src0_t);
124
-
125
- std::size_t num_dst_elements = static_cast<std::size_t>(ne0) * static_cast<std::size_t>(ne1) *
126
- static_cast<std::size_t>(ne2) * static_cast<std::size_t>(ne3);
127
- std::size_t local_range = 256;
128
- std::size_t global_range = ceil_div(num_dst_elements, local_range) * local_range;
129
-
130
- bool needs_broadcasting = check_bcast_required({ ne00, ne01, ne02, ne03 }, { ne0, ne1, ne2, ne3 }) ||
131
- check_bcast_required({ ne10, ne11, ne12, ne13 }, { ne0, ne1, ne2, ne3 });
132
- bool all_contiguous = src0_is_contiguous && src1_is_contiguous && dst_is_contiguous;
133
-
134
- if (! needs_broadcasting && all_contiguous) {
135
- stream->submit([&](sycl::handler & cgh) {
136
- cgh.parallel_for(sycl::nd_range<1>({ global_range }, { local_range }), [=](sycl::nd_item<1> it) {
137
- k_bin_bcast_contiguous<bin_op>(src0_dd, src1_dd, dst_dd, num_dst_elements, it);
138
- });
139
- });
140
- } else {
141
- stream->submit([&](sycl::handler & cgh) {
142
- cgh.parallel_for(sycl::nd_range<1>({ global_range }, { local_range }), [=](sycl::nd_item<1> it) {
143
- k_bin_bcast<bin_op>(src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3, ne10, ne11, ne12, ne13, s0, s1,
144
- s2, s3, s00, s01, s02, s03, s10, s11, s12, s13, num_dst_elements, it);
145
- });
146
- });
147
- }
148
- }
149
- };
150
-
151
- template <class op>
152
- inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1,
153
- ggml_tensor * dst) {
154
- dpct::queue_ptr main_stream = ctx.stream();
155
- GGML_TENSOR_BINARY_OP_LOCALS
156
-
157
- if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
158
- op()((const float *) src0->data, (const float *) src1->data, (float *) dst->data, ne00, ne01, ne02, ne03, ne10,
159
- ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3,
160
- ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_contiguous(dst), main_stream);
161
- } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
162
- op()((const sycl::half *) src0->data, (const sycl::half *) src1->data, (sycl::half *) dst->data, ne00, ne01,
163
- ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13,
164
- nb0, nb1, nb2, nb3, ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_contiguous(dst),
165
- main_stream);
166
- } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16) {
167
- op()((const sycl::half *) src0->data, (const float *) src1->data, (sycl::half *) dst->data, ne00, ne01, ne02,
168
- ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1,
169
- nb2, nb3, ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_contiguous(dst), main_stream);
170
- } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32 && dst->type == GGML_TYPE_I32) {
171
- op()((const int32_t *) src0->data, (const int32_t *) src1->data, (int32_t *) dst->data, ne00, ne01, ne02, ne03,
172
- ne10, ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2,
173
- nb3, ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_contiguous(dst), main_stream);
174
- } else if (src0->type == GGML_TYPE_I16 && src1->type == GGML_TYPE_I16 && dst->type == GGML_TYPE_I16) {
175
- op()((const int16_t *) src0->data, (const int16_t *) src1->data, (int16_t *) dst->data, ne00, ne01, ne02, ne03,
176
- ne10, ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2,
177
- nb3, ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_contiguous(dst), main_stream);
178
- } else {
179
- fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type),
180
- ggml_type_name(src0->type), ggml_type_name(src1->type));
181
- GGML_ABORT("fatal error");
182
- }
183
- }
184
-
185
- inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
186
-
187
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_add>>(ctx, dst->src[0], dst->src[1], dst);
188
- }
189
-
190
- inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
191
-
192
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, dst->src[0], dst->src[1], dst);
193
- }
194
-
195
- inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
196
-
197
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, dst->src[0], dst->src[1], dst);
198
- }
199
-
200
- inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
201
-
202
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_div>>(ctx, dst->src[0], dst->src[1], dst);
203
- }
204
-
205
- inline void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
206
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, dst->src[0], dst);
207
- }
208
-
209
-
210
- void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
211
- GGML_SYCL_DEBUG("call %s\n", __func__);
212
- ggml_sycl_op_add(ctx, dst);
213
- GGML_SYCL_DEBUG("call %s done\n", __func__);
214
- }
215
-
216
- void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
217
- GGML_SYCL_DEBUG("call %s\n", __func__);
218
- ggml_sycl_op_sub(ctx, dst);
219
- GGML_SYCL_DEBUG("call %s done\n", __func__);
220
- }
221
-
222
- void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
223
- GGML_SYCL_DEBUG("call %s\n", __func__);
224
- ggml_sycl_op_mul(ctx, dst);
225
- GGML_SYCL_DEBUG("call %s done\n", __func__);
226
- }
227
-
228
- void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
229
- GGML_SYCL_DEBUG("call %s\n", __func__);
230
- ggml_sycl_op_div(ctx, dst);
231
- GGML_SYCL_DEBUG("call %s done\n", __func__);
232
- }
233
-
234
- void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
235
- GGML_SYCL_DEBUG("call %s\n", __func__);
236
- ggml_sycl_op_repeat(ctx, dst);
237
- GGML_SYCL_DEBUG("call %s done\n", __func__);
238
- }
239
-
@@ -1,39 +0,0 @@
1
- #ifndef GGML_SYCL_BINBCAST_HPP
2
- #define GGML_SYCL_BINBCAST_HPP
3
- #include "common.hpp"
4
-
5
-
6
- static __dpct_inline__ float op_repeat(const float a, const float b) {
7
- return b;
8
- GGML_UNUSED(a);
9
- }
10
-
11
- static __dpct_inline__ float op_add(const float a, const float b) {
12
- return a + b;
13
- }
14
-
15
- static __dpct_inline__ float op_sub(const float a, const float b) {
16
- return a - b;
17
- }
18
-
19
- static __dpct_inline__ float op_mul(const float a, const float b) {
20
- return a * b;
21
- }
22
-
23
- static __dpct_inline__ float op_div(const float a, const float b) {
24
- return a / b;
25
- }
26
-
27
- void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
28
-
29
- void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
30
-
31
- void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
32
-
33
- void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
34
-
35
- void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
36
-
37
-
38
- #endif //GGML_SYCL_BINBCAST_HPP
39
-
@@ -1,83 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2024 Intel Corporation
4
- // SPDX-License-Identifier: MIT
5
- //
6
-
7
- //
8
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9
- // See https://llvm.org/LICENSE.txt for license information.
10
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11
- //
12
-
13
- #include "common.hpp"
14
-
15
- #include "ggml-backend-impl.h"
16
- #include "ggml-impl.h"
17
-
18
- int get_current_device_id() {
19
- return dpct::dev_mgr::instance().current_device_id();
20
- }
21
-
22
- void* ggml_sycl_host_malloc(size_t size) try {
23
- if (getenv("GGML_SYCL_NO_PINNED") != nullptr) {
24
- return nullptr;
25
- }
26
-
27
- void* ptr = nullptr;
28
- // allow to use dpct::get_in_order_queue() for host malloc
29
- dpct::err0 err = CHECK_TRY_ERROR(
30
- ptr = (void*)sycl::malloc_host(size, dpct::get_in_order_queue()));
31
-
32
- if (err != 0) {
33
- // clear the error
34
- GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0, "syclGetErrorString is not supported");
35
- return nullptr;
36
- }
37
-
38
- return ptr;
39
- } catch (sycl::exception const& exc) {
40
- std::cerr << exc.what() << "Exception caught at file:" << __FILE__
41
- << ", line:" << __LINE__ << std::endl;
42
- std::exit(1);
43
- }
44
-
45
- void ggml_sycl_host_free(void* ptr) try {
46
- // allow to use dpct::get_in_order_queue() for host malloc
47
- SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(ptr, dpct::get_in_order_queue())));
48
- } catch (sycl::exception const& exc) {
49
- std::cerr << exc.what() << "Exception caught at file:" << __FILE__
50
- << ", line:" << __LINE__ << std::endl;
51
- std::exit(1);
52
- }
53
-
54
- bool gpu_has_xmx(sycl::device &dev) {
55
- return dev.has(sycl::aspect::ext_intel_matrix);
56
- }
57
-
58
// Halves `block_size` (integer division) until accumulate_block_num * block_size
// no longer exceeds std::numeric_limits<int>::max(), and returns the reduced size.
// If the product already fits, block_size is returned unchanged.
int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
    const int64_t range_limit = std::numeric_limits<int>::max();

    int64_t reduced_size = block_size;
    while (accumulate_block_num * reduced_size > range_limit) {
        reduced_size /= 2;
    }
    return reduced_size;
}
68
-
69
- void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams) {
70
- for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
71
- for (int64_t is = 0; is < GGML_SYCL_MAX_STREAMS; ++is) {
72
- if (extra->events[i][is] != nullptr) {
73
- SYCL_CHECK(CHECK_TRY_ERROR(dpct::destroy_event(extra->events[i][is])));
74
- }
75
- }
76
- if (extra->data_device[i] != nullptr && streams.size()>0) {
77
- ggml_sycl_set_device(i);
78
- SYCL_CHECK(
79
- CHECK_TRY_ERROR(sycl::free(extra->data_device[i], *(streams[i]))));
80
- }
81
- }
82
- delete extra;
83
- }