@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp
@@ -1,493 +0,0 @@
- //
- // MIT license
- // Copyright (C) 2024 Intel Corporation
- // SPDX-License-Identifier: MIT
- //
-
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
-
- #ifndef GGML_SYCL_COMMON_HPP
- #define GGML_SYCL_COMMON_HPP
-
- #include <fstream>
- #include <iostream>
-
- #include "dpct/helper.hpp"
- #include "ggml-sycl.h"
- #include "presets.hpp"
- #include "sycl_hw.hpp"
-
-
- #if GGML_SYCL_DNNL
- #include "dnnl.hpp"
- #include "dnnl_sycl.hpp"
- #endif
-
- #define GGML_COMMON_DECL_SYCL
- #define GGML_COMMON_IMPL_SYCL
- /* suppress warning spam */
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wnested-anon-types"
- #include "ggml-common.h"
- #pragma clang diagnostic pop
- #include "ggml-impl.h"
-
- void* ggml_sycl_host_malloc(size_t size);
- void ggml_sycl_host_free(void* ptr);
-
-
- extern int g_ggml_sycl_debug;
- extern int g_ggml_sycl_disable_optimize;
- extern int g_ggml_sycl_prioritize_dmmv;
-
- #define GGML_SYCL_DEBUG(...) \
- do { \
- if (g_ggml_sycl_debug) \
- fprintf(stderr, __VA_ARGS__); \
- } while (0)
-
- #define CHECK_TRY_ERROR(expr) \
- [&]() { \
- try { \
- expr; \
- return dpct::success; \
- } catch (std::exception const& e) { \
- std::cerr << e.what() << "\nException caught at file:" << __FILE__ \
- << ", line:" << __LINE__ << ", func:" << __func__ \
- << std::endl; \
- return dpct::default_error; \
- } \
- }()
-
-
- #define __SYCL_ARCH__ DPCT_COMPATIBILITY_TEMP
- #define VER_4VEC 610 // todo for hardward optimize.
- #define VER_GEN9 700 // todo for hardward optimize.
- #define VER_GEN12 1000000 // todo for hardward optimize.
- #define VER_GEN13 (VER_GEN12 + 1030) // todo for hardward optimize.
-
- #define GGML_SYCL_MAX_NODES 8192 // TODO: adapt to hardwares
-
- // define for XMX in Intel GPU
- // TODO: currently, it's not used for XMX really.
- #if !defined(GGML_SYCL_FORCE_MMQ)
- #define SYCL_USE_XMX
- #endif
-
- // max batch size to use MMQ kernels when tensor cores are available
- #define MMQ_MAX_BATCH_SIZE 32
-
- // dmmv = dequantize_mul_mat_vec
- #ifndef GGML_SYCL_DMMV_X
- #define GGML_SYCL_DMMV_X 32
- #endif
- #ifndef GGML_SYCL_MMV_Y
- #define GGML_SYCL_MMV_Y 1
- #endif
-
- typedef sycl::queue *queue_ptr;
-
- enum ggml_sycl_backend_gpu_mode {
- SYCL_UNSET_GPU_MODE = -1,
- SYCL_SINGLE_GPU_MODE = 0,
- SYCL_MUL_GPU_MODE
- };
-
- static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
-
- static void crash() {
- int* ptr = NULL;
- *ptr = 0;
- }
-
- [[noreturn]] static void ggml_sycl_error(
- const char* stmt,
- const char* func,
- const char* file,
- const int line,
- const char* msg) {
- fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
- fprintf(stderr, " in function %s at %s:%d\n", func, file, line);
- GGML_ABORT("SYCL error");
- }
-
- #define SYCL_CHECK(err) \
- do { \
- auto err_ = (err); \
- if (err_ != 0) \
- ggml_sycl_error(#err, __func__, __FILE__, __LINE__, "Exception caught in this line of code."); \
- } while (0)
-
- #if DPCT_COMPAT_RT_VERSION >= 11100
- #define GGML_SYCL_ASSUME(x) __builtin_assume(x)
- #else
- #define GGML_SYCL_ASSUME(x)
- #endif // DPCT_COMPAT_RT_VERSION >= 11100
-
- #ifdef GGML_SYCL_F16
- typedef sycl::half dfloat; // dequantize float
- typedef sycl::half2 dfloat2;
- #else
- typedef float dfloat; // dequantize float
- typedef sycl::float2 dfloat2;
- #endif // GGML_SYCL_F16
-
- #define MMVQ_MAX_BATCH_SIZE 8
-
- static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-
- static int g_all_sycl_device_count = -1;
- static bool g_ggml_backend_sycl_buffer_type_initialized = false;
-
- static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
- SYCL_UNSET_GPU_MODE;
-
- static void* g_scratch_buffer = nullptr;
- static size_t g_scratch_size = 0; // disabled by default
- static size_t g_scratch_offset = 0;
-
- [[noreturn]] static inline void bad_arch(const sycl::stream& stream_ct1) {
- stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
- "current GPU architecture.\n";
- // __trap();
- std::exit(1);
-
- (void)bad_arch; // suppress unused function warning
- }
-
- int get_current_device_id();
-
- inline dpct::err0 ggml_sycl_set_device(const int device) try {
- int current_device_id;
- SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));
-
- // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
- // current_device_id=%d\n", device, current_device);
- if (device == current_device_id) {
- return 0;
- }
-
- return CHECK_TRY_ERROR(dpct::select_device(device));
- } catch (sycl::exception const& exc) {
- std::cerr << exc.what() << "Exception caught at file:" << __FILE__
- << ", line:" << __LINE__ << std::endl;
- crash();
- std::exit(1);
- }
-
- //////////////////////
- struct optimize_feature {
- bool reorder=false;
- };
-
- struct sycl_device_info {
- int cc; // compute capability
- // int nsm; // number of streaming multiprocessors
- // size_t smpb; // max. shared memory per block
- bool vmm; // virtual memory support
- size_t total_vram;
- sycl_hw_info hw_info;
- optimize_feature opt_feature;
- };
-
-
- struct ggml_sycl_device_info {
- int device_count;
-
- sycl_device_info devices[GGML_SYCL_MAX_DEVICES] = {};
-
- std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
-
- int max_work_group_sizes[GGML_SYCL_MAX_DEVICES] = {0};
- };
-
- const ggml_sycl_device_info & ggml_sycl_info();
-
- struct ggml_sycl_pool {
- virtual ~ggml_sycl_pool() = default;
-
- virtual void * alloc(size_t size, size_t * actual_size) = 0;
- virtual void free(void * ptr, size_t size) = 0;
- };
-
- template<typename T>
- struct ggml_sycl_pool_alloc {
- ggml_sycl_pool * pool = nullptr;
- T * ptr = nullptr;
- size_t actual_size = 0;
-
- explicit ggml_sycl_pool_alloc(ggml_sycl_pool & pool) : pool(&pool) {
- }
-
- ggml_sycl_pool_alloc(ggml_sycl_pool & pool, size_t size) : pool(&pool) {
- alloc(size);
- }
-
- ~ggml_sycl_pool_alloc() {
- if (ptr != nullptr) {
- pool->free(ptr, actual_size);
- }
- }
-
- T * realloc(size_t size) {
- GGML_ASSERT(pool != nullptr);
- if (ptr)
- pool->free(ptr, actual_size);
- ptr = (T *) pool->alloc(size * sizeof(T), &this->actual_size);
- return ptr;
- }
-
- // size is in number of elements
- T * alloc(size_t size) {
- GGML_ASSERT(pool != nullptr);
- GGML_ASSERT(ptr == nullptr);
- ptr = (T *) pool->alloc(size * sizeof(T), &this->actual_size);
- return ptr;
- }
-
- T * alloc(ggml_sycl_pool & pool, size_t size) {
- this->pool = &pool;
- return alloc(size);
- }
-
- T * get() {
- return ptr;
- }
-
- ggml_sycl_pool_alloc() = default;
- ggml_sycl_pool_alloc(const ggml_sycl_pool_alloc &) = delete;
- ggml_sycl_pool_alloc(ggml_sycl_pool_alloc &&) = delete;
- ggml_sycl_pool_alloc& operator=(const ggml_sycl_pool_alloc &) = delete;
- ggml_sycl_pool_alloc& operator=(ggml_sycl_pool_alloc &&) = delete;
- };
-
- // backend interface
-
- struct ggml_tensor_extra_gpu {
- void* data_device[GGML_SYCL_MAX_DEVICES]; // 1 pointer for each device for split
- // tensors
- dpct::event_ptr events[GGML_SYCL_MAX_DEVICES]
- [GGML_SYCL_MAX_STREAMS]; // events for synchronizing multiple GPUs
- optimize_feature optimized_feature;
- };
-
- void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
-
- inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
- optimize_feature opt;
-
- opt.reorder =
- (arch == syclex::architecture::intel_gpu_dg1 ||
- arch == syclex::architecture::intel_gpu_acm_g10 ||
- arch == syclex::architecture::intel_gpu_acm_g11 ||
- arch == syclex::architecture::intel_gpu_acm_g12 ||
- arch == syclex::architecture::intel_gpu_pvc ||
- arch == syclex::architecture::intel_gpu_pvc_vg ||
- arch == syclex::architecture::intel_gpu_mtl_u ||
- arch == syclex::architecture::intel_gpu_mtl_s ||
- arch == syclex::architecture::intel_gpu_mtl_h ||
- arch == syclex::architecture::intel_gpu_arl_u ||
- arch == syclex::architecture::intel_gpu_arl_s ||
- arch == syclex::architecture::intel_gpu_arl_h ||
- arch == syclex::architecture::intel_gpu_bmg_g21 ||
- arch == syclex::architecture::intel_gpu_lnl_m
- );
-
- return opt;
- }
-
- namespace sycl_ex = sycl::ext::oneapi::experimental;
- struct ggml_backend_sycl_context {
- int device;
- std::string name;
- optimize_feature opt_feature;
-
- queue_ptr qptrs[GGML_SYCL_MAX_DEVICES][GGML_SYCL_MAX_STREAMS] = { { nullptr } };
-
- explicit ggml_backend_sycl_context(int device) :
- device(device),
- name(GGML_SYCL_NAME + std::to_string(device)) {
- opt_feature = ggml_sycl_info().devices[device].opt_feature;
- }
-
- queue_ptr stream(int device, int stream) {
- if (qptrs[device][stream] == nullptr) {
- qptrs[device][stream] = &(dpct::get_device(device).default_queue());
- }
- return qptrs[device][stream];
- }
-
- queue_ptr stream() {
- return stream(device, 0);
- }
-
- #if GGML_SYCL_DNNL
- dnnl::engine make_engine(sycl::queue* q) {
- // Get the device associated with the queue
- sycl::device dev = q->get_device();
- // Get the context associated with the queue
- sycl::context ctx = q->get_context();
- const dnnl::engine eng = dnnl::sycl_interop::make_engine(dev, ctx);
- return eng;
- }
-
- std::unordered_map<sycl::queue*, dnnl::stream> stream_map;
- std::unordered_map<sycl::queue*, dnnl::engine> engine_map;
- dnnl::stream stream_dnnl(int device, int _stream) {
- auto q = stream(device, _stream);
- return stream_dnnl(q);
- }
- dnnl::engine engine_dnnl(sycl::queue* qptr) {
- auto it = engine_map.find(qptr);
- if (it == engine_map.end()) {
- auto eng = make_engine(qptr);
- engine_map[qptr] = eng;
- return eng;
- }
- else
- {
- return it->second;
- }
- }
- dnnl::stream stream_dnnl(sycl::queue* qptr) {
- auto it = stream_map.find(qptr);
- if (it == stream_map.end()) {
- auto eng = engine_dnnl(qptr);
- auto stream = dnnl::sycl_interop::make_stream(eng, *qptr);
- stream_map[qptr] = stream;
- return stream;
- }
- else
- {
- return it->second;
- }
- }
- dnnl::stream stream_dnnl() {
- return stream_dnnl(device, 0);
- }
- dnnl::memory get_scratchpad_mem(const dnnl::memory::desc & scratchpad_md,
- const dnnl::engine & eng, const queue_ptr q) {
- ggml_sycl_pool_alloc<uint8_t> * pool;
- auto it = scratchpad_map.find(q);
- if (it == scratchpad_map.end()) {
- scratchpad_map[q] = std::make_unique<ggml_sycl_pool_alloc<uint8_t>>(this->pool());
- pool = scratchpad_map[q].get();
- } else {
- pool = it->second.get();
- }
-
- size_t scratchpad_size = scratchpad_md.get_size();
- if (scratchpad_size > pool->actual_size) {
- pool->realloc(scratchpad_size);
- }
- void * mem_ptr = pool->get();
- return dnnl::memory(scratchpad_md, eng, mem_ptr);
- }
- #endif
-
- // pool
- std::unique_ptr<ggml_sycl_pool> pools[GGML_SYCL_MAX_DEVICES];
- std::unordered_map<sycl::queue *, std::unique_ptr<ggml_sycl_pool_alloc<uint8_t>>> scratchpad_map;
-
- std::unique_ptr<ggml_sycl_pool> host_pools[GGML_SYCL_MAX_DEVICES];
-
- static std::unique_ptr<ggml_sycl_pool> new_pool_for_device(queue_ptr qptr, int device);
-
- static std::unique_ptr<ggml_sycl_pool> new_pool_for_host(queue_ptr qptr, int device);
-
- ggml_sycl_pool & pool(int device) {
- if (pools[device] == nullptr) {
- pools[device] = new_pool_for_device(stream(device,0), device);
- }
- return *pools[device];
- }
-
- ggml_sycl_pool & pool() {
- return pool(device);
- }
-
- #ifdef GGML_SYCL_GRAPH
- std::unique_ptr<sycl_ex::command_graph<sycl_ex::graph_state::executable>> exec_graph = nullptr;
- #endif
-
- ggml_sycl_pool & host_pool(int device) {
- if (host_pools[device] == nullptr) {
- host_pools[device] = new_pool_for_host(stream(device, 0), device);
- }
- return *host_pools[device];
- }
-
- ggml_sycl_pool & host_pool() { return host_pool(device); }
- };
-
- // common device functions
-
- static __dpct_inline__ float warp_reduce_sum(float x,
- const sycl::nd_item<3>& item_ct1) {
- #pragma unroll
- for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
- /*
- DPCT1096:98: The right-most dimension of the work-group used in the SYCL
- kernel that calls this function may be less than "32". The function
- "dpct::permute_sub_group_by_xor" may return an unexpected result on the
- CPU device. Modify the size of the work-group to ensure that the value
- of the right-most dimension is a multiple of "32".
- */
- x += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), x, mask);
- }
- return x;
- }
-
- static __dpct_inline__ sycl::float2
- warp_reduce_sum(sycl::float2 a, const sycl::nd_item<3>& item_ct1) {
- #pragma unroll
- for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
- a.x() += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), a.x(),
- mask);
- a.y() += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), a.y(),
- mask);
- }
- return a;
- }
-
- static __dpct_inline__ float warp_reduce_max(float x,
- const sycl::nd_item<3>& item_ct1) {
- #pragma unroll
- for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
- /*
- DPCT1096:97: The right-most dimension of the work-group used in the SYCL
- kernel that calls this function may be less than "32". The function
- "dpct::permute_sub_group_by_xor" may return an unexpected result on the
- CPU device. Modify the size of the work-group to ensure that the value
- of the right-most dimension is a multiple of "32".
- */
- x = sycl::fmax(x, dpct::permute_sub_group_by_xor(
- item_ct1.get_sub_group(), x, mask));
- }
- return x;
- }
-
- // Helper for vec loading aligned data
- template <typename Tp, int n>
- inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
- return *reinterpret_cast<const sycl::vec<Tp, n>*>(aligned_ptr);
- }
-
- // Helper for accessing pointers with no warnings
- template <typename Tp, int dim>
- static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {
- return acc.template get_multi_ptr<sycl::access::decorated::no>().get();
- }
-
- int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size);
-
- constexpr size_t ceil_div(const size_t m, const size_t n) {
- return (m + n - 1) / n;
- }
-
- bool gpu_has_xmx(sycl::device &dev);
- #endif // GGML_SYCL_COMMON_HPP
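Note on the hunk above: the core of the removed common.hpp is the `ggml_sycl_pool` / `ggml_sycl_pool_alloc<T>` pair, a pool interface plus an RAII guard that returns its buffer to the pool when it leaves scope. Below is a minimal, self-contained C++ sketch of that same pattern, not the package's code: the names `pool`, `malloc_pool`, and `pool_alloc` are illustrative, and a plain malloc-backed pool stands in for the real SYCL device pool.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Abstract pool interface, mirroring the removed ggml_sycl_pool.
struct pool {
    virtual ~pool() = default;
    virtual void * alloc(size_t size, size_t * actual_size) = 0;
    virtual void free(void * ptr, size_t size) = 0;
};

// Toy malloc-backed pool standing in for the real SYCL device pool.
struct malloc_pool : pool {
    void * alloc(size_t size, size_t * actual_size) override {
        *actual_size = size; // a real pool may round up and reuse blocks
        return std::malloc(size);
    }
    void free(void * ptr, size_t) override { std::free(ptr); }
};

// RAII guard: the buffer goes back to the pool at scope exit,
// the same shape as the removed ggml_sycl_pool_alloc<T>.
template <typename T>
struct pool_alloc {
    pool * p = nullptr;
    T * ptr = nullptr;
    size_t actual_size = 0;

    pool_alloc(pool & pl, size_t n) : p(&pl) { alloc(n); }
    ~pool_alloc() { if (ptr) p->free(ptr, actual_size); }

    T * alloc(size_t n) { // n is a number of elements, not bytes
        assert(ptr == nullptr);
        ptr = (T *) p->alloc(n * sizeof(T), &actual_size);
        return ptr;
    }
    pool_alloc(const pool_alloc &) = delete;
    pool_alloc & operator=(const pool_alloc &) = delete;
};

int main() {
    malloc_pool mp;
    pool_alloc<float> buf(mp, 1024); // freed automatically at scope exit
    buf.ptr[0] = 42.0f;
    std::printf("got %zu bytes at %p\n", buf.actual_size, (void *) buf.ptr);
}
```

The design point the removed header makes is that callers never size buffers in bytes or call free by hand; exception paths (such as those unwound by `CHECK_TRY_ERROR`) still release the allocation.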
package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp
@@ -1,197 +0,0 @@
- //
- // MIT license
- // Copyright (C) 2024 Intel Corporation
- // SPDX-License-Identifier: MIT
- //
-
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
-
- #include "concat.hpp"
- #include "common.hpp"
-
- static void concat_f32_dim0(const float *x, const float *y, float *dst,
- const int ne0, const int ne00,
- const sycl::nd_item<3> &item_ct1) {
- int nidx = item_ct1.get_local_id(2) +
- item_ct1.get_group(2) * item_ct1.get_local_range(2);
- if (nidx >= ne0) {
- return;
- }
- // operation
- int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (nidx < ne00) { // src0
- int offset_src = nidx + item_ct1.get_group(1) * ne00 +
- item_ct1.get_group(0) * ne00 * item_ct1.get_group_range(1);
- dst[offset_dst] = x[offset_src];
- } else {
- int offset_src =
- nidx - ne00 + item_ct1.get_group(1) * (ne0 - ne00) +
- item_ct1.get_group(0) * (ne0 - ne00) * item_ct1.get_group_range(1);
- dst[offset_dst] = y[offset_src];
- }
- }
-
- static void concat_f32_dim1(const float *x, const float *y, float *dst,
- const int ne0, const int ne01,
- const sycl::nd_item<3> &item_ct1) {
- int nidx = item_ct1.get_local_id(2) +
- item_ct1.get_group(2) * item_ct1.get_local_range(2);
- if (nidx >= ne0) {
- return;
- }
- // operation
- int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (item_ct1.get_group(1) < (size_t) ne01) { // src0
- int offset_src =
- nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
- dst[offset_dst] = x[offset_src];
- } else {
- int offset_src =
- nidx + (item_ct1.get_group(1) - ne01) * ne0 +
- item_ct1.get_group(0) * ne0 * (item_ct1.get_group_range(1) - ne01);
- dst[offset_dst] = y[offset_src];
- }
- }
-
- static void concat_f32_dim2(const float *x, const float *y, float *dst,
- const int ne0, const int ne02,
- const sycl::nd_item<3> &item_ct1) {
- int nidx = item_ct1.get_local_id(2) +
- item_ct1.get_group(2) * item_ct1.get_local_range(2);
- if (nidx >= ne0) {
- return;
- }
- // operation
- int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (item_ct1.get_group(0) < (size_t) ne02) { // src0
- int offset_src = nidx + item_ct1.get_group(1) * ne0 +
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- dst[offset_dst] = x[offset_src];
- } else {
- int offset_src =
- nidx + item_ct1.get_group(1) * ne0 +
- (item_ct1.get_group(0) - ne02) * ne0 * item_ct1.get_group_range(1);
- dst[offset_dst] = y[offset_src];
- }
- }
-
- static void concat_f32_sycl(const float *x, const float *y, float *dst,
- int ne00, int ne01, int ne02, int ne0, int ne1,
- int ne2, int dim, queue_ptr stream) {
- int num_blocks = (ne0 + SYCL_CONCAT_BLOCK_SIZE - 1) / SYCL_CONCAT_BLOCK_SIZE;
- sycl::range<3> gridDim(ne2, ne1, num_blocks);
- switch (dim) {
- case 0:
- stream->parallel_for(
- sycl::nd_range<3>(gridDim *
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
- [=](sycl::nd_item<3> item_ct1) {
- concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1);
- });
- break;
- case 1:
- stream->parallel_for(
- sycl::nd_range<3>(gridDim *
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
- [=](sycl::nd_item<3> item_ct1) {
- concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1);
- });
- break;
- // dim >=2 will be dispatched to the default path
- default:
- stream->parallel_for(
- sycl::nd_range<3>(gridDim *
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
- [=](sycl::nd_item<3> item_ct1) {
- concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1);
- });
- break;
- }
- }
-
- // non-contiguous kernel (slow)
- static void concat_f32_sycl_non_cont(
- queue_ptr stream, const char *src0, const char *src1, char *dst,
- int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03, uint64_t nb00,
- uint64_t nb01, uint64_t nb02, uint64_t nb03, int64_t /*ne10*/,
- int64_t /*ne11*/, int64_t /*ne12*/, int64_t /*ne13*/, uint64_t nb10,
- uint64_t nb11, uint64_t nb12, uint64_t nb13, int64_t ne0, int64_t ne1,
- int64_t ne2, int64_t ne3, uint64_t nb0, uint64_t nb1, uint64_t nb2,
- uint64_t nb3, int32_t dim) {
- sycl::range<3> gridDim(ne3, ne2, ne1);
- stream->parallel_for(
- sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)),
- [=](sycl::nd_item<3> item_ct1) {
- int64_t i3 = item_ct1.get_group(0);
- int64_t i2 = item_ct1.get_group(1);
- int64_t i1 = item_ct1.get_group(2);
-
- int64_t o[4] = {0, 0, 0, 0};
- o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
-
- const float *x;
-
- for (int i0 = item_ct1.get_local_id(2); i0 < ne0;
- i0 += item_ct1.get_local_range(2)) {
- if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
- x = (const float *)(src0 + (i3)*nb03 + (i2)*nb02 + (i1)*nb01 +
- (i0)*nb00);
- } else {
- x = (const float *)(src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 +
- (i1 - o[1]) * nb11 + (i0 - o[0]) * nb10);
- }
-
- float *y = (float *)(dst + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0);
-
- *y = *x;
- }
- });
- }
-
- void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
- const ggml_tensor *src0 = dst->src[0];
- const ggml_tensor *src1 = dst->src[1];
- queue_ptr stream = ctx.stream();
-
- const int32_t dim = ((int32_t *)dst->op_params)[0];
-
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
- const float *src0_d = (const float *)src0->data;
- const float *src1_d = (const float *)src1->data;
-
- float *dst_d = (float *)dst->data;
-
- if (dim != 3) {
- for (int i3 = 0; i3 < dst->ne[3]; i3++) {
- concat_f32_sycl(
- src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
- dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
- src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
- }
- } else {
- const size_t size0 = ggml_nbytes(src0);
- const size_t size1 = ggml_nbytes(src1);
-
- SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
- SYCL_CHECK(CHECK_TRY_ERROR(
- stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
- }
- } else
- concat_f32_sycl_non_cont(
- stream, (const char *)src0->data, (const char *)src1->data,
- (char *)dst->data, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
- src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src1->ne[0],
- src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1],
- src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
- dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
- }
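Note on the hunk above: in the removed `concat_f32_dim0`, each work-item handles one destination column `nidx` of a row, copying from `src0` while `nidx < ne00` and from `src1` (rebased by `ne00`) afterwards. The single-threaded C++ reference below replays the same index math on the CPU; it is a sketch for reading the kernel, not package code, and the name `concat_f32_dim0_ref` is illustrative.

```cpp
#include <cstdio>
#include <vector>

// CPU reference for the dim-0 case: each dst row is a src0 row followed by a
// src1 row, with ne0 = ne00 + ne10. The loop variables map onto the kernel:
// nidx is the work-item index, i1 is get_group(1), i2 is get_group(0).
static void concat_f32_dim0_ref(const float * x, const float * y, float * dst,
                                int ne0, int ne00, int ne1, int ne2) {
    for (int i2 = 0; i2 < ne2; i2++) {
        for (int i1 = 0; i1 < ne1; i1++) {
            for (int nidx = 0; nidx < ne0; nidx++) {
                int offset_dst = nidx + i1 * ne0 + i2 * ne0 * ne1;
                if (nidx < ne00) { // comes from src0
                    dst[offset_dst] = x[nidx + i1 * ne00 + i2 * ne00 * ne1];
                } else {           // comes from src1, columns rebased by ne00
                    int ne10 = ne0 - ne00;
                    dst[offset_dst] =
                        y[(nidx - ne00) + i1 * ne10 + i2 * ne10 * ne1];
                }
            }
        }
    }
}

int main() {
    // concat a 2x2x1 and a 1x2x1 tensor along dim 0 -> 3x2x1
    std::vector<float> x = {1, 2, 3, 4}, y = {9, 8}, dst(6);
    concat_f32_dim0_ref(x.data(), y.data(), dst.data(), 3, 2, 2, 1);
    for (float v : dst) std::printf("%g ", v); // prints: 1 2 9 3 4 8
    std::printf("\n");
}
```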
package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp
@@ -1,20 +0,0 @@
- //
- // MIT license
- // Copyright (C) 2024 Intel Corporation
- // SPDX-License-Identifier: MIT
- //
-
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
-
- #ifndef GGML_SYCL_CONCAT_HPP
- #define GGML_SYCL_CONCAT_HPP
-
- #include "common.hpp"
-
- void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
-
- #endif // GGML_SYCL_CONCAT_HPP