@fugood/llama.node 0.6.2 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -10
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/patches/node-api-headers+1.1.0.patch +0 -26
  27. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  28. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  29. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  30. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  31. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  32. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  33. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  37. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  38. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  39. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  40. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  41. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  42. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  43. package/src/llama.cpp/cmake/common.cmake +0 -35
  44. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  45. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  46. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  47. package/src/llama.cpp/docs/build.md +0 -561
  48. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  49. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  50. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  51. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  53. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  54. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  55. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  56. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  57. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  58. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  59. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  61. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  62. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  63. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  64. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  68. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  70. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  71. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  72. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  73. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  74. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  75. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  76. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  77. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  79. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  80. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  81. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  82. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  83. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  84. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  85. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  86. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  87. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  88. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  89. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  90. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  91. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  92. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  93. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  94. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  95. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  96. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  97. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  98. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  99. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  100. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  101. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  102. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  103. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  104. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  105. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  106. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  107. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  108. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  109. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  110. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  111. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  112. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  113. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  114. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  115. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  116. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  117. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  118. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  119. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  120. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  121. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  122. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  123. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  125. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  127. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  128. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  129. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  130. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  131. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  132. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  135. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  136. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  137. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  138. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  139. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  140. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  141. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  142. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  143. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  144. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  145. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  146. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  147. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  148. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  149. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  150. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  151. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  153. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  155. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  157. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  159. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  161. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  163. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  167. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  169. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  170. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  172. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  173. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  175. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  177. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  181. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  183. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  185. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  186. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  187. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  189. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  191. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  193. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  195. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  196. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  198. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  199. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  200. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  204. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  205. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  206. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  207. package/src/llama.cpp/models/.editorconfig +0 -1
  208. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  212. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  214. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  217. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  228. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  231. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  233. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  234. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  242. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  250. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  253. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  256. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  258. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  261. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  262. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  263. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  264. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  265. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  266. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  267. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  268. package/src/llama.cpp/prompts/assistant.txt +0 -31
  269. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  270. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  271. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  273. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  274. package/src/llama.cpp/prompts/chat.txt +0 -28
  275. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  276. package/src/llama.cpp/prompts/dan.txt +0 -1
  277. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  278. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  279. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  280. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  281. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  284. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  285. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  286. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  287. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  289. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  290. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  291. package/src/llama.cpp/requirements.txt +0 -13
  292. package/src/llama.cpp/scripts/build-info.sh +0 -30
  293. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  294. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  295. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  296. package/src/llama.cpp/tests/get-model.cpp +0 -21
  297. package/src/llama.cpp/tests/get-model.h +0 -2
  298. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  299. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  300. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  301. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  302. package/src/llama.cpp/tests/test-c.c +0 -7
  303. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  304. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  305. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  306. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  307. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  308. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  309. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  310. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  311. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  312. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  313. package/src/llama.cpp/tests/test-log.cpp +0 -39
  314. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  315. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  316. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  317. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  318. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  319. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  320. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  321. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  322. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  323. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  324. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  325. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  326. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  327. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  328. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  329. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  330. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  331. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  332. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  333. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  334. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  335. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  336. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  337. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  338. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  339. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  340. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  341. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  342. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  343. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  344. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  345. package/src/llama.cpp/tools/main/main.cpp +0 -977
  346. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  347. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  348. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  349. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  350. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  351. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  353. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  354. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  355. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  356. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  357. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  358. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  360. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  361. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  362. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  363. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  364. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  365. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  367. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  368. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  369. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  370. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  371. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  372. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  373. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  374. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  375. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  376. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  377. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  378. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,493 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2024 Intel Corporation
4
- // SPDX-License-Identifier: MIT
5
- //
6
-
7
- //
8
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9
- // See https://llvm.org/LICENSE.txt for license information.
10
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11
- //
12
-
13
- #ifndef GGML_SYCL_COMMON_HPP
14
- #define GGML_SYCL_COMMON_HPP
15
-
16
- #include <fstream>
17
- #include <iostream>
18
-
19
- #include "dpct/helper.hpp"
20
- #include "ggml-sycl.h"
21
- #include "presets.hpp"
22
- #include "sycl_hw.hpp"
23
-
24
-
25
- #if GGML_SYCL_DNNL
26
- #include "dnnl.hpp"
27
- #include "dnnl_sycl.hpp"
28
- #endif
29
-
30
- #define GGML_COMMON_DECL_SYCL
31
- #define GGML_COMMON_IMPL_SYCL
32
- /* suppress warning spam */
33
- #pragma clang diagnostic push
34
- #pragma clang diagnostic ignored "-Wnested-anon-types"
35
- #include "ggml-common.h"
36
- #pragma clang diagnostic pop
37
- #include "ggml-impl.h"
38
-
39
- void* ggml_sycl_host_malloc(size_t size);
40
- void ggml_sycl_host_free(void* ptr);
41
-
42
-
43
- extern int g_ggml_sycl_debug;
44
- extern int g_ggml_sycl_disable_optimize;
45
- extern int g_ggml_sycl_prioritize_dmmv;
46
-
47
- #define GGML_SYCL_DEBUG(...) \
48
- do { \
49
- if (g_ggml_sycl_debug) \
50
- fprintf(stderr, __VA_ARGS__); \
51
- } while (0)
52
-
53
- #define CHECK_TRY_ERROR(expr) \
54
- [&]() { \
55
- try { \
56
- expr; \
57
- return dpct::success; \
58
- } catch (std::exception const& e) { \
59
- std::cerr << e.what() << "\nException caught at file:" << __FILE__ \
60
- << ", line:" << __LINE__ << ", func:" << __func__ \
61
- << std::endl; \
62
- return dpct::default_error; \
63
- } \
64
- }()
65
-
66
-
67
- #define __SYCL_ARCH__ DPCT_COMPATIBILITY_TEMP
68
- #define VER_4VEC 610 // todo for hardward optimize.
69
- #define VER_GEN9 700 // todo for hardward optimize.
70
- #define VER_GEN12 1000000 // todo for hardward optimize.
71
- #define VER_GEN13 (VER_GEN12 + 1030) // todo for hardward optimize.
72
-
73
- #define GGML_SYCL_MAX_NODES 8192 // TODO: adapt to hardwares
74
-
75
- // define for XMX in Intel GPU
76
- // TODO: currently, it's not used for XMX really.
77
- #if !defined(GGML_SYCL_FORCE_MMQ)
78
- #define SYCL_USE_XMX
79
- #endif
80
-
81
- // max batch size to use MMQ kernels when tensor cores are available
82
- #define MMQ_MAX_BATCH_SIZE 32
83
-
84
- // dmmv = dequantize_mul_mat_vec
85
- #ifndef GGML_SYCL_DMMV_X
86
- #define GGML_SYCL_DMMV_X 32
87
- #endif
88
- #ifndef GGML_SYCL_MMV_Y
89
- #define GGML_SYCL_MMV_Y 1
90
- #endif
91
-
92
- typedef sycl::queue *queue_ptr;
93
-
94
- enum ggml_sycl_backend_gpu_mode {
95
- SYCL_UNSET_GPU_MODE = -1,
96
- SYCL_SINGLE_GPU_MODE = 0,
97
- SYCL_MUL_GPU_MODE
98
- };
99
-
100
- static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
101
-
102
- static void crash() {
103
- int* ptr = NULL;
104
- *ptr = 0;
105
- }
106
-
107
- [[noreturn]] static void ggml_sycl_error(
108
- const char* stmt,
109
- const char* func,
110
- const char* file,
111
- const int line,
112
- const char* msg) {
113
- fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
114
- fprintf(stderr, " in function %s at %s:%d\n", func, file, line);
115
- GGML_ABORT("SYCL error");
116
- }
117
-
118
- #define SYCL_CHECK(err) \
119
- do { \
120
- auto err_ = (err); \
121
- if (err_ != 0) \
122
- ggml_sycl_error(#err, __func__, __FILE__, __LINE__, "Exception caught in this line of code."); \
123
- } while (0)
124
-
125
- #if DPCT_COMPAT_RT_VERSION >= 11100
126
- #define GGML_SYCL_ASSUME(x) __builtin_assume(x)
127
- #else
128
- #define GGML_SYCL_ASSUME(x)
129
- #endif // DPCT_COMPAT_RT_VERSION >= 11100
130
-
131
- #ifdef GGML_SYCL_F16
132
- typedef sycl::half dfloat; // dequantize float
133
- typedef sycl::half2 dfloat2;
134
- #else
135
- typedef float dfloat; // dequantize float
136
- typedef sycl::float2 dfloat2;
137
- #endif // GGML_SYCL_F16
138
-
139
- #define MMVQ_MAX_BATCH_SIZE 8
140
-
141
- static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
142
-
143
- static int g_all_sycl_device_count = -1;
144
- static bool g_ggml_backend_sycl_buffer_type_initialized = false;
145
-
146
- static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
147
- SYCL_UNSET_GPU_MODE;
148
-
149
- static void* g_scratch_buffer = nullptr;
150
- static size_t g_scratch_size = 0; // disabled by default
151
- static size_t g_scratch_offset = 0;
152
-
153
- [[noreturn]] static inline void bad_arch(const sycl::stream& stream_ct1) {
154
- stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
155
- "current GPU architecture.\n";
156
- // __trap();
157
- std::exit(1);
158
-
159
- (void)bad_arch; // suppress unused function warning
160
- }
161
-
162
- int get_current_device_id();
163
-
164
- inline dpct::err0 ggml_sycl_set_device(const int device) try {
165
- int current_device_id;
166
- SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));
167
-
168
- // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
169
- // current_device_id=%d\n", device, current_device);
170
- if (device == current_device_id) {
171
- return 0;
172
- }
173
-
174
- return CHECK_TRY_ERROR(dpct::select_device(device));
175
- } catch (sycl::exception const& exc) {
176
- std::cerr << exc.what() << "Exception caught at file:" << __FILE__
177
- << ", line:" << __LINE__ << std::endl;
178
- crash();
179
- std::exit(1);
180
- }
181
-
182
- //////////////////////
183
- struct optimize_feature {
184
- bool reorder=false;
185
- };
186
-
187
- struct sycl_device_info {
188
- int cc; // compute capability
189
- // int nsm; // number of streaming multiprocessors
190
- // size_t smpb; // max. shared memory per block
191
- bool vmm; // virtual memory support
192
- size_t total_vram;
193
- sycl_hw_info hw_info;
194
- optimize_feature opt_feature;
195
- };
196
-
197
-
198
- struct ggml_sycl_device_info {
199
- int device_count;
200
-
201
- sycl_device_info devices[GGML_SYCL_MAX_DEVICES] = {};
202
-
203
- std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
204
-
205
- int max_work_group_sizes[GGML_SYCL_MAX_DEVICES] = {0};
206
- };
207
-
208
- const ggml_sycl_device_info & ggml_sycl_info();
209
-
210
- struct ggml_sycl_pool {
211
- virtual ~ggml_sycl_pool() = default;
212
-
213
- virtual void * alloc(size_t size, size_t * actual_size) = 0;
214
- virtual void free(void * ptr, size_t size) = 0;
215
- };
216
-
217
- template<typename T>
218
- struct ggml_sycl_pool_alloc {
219
- ggml_sycl_pool * pool = nullptr;
220
- T * ptr = nullptr;
221
- size_t actual_size = 0;
222
-
223
- explicit ggml_sycl_pool_alloc(ggml_sycl_pool & pool) : pool(&pool) {
224
- }
225
-
226
- ggml_sycl_pool_alloc(ggml_sycl_pool & pool, size_t size) : pool(&pool) {
227
- alloc(size);
228
- }
229
-
230
- ~ggml_sycl_pool_alloc() {
231
- if (ptr != nullptr) {
232
- pool->free(ptr, actual_size);
233
- }
234
- }
235
-
236
- T * realloc(size_t size) {
237
- GGML_ASSERT(pool != nullptr);
238
- if (ptr)
239
- pool->free(ptr, actual_size);
240
- ptr = (T *) pool->alloc(size * sizeof(T), &this->actual_size);
241
- return ptr;
242
- }
243
-
244
- // size is in number of elements
245
- T * alloc(size_t size) {
246
- GGML_ASSERT(pool != nullptr);
247
- GGML_ASSERT(ptr == nullptr);
248
- ptr = (T *) pool->alloc(size * sizeof(T), &this->actual_size);
249
- return ptr;
250
- }
251
-
252
- T * alloc(ggml_sycl_pool & pool, size_t size) {
253
- this->pool = &pool;
254
- return alloc(size);
255
- }
256
-
257
- T * get() {
258
- return ptr;
259
- }
260
-
261
- ggml_sycl_pool_alloc() = default;
262
- ggml_sycl_pool_alloc(const ggml_sycl_pool_alloc &) = delete;
263
- ggml_sycl_pool_alloc(ggml_sycl_pool_alloc &&) = delete;
264
- ggml_sycl_pool_alloc& operator=(const ggml_sycl_pool_alloc &) = delete;
265
- ggml_sycl_pool_alloc& operator=(ggml_sycl_pool_alloc &&) = delete;
266
- };
267
-
268
- // backend interface
269
-
270
- struct ggml_tensor_extra_gpu {
271
- void* data_device[GGML_SYCL_MAX_DEVICES]; // 1 pointer for each device for split
272
- // tensors
273
- dpct::event_ptr events[GGML_SYCL_MAX_DEVICES]
274
- [GGML_SYCL_MAX_STREAMS]; // events for synchronizing multiple GPUs
275
- optimize_feature optimized_feature;
276
- };
277
-
278
- void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
279
-
280
- inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
281
- optimize_feature opt;
282
-
283
- opt.reorder =
284
- (arch == syclex::architecture::intel_gpu_dg1 ||
285
- arch == syclex::architecture::intel_gpu_acm_g10 ||
286
- arch == syclex::architecture::intel_gpu_acm_g11 ||
287
- arch == syclex::architecture::intel_gpu_acm_g12 ||
288
- arch == syclex::architecture::intel_gpu_pvc ||
289
- arch == syclex::architecture::intel_gpu_pvc_vg ||
290
- arch == syclex::architecture::intel_gpu_mtl_u ||
291
- arch == syclex::architecture::intel_gpu_mtl_s ||
292
- arch == syclex::architecture::intel_gpu_mtl_h ||
293
- arch == syclex::architecture::intel_gpu_arl_u ||
294
- arch == syclex::architecture::intel_gpu_arl_s ||
295
- arch == syclex::architecture::intel_gpu_arl_h ||
296
- arch == syclex::architecture::intel_gpu_bmg_g21 ||
297
- arch == syclex::architecture::intel_gpu_lnl_m
298
- );
299
-
300
- return opt;
301
- }
302
-
303
- namespace sycl_ex = sycl::ext::oneapi::experimental;
304
- struct ggml_backend_sycl_context {
305
- int device;
306
- std::string name;
307
- optimize_feature opt_feature;
308
-
309
- queue_ptr qptrs[GGML_SYCL_MAX_DEVICES][GGML_SYCL_MAX_STREAMS] = { { nullptr } };
310
-
311
- explicit ggml_backend_sycl_context(int device) :
312
- device(device),
313
- name(GGML_SYCL_NAME + std::to_string(device)) {
314
- opt_feature = ggml_sycl_info().devices[device].opt_feature;
315
- }
316
-
317
- queue_ptr stream(int device, int stream) {
318
- if (qptrs[device][stream] == nullptr) {
319
- qptrs[device][stream] = &(dpct::get_device(device).default_queue());
320
- }
321
- return qptrs[device][stream];
322
- }
323
-
324
- queue_ptr stream() {
325
- return stream(device, 0);
326
- }
327
-
328
- #if GGML_SYCL_DNNL
329
- dnnl::engine make_engine(sycl::queue* q) {
330
- // Get the device associated with the queue
331
- sycl::device dev = q->get_device();
332
- // Get the context associated with the queue
333
- sycl::context ctx = q->get_context();
334
- const dnnl::engine eng = dnnl::sycl_interop::make_engine(dev, ctx);
335
- return eng;
336
- }
337
-
338
- std::unordered_map<sycl::queue*, dnnl::stream> stream_map;
339
- std::unordered_map<sycl::queue*, dnnl::engine> engine_map;
340
- dnnl::stream stream_dnnl(int device, int _stream) {
341
- auto q = stream(device, _stream);
342
- return stream_dnnl(q);
343
- }
344
- dnnl::engine engine_dnnl(sycl::queue* qptr) {
345
- auto it = engine_map.find(qptr);
346
- if (it == engine_map.end()) {
347
- auto eng = make_engine(qptr);
348
- engine_map[qptr] = eng;
349
- return eng;
350
- }
351
- else
352
- {
353
- return it->second;
354
- }
355
- }
356
- dnnl::stream stream_dnnl(sycl::queue* qptr) {
357
- auto it = stream_map.find(qptr);
358
- if (it == stream_map.end()) {
359
- auto eng = engine_dnnl(qptr);
360
- auto stream = dnnl::sycl_interop::make_stream(eng, *qptr);
361
- stream_map[qptr] = stream;
362
- return stream;
363
- }
364
- else
365
- {
366
- return it->second;
367
- }
368
- }
369
- dnnl::stream stream_dnnl() {
370
- return stream_dnnl(device, 0);
371
- }
372
- dnnl::memory get_scratchpad_mem(const dnnl::memory::desc & scratchpad_md,
373
- const dnnl::engine & eng, const queue_ptr q) {
374
- ggml_sycl_pool_alloc<uint8_t> * pool;
375
- auto it = scratchpad_map.find(q);
376
- if (it == scratchpad_map.end()) {
377
- scratchpad_map[q] = std::make_unique<ggml_sycl_pool_alloc<uint8_t>>(this->pool());
378
- pool = scratchpad_map[q].get();
379
- } else {
380
- pool = it->second.get();
381
- }
382
-
383
- size_t scratchpad_size = scratchpad_md.get_size();
384
- if (scratchpad_size > pool->actual_size) {
385
- pool->realloc(scratchpad_size);
386
- }
387
- void * mem_ptr = pool->get();
388
- return dnnl::memory(scratchpad_md, eng, mem_ptr);
389
- }
390
- #endif
391
-
392
- // pool
393
- std::unique_ptr<ggml_sycl_pool> pools[GGML_SYCL_MAX_DEVICES];
394
- std::unordered_map<sycl::queue *, std::unique_ptr<ggml_sycl_pool_alloc<uint8_t>>> scratchpad_map;
395
-
396
- std::unique_ptr<ggml_sycl_pool> host_pools[GGML_SYCL_MAX_DEVICES];
397
-
398
- static std::unique_ptr<ggml_sycl_pool> new_pool_for_device(queue_ptr qptr, int device);
399
-
400
- static std::unique_ptr<ggml_sycl_pool> new_pool_for_host(queue_ptr qptr, int device);
401
-
402
- ggml_sycl_pool & pool(int device) {
403
- if (pools[device] == nullptr) {
404
- pools[device] = new_pool_for_device(stream(device,0), device);
405
- }
406
- return *pools[device];
407
- }
408
-
409
- ggml_sycl_pool & pool() {
410
- return pool(device);
411
- }
412
-
413
- #ifdef GGML_SYCL_GRAPH
414
- std::unique_ptr<sycl_ex::command_graph<sycl_ex::graph_state::executable>> exec_graph = nullptr;
415
- #endif
416
-
417
- ggml_sycl_pool & host_pool(int device) {
418
- if (host_pools[device] == nullptr) {
419
- host_pools[device] = new_pool_for_host(stream(device, 0), device);
420
- }
421
- return *host_pools[device];
422
- }
423
-
424
- ggml_sycl_pool & host_pool() { return host_pool(device); }
425
- };
426
-
427
- // common device functions
428
-
429
- static __dpct_inline__ float warp_reduce_sum(float x,
430
- const sycl::nd_item<3>& item_ct1) {
431
- #pragma unroll
432
- for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
433
- /*
434
- DPCT1096:98: The right-most dimension of the work-group used in the SYCL
435
- kernel that calls this function may be less than "32". The function
436
- "dpct::permute_sub_group_by_xor" may return an unexpected result on the
437
- CPU device. Modify the size of the work-group to ensure that the value
438
- of the right-most dimension is a multiple of "32".
439
- */
440
- x += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), x, mask);
441
- }
442
- return x;
443
- }
444
-
445
- static __dpct_inline__ sycl::float2
446
- warp_reduce_sum(sycl::float2 a, const sycl::nd_item<3>& item_ct1) {
447
- #pragma unroll
448
- for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
449
- a.x() += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), a.x(),
450
- mask);
451
- a.y() += dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), a.y(),
452
- mask);
453
- }
454
- return a;
455
- }
456
-
457
- static __dpct_inline__ float warp_reduce_max(float x,
458
- const sycl::nd_item<3>& item_ct1) {
459
- #pragma unroll
460
- for (int mask = WARP_SIZE / 2; mask > 0; mask >>= 1) {
461
- /*
462
- DPCT1096:97: The right-most dimension of the work-group used in the SYCL
463
- kernel that calls this function may be less than "32". The function
464
- "dpct::permute_sub_group_by_xor" may return an unexpected result on the
465
- CPU device. Modify the size of the work-group to ensure that the value
466
- of the right-most dimension is a multiple of "32".
467
- */
468
- x = sycl::fmax(x, dpct::permute_sub_group_by_xor(
469
- item_ct1.get_sub_group(), x, mask));
470
- }
471
- return x;
472
- }
473
-
474
- // Helper for vec loading aligned data
475
- template <typename Tp, int n>
476
- inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
477
- return *reinterpret_cast<const sycl::vec<Tp, n>*>(aligned_ptr);
478
- }
479
-
480
- // Helper for accessing pointers with no warnings
481
- template <typename Tp, int dim>
482
- static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {
483
- return acc.template get_multi_ptr<sycl::access::decorated::no>().get();
484
- }
485
-
486
- int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size);
487
-
488
- constexpr size_t ceil_div(const size_t m, const size_t n) {
489
- return (m + n - 1) / n;
490
- }
491
-
492
- bool gpu_has_xmx(sycl::device &dev);
493
- #endif // GGML_SYCL_COMMON_HPP
@@ -1,197 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2024 Intel Corporation
4
- // SPDX-License-Identifier: MIT
5
- //
6
-
7
- //
8
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9
- // See https://llvm.org/LICENSE.txt for license information.
10
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11
- //
12
-
13
- #include "concat.hpp"
14
- #include "common.hpp"
15
-
16
- static void concat_f32_dim0(const float *x, const float *y, float *dst,
17
- const int ne0, const int ne00,
18
- const sycl::nd_item<3> &item_ct1) {
19
- int nidx = item_ct1.get_local_id(2) +
20
- item_ct1.get_group(2) * item_ct1.get_local_range(2);
21
- if (nidx >= ne0) {
22
- return;
23
- }
24
- // operation
25
- int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
26
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
27
- if (nidx < ne00) { // src0
28
- int offset_src = nidx + item_ct1.get_group(1) * ne00 +
29
- item_ct1.get_group(0) * ne00 * item_ct1.get_group_range(1);
30
- dst[offset_dst] = x[offset_src];
31
- } else {
32
- int offset_src =
33
- nidx - ne00 + item_ct1.get_group(1) * (ne0 - ne00) +
34
- item_ct1.get_group(0) * (ne0 - ne00) * item_ct1.get_group_range(1);
35
- dst[offset_dst] = y[offset_src];
36
- }
37
- }
38
-
39
- static void concat_f32_dim1(const float *x, const float *y, float *dst,
40
- const int ne0, const int ne01,
41
- const sycl::nd_item<3> &item_ct1) {
42
- int nidx = item_ct1.get_local_id(2) +
43
- item_ct1.get_group(2) * item_ct1.get_local_range(2);
44
- if (nidx >= ne0) {
45
- return;
46
- }
47
- // operation
48
- int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
49
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
50
- if (item_ct1.get_group(1) < (size_t) ne01) { // src0
51
- int offset_src =
52
- nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
53
- dst[offset_dst] = x[offset_src];
54
- } else {
55
- int offset_src =
56
- nidx + (item_ct1.get_group(1) - ne01) * ne0 +
57
- item_ct1.get_group(0) * ne0 * (item_ct1.get_group_range(1) - ne01);
58
- dst[offset_dst] = y[offset_src];
59
- }
60
- }
61
-
62
- static void concat_f32_dim2(const float *x, const float *y, float *dst,
63
- const int ne0, const int ne02,
64
- const sycl::nd_item<3> &item_ct1) {
65
- int nidx = item_ct1.get_local_id(2) +
66
- item_ct1.get_group(2) * item_ct1.get_local_range(2);
67
- if (nidx >= ne0) {
68
- return;
69
- }
70
- // operation
71
- int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
72
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
73
- if (item_ct1.get_group(0) < (size_t) ne02) { // src0
74
- int offset_src = nidx + item_ct1.get_group(1) * ne0 +
75
- item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
76
- dst[offset_dst] = x[offset_src];
77
- } else {
78
- int offset_src =
79
- nidx + item_ct1.get_group(1) * ne0 +
80
- (item_ct1.get_group(0) - ne02) * ne0 * item_ct1.get_group_range(1);
81
- dst[offset_dst] = y[offset_src];
82
- }
83
- }
84
-
85
- static void concat_f32_sycl(const float *x, const float *y, float *dst,
86
- int ne00, int ne01, int ne02, int ne0, int ne1,
87
- int ne2, int dim, queue_ptr stream) {
88
- int num_blocks = (ne0 + SYCL_CONCAT_BLOCK_SIZE - 1) / SYCL_CONCAT_BLOCK_SIZE;
89
- sycl::range<3> gridDim(ne2, ne1, num_blocks);
90
- switch (dim) {
91
- case 0:
92
- stream->parallel_for(
93
- sycl::nd_range<3>(gridDim *
94
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
95
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
96
- [=](sycl::nd_item<3> item_ct1) {
97
- concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1);
98
- });
99
- break;
100
- case 1:
101
- stream->parallel_for(
102
- sycl::nd_range<3>(gridDim *
103
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
104
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
105
- [=](sycl::nd_item<3> item_ct1) {
106
- concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1);
107
- });
108
- break;
109
- // dim >=2 will be dispatched to the default path
110
- default:
111
- stream->parallel_for(
112
- sycl::nd_range<3>(gridDim *
113
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE),
114
- sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)),
115
- [=](sycl::nd_item<3> item_ct1) {
116
- concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1);
117
- });
118
- break;
119
- }
120
- }
121
-
122
- // non-contiguous kernel (slow)
123
- static void concat_f32_sycl_non_cont(
124
- queue_ptr stream, const char *src0, const char *src1, char *dst,
125
- int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03, uint64_t nb00,
126
- uint64_t nb01, uint64_t nb02, uint64_t nb03, int64_t /*ne10*/,
127
- int64_t /*ne11*/, int64_t /*ne12*/, int64_t /*ne13*/, uint64_t nb10,
128
- uint64_t nb11, uint64_t nb12, uint64_t nb13, int64_t ne0, int64_t ne1,
129
- int64_t ne2, int64_t ne3, uint64_t nb0, uint64_t nb1, uint64_t nb2,
130
- uint64_t nb3, int32_t dim) {
131
- sycl::range<3> gridDim(ne3, ne2, ne1);
132
- stream->parallel_for(
133
- sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)),
134
- [=](sycl::nd_item<3> item_ct1) {
135
- int64_t i3 = item_ct1.get_group(0);
136
- int64_t i2 = item_ct1.get_group(1);
137
- int64_t i1 = item_ct1.get_group(2);
138
-
139
- int64_t o[4] = {0, 0, 0, 0};
140
- o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
141
-
142
- const float *x;
143
-
144
- for (int i0 = item_ct1.get_local_id(2); i0 < ne0;
145
- i0 += item_ct1.get_local_range(2)) {
146
- if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
147
- x = (const float *)(src0 + (i3)*nb03 + (i2)*nb02 + (i1)*nb01 +
148
- (i0)*nb00);
149
- } else {
150
- x = (const float *)(src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 +
151
- (i1 - o[1]) * nb11 + (i0 - o[0]) * nb10);
152
- }
153
-
154
- float *y = (float *)(dst + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0);
155
-
156
- *y = *x;
157
- }
158
- });
159
- }
160
-
161
- void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
162
- const ggml_tensor *src0 = dst->src[0];
163
- const ggml_tensor *src1 = dst->src[1];
164
- queue_ptr stream = ctx.stream();
165
-
166
- const int32_t dim = ((int32_t *)dst->op_params)[0];
167
-
168
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
169
- const float *src0_d = (const float *)src0->data;
170
- const float *src1_d = (const float *)src1->data;
171
-
172
- float *dst_d = (float *)dst->data;
173
-
174
- if (dim != 3) {
175
- for (int i3 = 0; i3 < dst->ne[3]; i3++) {
176
- concat_f32_sycl(
177
- src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
178
- dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
179
- src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
180
- }
181
- } else {
182
- const size_t size0 = ggml_nbytes(src0);
183
- const size_t size1 = ggml_nbytes(src1);
184
-
185
- SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
186
- SYCL_CHECK(CHECK_TRY_ERROR(
187
- stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
188
- }
189
- } else
190
- concat_f32_sycl_non_cont(
191
- stream, (const char *)src0->data, (const char *)src1->data,
192
- (char *)dst->data, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
193
- src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src1->ne[0],
194
- src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1],
195
- src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
196
- dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
197
- }
@@ -1,20 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2024 Intel Corporation
4
- // SPDX-License-Identifier: MIT
5
- //
6
-
7
- //
8
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9
- // See https://llvm.org/LICENSE.txt for license information.
10
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11
- //
12
-
13
- #ifndef GGML_SYCL_CONCAT_HPP
14
- #define GGML_SYCL_CONCAT_HPP
15
-
16
- #include "common.hpp"
17
-
18
- void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
19
-
20
- #endif // GGML_SYCL_CONCAT_HPP