@fugood/llama.node 0.6.2 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -10
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/patches/node-api-headers+1.1.0.patch +0 -26
  27. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  28. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  29. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  30. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  31. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  32. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  33. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  37. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  38. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  39. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  40. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  41. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  42. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  43. package/src/llama.cpp/cmake/common.cmake +0 -35
  44. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  45. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  46. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  47. package/src/llama.cpp/docs/build.md +0 -561
  48. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  49. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  50. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  51. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  53. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  54. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  55. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  56. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  57. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  58. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  59. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  61. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  62. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  63. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  64. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  68. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  70. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  71. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  72. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  73. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  74. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  75. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  76. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  77. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  79. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  80. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  81. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  82. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  83. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  84. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  85. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  86. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  87. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  88. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  89. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  90. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  91. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  92. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  93. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  94. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  95. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  96. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  97. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  98. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  99. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  100. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  101. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  102. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  103. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  104. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  105. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  106. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  107. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  108. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  109. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  110. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  111. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  112. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  113. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  114. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  115. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  116. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  117. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  118. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  119. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  120. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  121. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  122. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  123. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  125. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  127. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  128. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  129. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  130. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  131. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  132. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  135. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  136. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  137. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  138. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  139. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  140. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  141. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  142. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  143. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  144. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  145. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  146. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  147. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  148. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  149. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  150. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  151. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  153. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  155. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  157. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  159. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  161. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  163. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  167. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  169. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  170. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  172. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  173. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  175. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  177. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  181. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  183. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  185. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  186. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  187. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  189. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  191. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  193. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  195. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  196. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  198. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  199. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  200. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  204. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  205. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  206. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  207. package/src/llama.cpp/models/.editorconfig +0 -1
  208. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  212. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  214. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  217. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  228. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  231. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  233. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  234. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  242. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  250. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  253. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  256. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  258. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  261. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  262. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  263. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  264. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  265. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  266. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  267. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  268. package/src/llama.cpp/prompts/assistant.txt +0 -31
  269. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  270. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  271. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  273. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  274. package/src/llama.cpp/prompts/chat.txt +0 -28
  275. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  276. package/src/llama.cpp/prompts/dan.txt +0 -1
  277. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  278. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  279. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  280. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  281. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  284. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  285. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  286. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  287. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  289. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  290. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  291. package/src/llama.cpp/requirements.txt +0 -13
  292. package/src/llama.cpp/scripts/build-info.sh +0 -30
  293. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  294. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  295. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  296. package/src/llama.cpp/tests/get-model.cpp +0 -21
  297. package/src/llama.cpp/tests/get-model.h +0 -2
  298. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  299. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  300. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  301. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  302. package/src/llama.cpp/tests/test-c.c +0 -7
  303. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  304. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  305. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  306. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  307. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  308. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  309. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  310. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  311. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  312. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  313. package/src/llama.cpp/tests/test-log.cpp +0 -39
  314. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  315. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  316. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  317. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  318. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  319. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  320. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  321. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  322. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  323. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  324. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  325. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  326. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  327. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  328. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  329. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  330. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  331. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  332. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  333. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  334. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  335. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  336. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  337. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  338. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  339. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  340. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  341. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  342. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  343. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  344. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  345. package/src/llama.cpp/tools/main/main.cpp +0 -977
  346. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  347. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  348. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  349. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  350. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  351. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  353. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  354. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  355. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  356. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  357. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  358. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  360. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  361. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  362. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  363. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  364. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  365. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  367. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  368. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  369. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  370. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  371. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  372. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  373. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  374. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  375. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  376. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  377. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  378. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,83 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2025 Codeplay Software Ltd.
4
- // Copyright (C) 2025 Intel Corporation
5
- // SPDX-License-Identifier: MIT
6
- //
7
-
8
- //
9
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10
- // See https://llvm.org/LICENSE.txt for license information.
11
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12
- //
13
-
14
- #ifndef GGML_SYCL_QUANTS_HPP
15
- #define GGML_SYCL_QUANTS_HPP
16
-
17
- #include "ggml-common.h"
18
- #include "ggml.h"
19
-
20
- namespace ggml_sycl_reordered {
21
-
22
-
23
- // The reordered block moves quants (qs) and scales(d) to two
24
- // uniform regions of memory that is contiguous in the same tensor.
25
- // What this means is that instead of having:
26
- // [d0, qs0] [d1, qs1] [d2, qs2] ... [dN, qsN]
27
- // We have:
28
- // [qs0, qs1, qs2, ..., qsN] [d0, d1, d2, ..., dN]
29
- //
30
- // Notes: out-of-bounds qs will run into d values
31
- // Aligment relies on the allocated size of qs
32
-
33
- template <ggml_type type> struct block_q_t;
34
-
35
-
36
- // qk number of weights / quants in a block
37
- // qr number of weights in a byte (described as 'before dequantization')
38
- // for quantization types that has low and high bits split, qr is calculated with
39
- // using the lower bits, e.g for Q6 quants QR6 is 2
40
- // qi number of 32 bit integers needed to represent all the quants from a block (`qs` field)
41
- // See ggml-common.h to see how these are calculated
42
- template <> struct block_q_t<GGML_TYPE_Q4_0> {
43
- struct traits {
44
- static constexpr uint32_t qk = QK4_0;
45
- static constexpr uint32_t qi = QI4_0;
46
- static constexpr uint32_t qr = QR4_0;
47
- static constexpr uint32_t vdr_mmvq = 2;
48
- };
49
-
50
- static constexpr int get_block_offset(const int block_index) { return block_index * (traits::qk / traits::qr); }
51
-
52
- static constexpr int get_d_offset(int nrows, int ncols, const int block_index) {
53
- return (ncols / traits::qr * nrows) + block_index * sizeof(ggml_half);
54
- }
55
-
56
- static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
57
- };
58
-
59
- template <> struct block_q_t<GGML_TYPE_Q4_K> {
60
- struct traits {
61
- static constexpr uint32_t qk = QK_K;
62
- static constexpr uint32_t qi = QI4_K;
63
- static constexpr uint32_t qr = QR4_K;
64
- static constexpr uint32_t vdr_mmvq = 2;
65
- };
66
-
67
- static constexpr int get_block_offset(const int block_index) { return block_index * (traits::qk / traits::qr); }
68
-
69
- static constexpr int get_d_offset(int nrows, int ncols, const int block_index) {
70
- auto nblocks = (nrows * (ncols / traits::qk));
71
- return (nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2));
72
- }
73
-
74
- static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
75
-
76
- constexpr size_t get_total_qs_bytes(int nblocks) { return nblocks * QK_K / 2; }
77
-
78
- constexpr size_t get_dm_offset(int nblocks) { return get_total_qs_bytes(nblocks) + nblocks * K_SCALE_SIZE; }
79
- };
80
-
81
- } // namespace ggml_sycl_reordered
82
-
83
- #endif // GGML_SYCL_QUANTS_HPP
@@ -1,362 +0,0 @@
1
- #include "rope.hpp"
2
- #include "ggml-sycl/common.hpp"
3
- #include "ggml.h"
4
-
5
- struct rope_corr_dims {
6
- float v[2];
7
- };
8
-
9
- struct mrope_sections {
10
- int v[4];
11
- };
12
-
13
- static float rope_yarn_ramp(const float low, const float high, const int i0) {
14
- const float y = (i0 / 2 - low) / sycl::max(0.001f, high - low);
15
- return 1.0f - sycl::min(1.0f, sycl::max(0.0f, y));
16
- }
17
-
18
- // YaRN algorithm based on LlamaYaRNScaledRotaryEmbedding.py from https://github.com/jquesnelle/yarn
19
- // MIT licensed. Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng.
20
- static void rope_yarn(
21
- float theta_extrap, float freq_scale, rope_corr_dims corr_dims, int64_t i0, float ext_factor, float mscale,
22
- float * cos_theta, float * sin_theta) {
23
- // Get n-d rotational scaling corrected for extrapolation
24
- float theta_interp = freq_scale * theta_extrap;
25
- float theta = theta_interp;
26
- if (ext_factor != 0.0f) {
27
- float ramp_mix = rope_yarn_ramp(corr_dims.v[0], corr_dims.v[1], i0) * ext_factor;
28
- theta = theta_interp * (1 - ramp_mix) + theta_extrap * ramp_mix;
29
-
30
- // Get n-d magnitude scaling corrected for interpolation
31
- mscale *= 1.0f + 0.1f * sycl::log(1.0f / freq_scale);
32
- }
33
- *cos_theta = sycl::cos(theta) * mscale;
34
- *sin_theta = sycl::sin(theta) * mscale;
35
- }
36
-
37
- template <typename T, bool has_ff>
38
- static void rope_norm(const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims,
39
- const int32_t * pos, float freq_scale, float ext_factor, float attn_factor,
40
- const rope_corr_dims corr_dims, const float theta_scale, const float * freq_factors,
41
- const sycl::nd_item<3> & item_ct1) {
42
- const int i0 = 2 * (item_ct1.get_local_range(1) * item_ct1.get_group(1) + item_ct1.get_local_id(1));
43
-
44
- if (i0 >= ne0) {
45
- return;
46
- }
47
-
48
- const int row = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2);
49
-
50
- if (i0 >= n_dims) {
51
- const int i = row * ne0 + i0;
52
-
53
- dst[i + 0] = x[i + 0];
54
- dst[i + 1] = x[i + 1];
55
-
56
- return;
57
- }
58
-
59
- const int row0 = row % ne1;
60
- const int channel0 = row / ne1;
61
-
62
- const int i = row * ne0 + i0;
63
- const int i2 = channel0 * s2 + row0 * s1 + i0;
64
-
65
- const float theta_base = pos[channel0] * sycl::pow(theta_scale, i0 / 2.0f);
66
-
67
- const float freq_factor = has_ff ? freq_factors[i0 / 2] : 1.0f;
68
-
69
- float cos_theta;
70
- float sin_theta;
71
-
72
- rope_yarn(theta_base / freq_factor, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta);
73
-
74
- const float x0 = x[i2 + 0];
75
- const float x1 = x[i2 + 1];
76
-
77
- dst[i + 0] = x0 * cos_theta - x1 * sin_theta;
78
- dst[i + 1] = x0 * sin_theta + x1 * cos_theta;
79
- }
80
-
81
- template <typename T, bool has_ff>
82
- static void rope_neox(const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims,
83
- const int32_t * pos, const float freq_scale, const float ext_factor, const float attn_factor,
84
- const rope_corr_dims corr_dims, const float theta_scale, const float * freq_factors,
85
- const sycl::nd_item<3> & item_ct1) {
86
- const int i0 = 2 * (item_ct1.get_local_range(1) * item_ct1.get_group(1) + item_ct1.get_local_id(1));
87
-
88
- if (i0 >= ne0) {
89
- return;
90
- }
91
-
92
- const int row = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2);
93
-
94
- if (i0 >= n_dims) {
95
- const int i = row * ne0 + i0;
96
-
97
- dst[i + 0] = x[i + 0];
98
- dst[i + 1] = x[i + 1];
99
-
100
- return;
101
- }
102
-
103
- const int row0 = row % ne1;
104
- const int channel0 = row / ne1;
105
-
106
- const int i = row * ne0 + i0 / 2;
107
- const int i2 = channel0 * s2 + row0 * s1 + i0 / 2;
108
-
109
- const float theta_base = pos[channel0] * sycl::pow(theta_scale, i0 / 2.0f);
110
-
111
- const float freq_factor = has_ff ? freq_factors[i0 / 2] : 1.0f;
112
-
113
- float cos_theta;
114
- float sin_theta;
115
-
116
- rope_yarn(theta_base / freq_factor, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta);
117
-
118
- const float x0 = x[i2 + 0];
119
- const float x1 = x[i2 + n_dims / 2];
120
-
121
- dst[i + 0] = x0 * cos_theta - x1 * sin_theta;
122
- dst[i + n_dims / 2] = x0 * sin_theta + x1 * cos_theta;
123
- }
124
-
125
- template <typename T, bool has_ff>
126
- static void rope_vision(const T * x, T * dst, const int ne0, const int ne1, const int ne2, const size_t s1,
127
- const size_t s2, const int n_dims, const int32_t * pos, const float freq_scale,
128
- const float ext_factor, const float attn_factor, const rope_corr_dims corr_dims,
129
- const float theta_scale, const float * freq_factors, const mrope_sections sections,
130
- const sycl::nd_item<3> & item_ct1) {
131
- // get index pos
132
- const int i0 = 2 * (item_ct1.get_group(1) * item_ct1.get_local_range(1) + item_ct1.get_local_id(1));
133
- if (i0 >= ne0) {
134
- return;
135
- }
136
- const int row_dst = (item_ct1.get_group(2) * item_ct1.get_local_range(2)) + item_ct1.get_local_id(2);
137
- const int row_x = row_dst % ne1;
138
- const int channel_x = row_dst / ne1;
139
- const int idst = (row_dst * ne0) + (i0 / 2);
140
- const size_t ix = ((size_t) channel_x * s2) + ((size_t) row_x * s1) + (i0 / 2);
141
-
142
- const int sect_dims = sections.v[0] + sections.v[1];
143
- const int sector = (i0 / 2) % sect_dims;
144
-
145
- float theta_base = 0.0f;
146
- if (sector < sections.v[0]) {
147
- const int p = sector;
148
- theta_base = pos[channel_x] * sycl::pow(theta_scale, (float) p);
149
- } else {
150
- // Simplified from CUDA backend code: if (sector >= sections.v[0] && sector < sec_w) which is just sector >= sections.v[0]
151
- const int p = sector - sections.v[0];
152
- theta_base = pos[channel_x + ne2] * sycl::pow(theta_scale, (float) p);
153
- }
154
-
155
- const float freq_factor = has_ff ? freq_factors[i0 / 2] : 1.0f;
156
- float cos_theta;
157
- float sin_theta;
158
- rope_yarn(theta_base / freq_factor, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta);
159
- const float x0 = x[ix + 0];
160
- const float x1 = x[ix + n_dims];
161
-
162
- // store results in dst
163
- dst[idst + 0] = x0 * cos_theta - x1 * sin_theta;
164
- dst[idst + n_dims] = x0 * sin_theta + x1 * cos_theta;
165
- }
166
-
167
- template <typename T>
168
- static void rope_norm_sycl(const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2,
169
- const int n_dims, int nr, const int32_t * pos, const float freq_scale, const float freq_base,
170
- const float ext_factor, const float attn_factor, const rope_corr_dims corr_dims,
171
- const float * freq_factors, queue_ptr stream) {
172
- GGML_ASSERT(ne0 % 2 == 0);
173
- const sycl::range<3> block_dims(1, SYCL_ROPE_BLOCK_SIZE, 1);
174
- const int num_blocks_x = (ne0 + 2 * SYCL_ROPE_BLOCK_SIZE - 1) / (2 * SYCL_ROPE_BLOCK_SIZE);
175
- const sycl::range<3> block_nums(1, num_blocks_x, nr);
176
-
177
- const float theta_scale = powf(freq_base, -2.0f / n_dims);
178
-
179
- dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 });
180
-
181
- if (freq_factors == nullptr) {
182
- /*
183
- DPCT1049:40: The work-group size passed to the SYCL kernel may exceed
184
- the limit. To get the device limit, query
185
- info::device::max_work_group_size. Adjust the work-group size if needed.
186
- */
187
- stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
188
- rope_norm<T, false>(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims,
189
- theta_scale, freq_factors, item_ct1);
190
- });
191
- } else {
192
- /*
193
- DPCT1049:41: The work-group size passed to the SYCL kernel may exceed
194
- the limit. To get the device limit, query
195
- info::device::max_work_group_size. Adjust the work-group size if needed.
196
- */
197
- stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
198
- rope_norm<T, true>(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims,
199
- theta_scale, freq_factors, item_ct1);
200
- });
201
- }
202
- }
203
-
204
- template <typename T>
205
- static void rope_neox_sycl(const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2,
206
- const int n_dims, const int nr, const int32_t * pos, const float freq_scale,
207
- const float freq_base, const float ext_factor, const float attn_factor,
208
- const rope_corr_dims corr_dims, const float * freq_factors, queue_ptr stream) {
209
- GGML_ASSERT(ne0 % 2 == 0);
210
- const sycl::range<3> block_dims(1, SYCL_ROPE_BLOCK_SIZE, 1);
211
- const int num_blocks_x = (ne0 + 2 * SYCL_ROPE_BLOCK_SIZE - 1) / (2 * SYCL_ROPE_BLOCK_SIZE);
212
- const sycl::range<3> block_nums(1, num_blocks_x, nr);
213
-
214
- const float theta_scale = powf(freq_base, -2.0f / n_dims);
215
-
216
- dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 });
217
-
218
- if (freq_factors == nullptr) {
219
- stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
220
- rope_neox<T, false>(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims,
221
- theta_scale, freq_factors, item_ct1);
222
- });
223
- } else {
224
- stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) {
225
- rope_neox<T, true>(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims,
226
- theta_scale, freq_factors, item_ct1);
227
- });
228
- }
229
- }
230
-
231
- // rope vision
232
- template <typename T>
233
- static void rope_vision_sycl(const T * x, T * dst, const int ne0, const int ne1, const int ne2, const size_t s1,
234
- const size_t s2, const int n_dims, const int nr, const int32_t * pos,
235
- const float freq_scale, const float freq_base, const float ext_factor,
236
- const float attn_factor, const rope_corr_dims corr_dims, const float * freq_factors,
237
- const mrope_sections sections, queue_ptr stream) {
238
- GGML_ASSERT(ne0 % 2 == 0);
239
- const sycl::range<3> block_dims(1, SYCL_ROPE_BLOCK_SIZE, 1);
240
- const int n_blocks_y = (ne0 + 2 * SYCL_ROPE_BLOCK_SIZE - 1) / (2 * SYCL_ROPE_BLOCK_SIZE);
241
- const sycl::range<3> grid_dims(1, n_blocks_y, nr);
242
- const sycl::nd_range<3> nd_range(grid_dims * block_dims, block_dims);
243
-
244
- const float theta_scale = std::pow(freq_base, -2.0f / n_dims);
245
- // Add FP16 capability check if T could be sycl::half
246
- if constexpr (std::is_same_v<T, sycl::half>) {
247
- dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 });
248
- }
249
- // launch kernel
250
- if (freq_factors == nullptr) {
251
- stream->parallel_for(nd_range, [=](sycl::nd_item<3> item_ct1) {
252
- rope_vision<T, false>(x, dst, ne0, ne1, ne2, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor,
253
- corr_dims, theta_scale, freq_factors, sections, item_ct1);
254
- });
255
- } else {
256
- stream->parallel_for(nd_range, [=](sycl::nd_item<3> item_ct1) {
257
- rope_vision<T, true>(x, dst, ne0, ne1, ne2, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor,
258
- corr_dims, theta_scale, freq_factors, sections, item_ct1);
259
- });
260
- }
261
- }
262
-
263
- inline void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
264
-
265
- GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
266
- GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
267
- GGML_ASSERT(dst->src[0]->type == dst->type);
268
- const int64_t ne00 = dst->src[0]->ne[0]; // head dims
269
- const int64_t ne01 = dst->src[0]->ne[1]; // num heads
270
- const int64_t ne02 = dst->src[0]->ne[2]; // num heads
271
- const int64_t nr = ggml_nrows(dst->src[0]);
272
-
273
- const size_t s01 = dst->src[0]->nb[1] / ggml_type_size(dst->src[0]->type);
274
- const size_t s02 = dst->src[0]->nb[2] / ggml_type_size(dst->src[0]->type);
275
-
276
-
277
- //const int n_past = ((int32_t *) dst->op_params)[0];
278
- const int n_dims = ((int32_t *) dst->op_params)[1];
279
- const int mode = ((int32_t *) dst->op_params)[2];
280
- //const int n_ctx = ((int32_t *) dst->op_params)[3];
281
- const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
282
- mrope_sections sections;
283
-
284
- // RoPE alteration for extended context
285
- float freq_base;
286
- float freq_scale;
287
- float ext_factor;
288
- float attn_factor;
289
- float beta_fast;
290
- float beta_slow;
291
-
292
- memcpy(&freq_base, (int32_t *) dst->op_params + 5, sizeof(float));
293
- memcpy(&freq_scale, (int32_t *) dst->op_params + 6, sizeof(float));
294
- memcpy(&ext_factor, (int32_t *) dst->op_params + 7, sizeof(float));
295
- memcpy(&attn_factor, (int32_t *) dst->op_params + 8, sizeof(float));
296
- memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
297
- memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
298
- memcpy(&sections.v, (int32_t *) dst->op_params + 11, sizeof(int)*4);
299
-
300
- const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
301
- const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
302
-
303
- const int32_t * pos = (const int32_t *) dst->src[1]->data;
304
-
305
- const float * freq_factors = nullptr;
306
- if (dst->src[2] != nullptr) {
307
- freq_factors = (const float *) dst->src[2]->data;
308
- }
309
-
310
- rope_corr_dims corr_dims;
311
- ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v);
312
-
313
- dpct::queue_ptr main_stream = ctx.stream();
314
- SYCL_CHECK(ggml_sycl_set_device(ctx.device));
315
-
316
- // compute
317
- if (is_neox) {
318
- GGML_SYCL_DEBUG("%s: neox path\n", __func__);
319
- if (dst->src[0]->type == GGML_TYPE_F32) {
320
- rope_neox_sycl((const float *) dst->src[0]->data, (float *) dst->data, ne00, ne01, s01, s02, n_dims, nr,
321
- pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream);
322
- } else if (dst->src[0]->type == GGML_TYPE_F16) {
323
- rope_neox_sycl((const sycl::half *) dst->src[0]->data, (sycl::half *) dst->data, ne00, ne01, s01, s02,
324
- n_dims, nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims, freq_factors,
325
- main_stream);
326
- } else {
327
- GGML_ABORT("fatal error");
328
- }
329
- } else if (is_vision) {
330
- GGML_SYCL_DEBUG("%s: vision path\n", __func__);
331
- if (dst->src[0]->type == GGML_TYPE_F16) {
332
- rope_vision_sycl((const sycl::half *) dst->src[0]->data, (sycl::half *) dst->data, ne00, ne01, ne02, s01,
333
- s02, n_dims, nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
334
- freq_factors, sections, main_stream);
335
- } else if (dst->src[0]->type == GGML_TYPE_F32) {
336
- rope_vision_sycl((const float *) dst->src[0]->data, (float *) dst->data, ne00, ne01, ne02, s01, s02, n_dims,
337
- nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections,
338
- main_stream);
339
- } else {
340
- GGML_ABORT("Fatal error: Tensor type unsupported!");
341
- }
342
- } else {
343
- GGML_SYCL_DEBUG("%s: norm path\n", __func__);
344
- if (dst->src[0]->type == GGML_TYPE_F32) {
345
- rope_norm_sycl((const float *) dst->src[0]->data, (float *) dst->data, ne00, ne01, s01, s02, n_dims, nr,
346
- pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream);
347
- } else if (dst->src[0]->type == GGML_TYPE_F16) {
348
- rope_norm_sycl((const sycl::half *) dst->src[0]->data, (sycl::half *) dst->data, ne00, ne01, s01, s02,
349
- n_dims, nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims, freq_factors,
350
- main_stream);
351
- } else {
352
- GGML_ABORT("fatal error");
353
- }
354
- }
355
- }
356
-
357
- void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
358
- GGML_SYCL_DEBUG("call %s\n", __func__);
359
- ggml_sycl_op_rope(ctx, dst);
360
- GGML_SYCL_DEBUG("call %s done\n", __func__);
361
- }
362
-
@@ -1,20 +0,0 @@
1
- //
2
- // MIT license
3
- // Copyright (C) 2024 Intel Corporation
4
- // SPDX-License-Identifier: MIT
5
- //
6
-
7
- //
8
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9
- // See https://llvm.org/LICENSE.txt for license information.
10
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11
- //
12
-
13
- #ifndef GGML_SYCL_ROPE_HPP
14
- #define GGML_SYCL_ROPE_HPP
15
-
16
- #include "common.hpp"
17
-
18
- void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
19
-
20
- #endif // GGML_SYCL_ROPE_HPP