@fugood/llama.node 0.6.2 → 1.0.0-beta.1

This diff compares publicly available package versions as released to the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (378)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -10
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/patches/node-api-headers+1.1.0.patch +0 -26
  27. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  28. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  29. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  30. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  31. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  32. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  33. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  37. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  38. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  39. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  40. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  41. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  42. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  43. package/src/llama.cpp/cmake/common.cmake +0 -35
  44. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  45. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  46. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  47. package/src/llama.cpp/docs/build.md +0 -561
  48. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  49. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  50. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  51. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  53. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  54. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  55. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  56. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  57. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  58. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  59. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  61. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  62. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  63. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  64. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  68. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  70. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  71. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  72. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  73. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  74. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  75. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  76. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  77. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  79. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  80. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  81. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  82. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  83. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  84. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  85. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  86. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  87. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  88. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  89. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  90. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  91. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  92. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  93. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  94. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  95. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  96. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  97. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  98. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  99. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  100. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  101. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  102. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  103. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  104. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  105. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  106. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  107. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  108. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  109. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  110. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  111. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  112. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  113. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  114. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  115. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  116. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  117. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  118. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  119. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  120. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  121. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  122. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  123. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  125. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  127. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  128. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  129. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  130. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  131. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  132. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  135. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  136. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  137. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  138. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  139. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  140. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  141. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  142. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  143. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  144. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  145. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  146. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  147. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  148. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  149. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  150. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  151. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  153. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  155. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  157. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  159. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  161. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  163. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  167. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  169. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  170. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  172. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  173. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  175. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  177. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  181. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  183. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  185. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  186. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  187. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  189. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  191. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  193. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  195. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  196. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  198. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  199. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  200. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  204. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  205. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  206. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  207. package/src/llama.cpp/models/.editorconfig +0 -1
  208. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  212. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  214. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  217. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  228. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  231. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  233. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  234. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  242. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  250. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  253. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  256. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  258. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  261. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  262. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  263. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  264. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  265. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  266. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  267. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  268. package/src/llama.cpp/prompts/assistant.txt +0 -31
  269. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  270. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  271. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  273. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  274. package/src/llama.cpp/prompts/chat.txt +0 -28
  275. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  276. package/src/llama.cpp/prompts/dan.txt +0 -1
  277. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  278. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  279. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  280. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  281. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  284. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  285. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  286. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  287. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  289. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  290. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  291. package/src/llama.cpp/requirements.txt +0 -13
  292. package/src/llama.cpp/scripts/build-info.sh +0 -30
  293. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  294. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  295. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  296. package/src/llama.cpp/tests/get-model.cpp +0 -21
  297. package/src/llama.cpp/tests/get-model.h +0 -2
  298. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  299. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  300. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  301. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  302. package/src/llama.cpp/tests/test-c.c +0 -7
  303. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  304. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  305. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  306. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  307. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  308. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  309. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  310. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  311. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  312. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  313. package/src/llama.cpp/tests/test-log.cpp +0 -39
  314. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  315. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  316. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  317. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  318. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  319. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  320. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  321. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  322. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  323. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  324. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  325. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  326. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  327. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  328. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  329. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  330. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  331. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  332. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  333. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  334. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  335. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  336. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  337. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  338. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  339. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  340. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  341. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  342. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  343. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  344. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  345. package/src/llama.cpp/tools/main/main.cpp +0 -977
  346. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  347. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  348. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  349. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  350. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  351. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  353. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  354. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  355. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  356. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  357. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  358. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  360. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  361. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  362. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  363. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  364. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  365. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  367. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  368. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  369. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  370. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  371. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  372. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  373. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  374. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  375. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  376. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  377. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  378. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp
@@ -1,27 +0,0 @@
- //
- // MIT license
- // Copyright (C) 2024 Intel Corporation
- // SPDX-License-Identifier: MIT
- //
-
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
-
- #ifndef GGML_SYCL_MMVQ_HPP
- #define GGML_SYCL_MMVQ_HPP
-
- #include "common.hpp"
-
-
- void ggml_sycl_op_mul_mat_vec_q(
-     ggml_backend_sycl_context & ctx,
-     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
-     const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i,
-     float *dst_dd_i, const int64_t row_low, const int64_t row_high,
-     const int64_t src1_ncols, const int64_t src1_padded_row_size,
-     const dpct::queue_ptr &stream);
-
- #endif // GGML_SYCL_MMVQ_HPP
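For context, ggml_sycl_op_mul_mat_vec_q was the SYCL entry point for the quantized matrix-vector product: each row of the quantized matrix src0 is dequantized block by block and dotted with the vector src1. A minimal scalar sketch of that computation, assuming ggml's q8_0 layout (32 int8 weights sharing one per-block scale; ggml stores the scale as fp16, simplified to float here) and illustrative names throughout:

    #include <cstdint>

    constexpr int QK8_0 = 32;
    struct block_q8_0 {          // simplified: ggml stores d as fp16
        float  d;                // per-block scale
        int8_t qs[QK8_0];        // quantized weights
    };

    // dst[r] = dot(row r of quantized src0, dense fp32 vector src1)
    void mul_mat_vec_q8_0_ref(const block_q8_0 *src0, const float *src1,
                              float *dst, int64_t nrows, int64_t ncols) {
        const int64_t blocks_per_row = ncols / QK8_0;
        for (int64_t r = 0; r < nrows; ++r) {
            float sum = 0.0f;
            for (int64_t b = 0; b < blocks_per_row; ++b) {
                const block_q8_0 &blk = src0[r * blocks_per_row + b];
                float partial = 0.0f;
                for (int i = 0; i < QK8_0; ++i) {
                    partial += blk.qs[i] * src1[b * QK8_0 + i];
                }
                sum += blk.d * partial;  // apply the block scale once per block
            }
            dst[r] = sum;
        }
    }

The real kernel additionally splits rows across devices (row_low/row_high), can consume a quantized copy of src1 (src1_ddq_i) alongside the fp32 one (src1_ddf_i), and pads rows to src1_padded_row_size.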
package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp
@@ -1,474 +0,0 @@
- #include "norm.hpp"
-
- static void norm_f32(const float* x, float* dst, const int ncols, const float eps,
-     const sycl::nd_item<3>& item_ct1, sycl::float2* s_sum, int block_size) {
-     const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
-         item_ct1.get_local_id(1);
-     const int tid = item_ct1.get_local_id(2);
-
-     const int nthreads = item_ct1.get_local_range(2);
-     const int nwarps = nthreads / WARP_SIZE;
-     sycl::float2 mean_var = sycl::float2(0.f, 0.f);
-
-     for (int col = tid; col < ncols; col += block_size) {
-         const float xi = x[row * ncols + col];
-         mean_var.x() += xi;
-         mean_var.y() += xi * xi;
-     }
-
-     // sum up partial sums
-     mean_var = warp_reduce_sum(mean_var, item_ct1);
-     if (block_size > WARP_SIZE) {
-
-         int warp_id = item_ct1.get_local_id(2) / WARP_SIZE;
-         int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
-         if (lane_id == 0) {
-             s_sum[warp_id] = mean_var;
-         }
-         /*
-         DPCT1118:0: SYCL group functions and algorithms must be encountered in
-         converged control flow. You may need to adjust the code.
-         */
-         item_ct1.barrier(sycl::access::fence_space::local_space);
-         mean_var = 0.f;
-         size_t nreduce = nwarps / WARP_SIZE;
-         for (size_t i = 0; i < nreduce; i += 1)
-         {
-             mean_var += s_sum[lane_id + i * WARP_SIZE];
-         }
-         mean_var = warp_reduce_sum(mean_var, item_ct1);
-     }
-
-     const float mean = mean_var.x() / ncols;
-     const float var = mean_var.y() / ncols - mean * mean;
-     const float inv_std = sycl::rsqrt(var + eps);
-
-     for (int col = tid; col < ncols; col += block_size) {
-         dst[row * ncols + col] = (x[row * ncols + col] - mean) * inv_std;
-     }
- }
-
- static void group_norm_f32(const float* x, float* dst, const int group_size, const int ne_elements, const float eps,
-     const sycl::nd_item<3>& item_ct1, float* s_sum, int block_size) {
-     int start = item_ct1.get_group(2) * group_size;
-     int end = start + group_size;
-     const int nthreads = item_ct1.get_local_range(2);
-     const int nwarps = nthreads / WARP_SIZE;
-     start += item_ct1.get_local_id(2);
-     size_t nreduce = nwarps / WARP_SIZE;
-
-     if (end >= ne_elements) {
-         end = ne_elements;
-     }
-
-     float tmp = 0.0f; // partial sum for thread in warp
-
-     for (int j = start; j < end; j += block_size) {
-         tmp += x[j];
-     }
-
-     tmp = warp_reduce_sum(tmp, item_ct1);
-     if (block_size > WARP_SIZE) {
-
-         int warp_id = item_ct1.get_local_id(2) / WARP_SIZE;
-         int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
-         if (lane_id == 0) {
-             s_sum[warp_id] = tmp;
-         }
-         /*
-         DPCT1118:1: SYCL group functions and algorithms must be encountered in
-         converged control flow. You may need to adjust the code.
-         */
-         /*
-         DPCT1065:54: Consider replacing sycl::nd_item::barrier() with
-         sycl::nd_item::barrier(sycl::access::fence_space::local_space) for
-         better performance if there is no access to global memory.
-         */
-         item_ct1.barrier();
-         tmp = 0.f;
-         for (size_t i = 0; i < nreduce; i += 1)
-         {
-             tmp += s_sum[lane_id + i * WARP_SIZE];
-         }
-         tmp = warp_reduce_sum(tmp, item_ct1);
-     }
-
-     float mean = tmp / group_size;
-     tmp = 0.0f;
-
-     for (int j = start; j < end; j += block_size) {
-         float xi = x[j] - mean;
-         dst[j] = xi;
-         tmp += xi * xi;
-     }
-
-     tmp = warp_reduce_sum(tmp, item_ct1);
-     if (block_size > WARP_SIZE) {
-
-         int warp_id = item_ct1.get_local_id(2) / WARP_SIZE;
-         int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
-         if (lane_id == 0) {
-             s_sum[warp_id] = tmp;
-         }
-         /*
-         DPCT1118:2: SYCL group functions and algorithms must be encountered in
-         converged control flow. You may need to adjust the code.
-         */
-         /*
-         DPCT1065:55: Consider replacing sycl::nd_item::barrier() with
-         sycl::nd_item::barrier(sycl::access::fence_space::local_space) for
-         better performance if there is no access to global memory.
-         */
-         item_ct1.barrier();
-         tmp = 0.f;
-         for (size_t i = 0; i < nreduce; i += 1)
-         {
-             tmp += s_sum[lane_id + i * WARP_SIZE];
-         }
-         tmp = warp_reduce_sum(tmp, item_ct1);
-     }
-
-     float variance = tmp / group_size;
-     float scale = sycl::rsqrt(variance + eps);
-     for (int j = start; j < end; j += block_size) {
-         dst[j] *= scale;
-     }
- }
-
- static void rms_norm_f32(const float* x, float* dst, const int ncols, const float eps,
-     const sycl::nd_item<3>& item_ct1, float* s_sum, int block_size) {
-     const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
-         item_ct1.get_local_id(1);
-     const int tid = item_ct1.get_local_id(2);
-     const int nthreads = item_ct1.get_local_range(2);
-     const int nwarps = nthreads / WARP_SIZE;
-     float tmp = 0.0f; // partial sum for thread in warp
-
-     for (int col = tid; col < ncols; col += block_size) {
-         const float xi = x[row * ncols + col];
-         tmp += xi * xi;
-     }
-
-     // sum up partial sums
-     tmp = warp_reduce_sum(tmp, item_ct1);
-     if (block_size > WARP_SIZE) {
-
-         int warp_id = item_ct1.get_local_id(2) / WARP_SIZE;
-         int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
-         if (lane_id == 0) {
-             s_sum[warp_id] = tmp;
-         }
-         /*
-         DPCT1118:3: SYCL group functions and algorithms must be encountered in
-         converged control flow. You may need to adjust the code.
-         */
-         item_ct1.barrier(sycl::access::fence_space::local_space);
-         size_t nreduce = nwarps / WARP_SIZE;
-         tmp = 0.f;
-         for (size_t i = 0; i < nreduce; i += 1)
-         {
-             tmp += s_sum[lane_id + i * WARP_SIZE];
-         }
-         tmp = warp_reduce_sum(tmp, item_ct1);
-     }
-
-     const float mean = tmp / ncols;
-     const float scale = sycl::rsqrt(mean + eps);
-
-     for (int col = tid; col < ncols; col += block_size) {
-         dst[row * ncols + col] = scale * x[row * ncols + col];
-     }
- }
-
- static void l2_norm_f32(const float* x, float* dst, const int ncols, const float eps,
-     const sycl::nd_item<3>& item_ct1, float* s_sum, int block_size) {
-     const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
-         item_ct1.get_local_id(1);
-     const int tid = item_ct1.get_local_id(2);
-     const int nthreads = item_ct1.get_local_range(2);
-     const int nwarps = nthreads / WARP_SIZE;
-     float tmp = 0.0f; // partial sum for thread in warp
-
-     for (int col = tid; col < ncols; col += block_size) {
-         const float xi = x[row * ncols + col];
-         tmp += xi * xi;
-     }
-
-     // sum up partial sums
-     tmp = warp_reduce_sum(tmp, item_ct1);
-     if (block_size > WARP_SIZE) {
-
-         int warp_id = item_ct1.get_local_id(2) / WARP_SIZE;
-         int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
-         if (lane_id == 0) {
-             s_sum[warp_id] = tmp;
-         }
-         /*
-         DPCT1118:3: SYCL group functions and algorithms must be encountered in
-         converged control flow. You may need to adjust the code.
-         */
-         item_ct1.barrier(sycl::access::fence_space::local_space);
-         size_t nreduce = nwarps / WARP_SIZE;
-         tmp = 0.f;
-         for (size_t i = 0; i < nreduce; i += 1)
-         {
-             tmp += s_sum[lane_id + i * WARP_SIZE];
-         }
-         tmp = warp_reduce_sum(tmp, item_ct1);
-     }
-
-     const float scale = sycl::rsqrt(sycl::max(tmp, eps * eps));
-
-     for (int col = tid; col < ncols; col += block_size) {
-         dst[row * ncols + col] = scale * x[row * ncols + col];
-     }
- }
-
- static void norm_f32_sycl(const float* x, float* dst, const int ncols,
-     const int nrows, const float eps,
-     queue_ptr stream, int device) {
-     GGML_ASSERT(ncols % WARP_SIZE == 0);
-     if (ncols < 1024) {
-         const sycl::range<3> block_dims(1, 1, WARP_SIZE);
-         stream->submit([&](sycl::handler& cgh) {
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         norm_f32(x, dst, ncols, eps, item_ct1,
-                             nullptr, WARP_SIZE);
-                     });
-             });
-     }
-     else {
-         const int work_group_size = ggml_sycl_info().max_work_group_sizes[device];
-         assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
-         const sycl::range<3> block_dims(1, 1, work_group_size);
-         /*
-         DPCT1049:17: The work-group size passed to the SYCL kernel may exceed
-         the limit. To get the device limit, query
-         info::device::max_work_group_size. Adjust the work-group size if needed.
-         */
-         stream->submit([&](sycl::handler& cgh) {
-             sycl::local_accessor<sycl::float2, 1> s_sum_acc_ct1(
-                 sycl::range<1>(work_group_size / WARP_SIZE), cgh);
-
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         norm_f32(x, dst, ncols, eps, item_ct1,
-                             get_pointer(s_sum_acc_ct1), work_group_size);
-                     });
-             });
-     }
- }
-
- static void group_norm_f32_sycl(const float* x, float* dst,
-     const int num_groups, const float eps, const int group_size,
-     const int ne_elements, queue_ptr stream, int device) {
-     if (group_size < 1024) {
-         const sycl::range<3> block_dims(1, 1, WARP_SIZE);
-         stream->submit([&](sycl::handler& cgh) {
-             const float eps_ct4 = eps;
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, num_groups) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         group_norm_f32(
-                             x, dst, group_size, ne_elements, eps_ct4, item_ct1,
-                             nullptr, WARP_SIZE);
-                     });
-             });
-     }
-     else {
-         const int work_group_size = ggml_sycl_info().max_work_group_sizes[device];
-         assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
-         const sycl::range<3> block_dims(1, 1, work_group_size);
-         /*
-         DPCT1049:18: The work-group size passed to the SYCL kernel may exceed
-         the limit. To get the device limit, query
-         info::device::max_work_group_size. Adjust the work-group size if needed.
-         */
-
-         stream->submit([&](sycl::handler& cgh) {
-             sycl::local_accessor<float, 1> s_sum_acc_ct1(sycl::range<1>(work_group_size / WARP_SIZE),
-                 cgh);
-
-             const float eps_ct4 = eps;
-
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, num_groups) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         group_norm_f32(x, dst, group_size, ne_elements,
-                             eps_ct4, item_ct1,
-                             get_pointer(s_sum_acc_ct1), work_group_size);
-                     });
-             });
-     }
- }
-
- static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols,
-     const int nrows, const float eps,
-     queue_ptr stream, int device) {
-     GGML_ASSERT(ncols % WARP_SIZE == 0);
-     // printf("%s ncols=%d, nrows=%d, WARP_SIZE=%d\n", __func__, ncols, nrows, WARP_SIZE);
-     if (ncols < 1024) {
-         const sycl::range<3> block_dims(1, 1, WARP_SIZE);
-         stream->submit([&](sycl::handler& cgh) {
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         rms_norm_f32(x, dst, ncols, eps, item_ct1,
-                             nullptr, WARP_SIZE);
-                     });
-             });
-     }
-     else {
-         const int work_group_size = ggml_sycl_info().max_work_group_sizes[device];
-         assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
-         const sycl::range<3> block_dims(1, 1, work_group_size);
-         /*
-         DPCT1049:19: The work-group size passed to the SYCL kernel may exceed
-         the limit. To get the device limit, query
-         info::device::max_work_group_size. Adjust the work-group size if needed.
-         */
-         stream->submit([&](sycl::handler& cgh) {
-             sycl::local_accessor<float, 1> s_sum_acc_ct1(sycl::range<1>(work_group_size / WARP_SIZE),
-                 cgh);
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         rms_norm_f32(x, dst, ncols, eps, item_ct1,
-                             get_pointer(s_sum_acc_ct1), work_group_size);
-                     });
-             });
-     }
- }
-
- static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols,
-     const int nrows, const float eps,
-     queue_ptr stream, int device) {
-     GGML_ASSERT(ncols % WARP_SIZE == 0);
-     // printf("%s ncols=%d, nrows=%d, WARP_SIZE=%d\n", __func__, ncols, nrows, WARP_SIZE);
-     if (ncols < 1024) {
-         const sycl::range<3> block_dims(1, 1, WARP_SIZE);
-         stream->submit([&](sycl::handler& cgh) {
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         l2_norm_f32(x, dst, ncols, eps, item_ct1,
-                             nullptr, WARP_SIZE);
-                     });
-             });
-     }
-     else {
-         const int work_group_size = ggml_sycl_info().max_work_group_sizes[device];
-         assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
-         const sycl::range<3> block_dims(1, 1, work_group_size);
-         /*
-         DPCT1049:19: The work-group size passed to the SYCL kernel may exceed
-         the limit. To get the device limit, query
-         info::device::max_work_group_size. Adjust the work-group size if needed.
-         */
-         stream->submit([&](sycl::handler& cgh) {
-             sycl::local_accessor<float, 1> s_sum_acc_ct1(sycl::range<1>(work_group_size / WARP_SIZE),
-                 cgh);
-             cgh.parallel_for(
-                 sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims,
-                     block_dims),
-                 [=](sycl::nd_item<3> item_ct1)
-                     [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
-                         l2_norm_f32(x, dst, ncols, eps, item_ct1,
-                             get_pointer(s_sum_acc_ct1), work_group_size);
-                     });
-             });
-     }
- }
-
- void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-
-     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
-     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-     const int64_t ne00 = dst->src[0]->ne[0];
-     const int64_t nrows = ggml_nrows(dst->src[0]);
-     dpct::queue_ptr main_stream = ctx.stream();
-     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
-     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
-     float * dst_dd = static_cast<float *>(dst->data);
-
-     float eps;
-     memcpy(&eps, dst->op_params, sizeof(float));
-
-     norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
- }
-
- void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-
-     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
-     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-     int num_groups = dst->op_params[0];
-     dpct::queue_ptr main_stream = ctx.stream();
-     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
-
-     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
-     float * dst_dd = static_cast<float *>(dst->data);
-
-     float eps;
-     memcpy(&eps, dst->op_params + 1, sizeof(float));
-
-     int group_size = dst->src[0]->ne[0] * dst->src[0]->ne[1] * ((dst->src[0]->ne[2] + num_groups - 1) / num_groups);
-     group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, dst->src[0]->ne[0] * dst->src[0]->ne[1] * dst->src[0]->ne[2], main_stream, ctx.device);
- }
-
- void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
-     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
-     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-     const int64_t ne00 = dst->src[0]->ne[0];
-     const int64_t nrows = ggml_nrows(dst->src[0]);
-     dpct::queue_ptr main_stream = ctx.stream();
-     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
-
-     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
-     float * dst_dd = static_cast<float *>(dst->data);
-
-     float eps;
-     memcpy(&eps, dst->op_params, sizeof(float));
-
-     rms_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
- }
-
- void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-
-     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
-     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-     dpct::queue_ptr main_stream = ctx.stream();
-     SYCL_CHECK(ggml_sycl_set_device(ctx.device));
-
-     const int64_t ne00 = dst->src[0]->ne[0];
-     const int64_t nrows = ggml_nrows(dst->src[0]);
-     const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
-     float * dst_dd = static_cast<float *>(dst->data);
-
-     float eps;
-     memcpy(&eps, dst->op_params, sizeof(float));
-
-     l2_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
-
- }
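All four kernels above share the same two-level reduction: each thread accumulates a partial sum over the columns it strides across, warp_reduce_sum combines lanes within a sub-group, and for work-groups wider than one warp the per-warp results are combined through local memory (s_sum) before a final warp reduction. For rms_norm_f32 the end result per row is dst = x / sqrt(mean(x^2) + eps); a single-threaded C++ reference of that transform, with illustrative names and no SYCL:

    #include <cmath>
    #include <cstdint>

    // Scalar reference for the row-wise RMS norm computed by rms_norm_f32:
    // dst[row] = x[row] / sqrt(mean(x[row]^2) + eps)
    void rms_norm_f32_ref(const float *x, float *dst,
                          int64_t nrows, int64_t ncols, float eps) {
        for (int64_t row = 0; row < nrows; ++row) {
            const float *xr = x + row * ncols;
            float sumsq = 0.0f;  // plays the role of the kernel's warp-reduced `tmp`
            for (int64_t col = 0; col < ncols; ++col) {
                sumsq += xr[col] * xr[col];
            }
            const float mean  = sumsq / ncols;
            const float scale = 1.0f / std::sqrt(mean + eps);  // sycl::rsqrt(mean + eps)
            for (int64_t col = 0; col < ncols; ++col) {
                dst[row * ncols + col] = scale * xr[col];
            }
        }
    }

norm_f32 differs only in also accumulating the plain sum so it can subtract the mean, and l2_norm_f32 clamps the denominator with max(sum(x^2), eps^2) instead of adding eps.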
package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp
@@ -1,26 +0,0 @@
- //
- // MIT license
- // Copyright (C) 2024 Intel Corporation
- // SPDX-License-Identifier: MIT
- //
-
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
-
- #ifndef GGML_SYCL_NORM_HPP
- #define GGML_SYCL_NORM_HPP
-
- #include "common.hpp"
-
- void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
-
- void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
-
- void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
-
- void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
-
- #endif // GGML_SYCL_NORM_HPP
package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp
@@ -1,46 +0,0 @@
- #include "outprod.hpp"
-
- void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-     const ggml_tensor *src0 = dst->src[0];
-     const ggml_tensor *src1 = dst->src[1];
-
-     GGML_ASSERT(src0->type == GGML_TYPE_F32);
-     GGML_ASSERT(src1->type == GGML_TYPE_F32);
-     GGML_ASSERT(dst->type == GGML_TYPE_F32);
-     GGML_ASSERT(ggml_is_contiguous(src0));
-     GGML_ASSERT(ggml_is_contiguous(dst));
-
-     GGML_TENSOR_BINARY_OP_LOCALS
-
-     // Get SYCL queue
-     dpct::queue_ptr stream = ctx.stream();
-
-     // Dimension checks
-     GGML_ASSERT(ne01 == ne11); // Inner dimensions must match
-     GGML_ASSERT(ne0 == ne00); // Output rows match src0 rows
-     GGML_ASSERT(ne1 == ne10); // Output cols match src1 cols
-
-     // Get data pointers
-     const float* src0_d = (const float*)src0->data;
-     const float* src1_d = (const float*)src1->data;
-     float* dst_d = (float*)dst->data;
-
-     // GEMM parameters
-     const float alpha = 1.0f;
-     const float beta = 0.0f;
-
-     // Handle transposition of src1
-     const bool src1_T = ggml_is_transposed(src1);
-     const oneapi::math::transpose src1_op = src1_T ? oneapi::math::transpose::nontrans : oneapi::math::transpose::trans;
-     const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float);
-
-     try {
-         // Perform matrix multiplication using oneMath GEMM
-         oneapi::math::blas::column_major::gemm(get_onemath_backend(*stream), oneapi::math::transpose::nontrans, src1_op,
-             ne0, ne1, ne01, alpha, src0_d, ne00, src1_d, ldb, beta, dst_d, ne0);
-     }
-     catch (sycl::exception const& exc) {
-         std::cerr << exc.what() << std::endl;
-         GGML_ASSERT(false);
-     }
- }
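The GEMM call above computes dst = src0 * op(src1) in column-major layout with m = ne0, n = ne1, k = ne01; in the common case where src1 is not already stored transposed, op is 'trans', so dst(i, j) = sum over k of src0(i, k) * src1(j, k). A naive reference of that case with the same leading dimensions (illustrative only, alpha = 1 and beta = 0 as above):

    #include <cstdint>

    // dst is ne0 x ne1 column-major; src0 is ne0 x ne01 with leading dim ne00;
    // src1 holds ne1 x ne01 with leading dim ldb and is read transposed.
    void out_prod_ref(const float *src0, const float *src1, float *dst,
                      int64_t ne0, int64_t ne1, int64_t ne01,
                      int64_t ne00, int64_t ldb) {
        for (int64_t j = 0; j < ne1; ++j) {
            for (int64_t i = 0; i < ne0; ++i) {
                float acc = 0.0f;
                for (int64_t k = 0; k < ne01; ++k) {
                    acc += src0[i + k * ne00] * src1[j + k * ldb];
                }
                dst[i + j * ne0] = acc;  // beta = 0: overwrite dst
            }
        }
    }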
package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp
@@ -1,10 +0,0 @@
- #ifndef GGML_SYCL_OUTPROD_HPP
- #define GGML_SYCL_OUTPROD_HPP
-
- #include "common.hpp"
-
- void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
-
-
- #endif // GGML_SYCL_OUTPROD_HPP
-
package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp
@@ -1,74 +0,0 @@
- //
- // MIT license
- // Copyright (C) 2024 Intel Corporation
- // SPDX-License-Identifier: MIT
- //
-
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
-
- #ifndef GGML_SYCL_PRESETS_HPP
- #define GGML_SYCL_PRESETS_HPP
-
- #define GGML_SYCL_MAX_STREAMS 8
- #define GGML_SYCL_MAX_BUFFERS 256
-
- #define WARP_SIZE GGML_SYCL_WARP_SIZE
- #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
-
- #define SYCL_GELU_BLOCK_SIZE 256
- #define SYCL_SILU_BLOCK_SIZE 256
- #define SYCL_TANH_BLOCK_SIZE 256
- #define SYCL_RELU_BLOCK_SIZE 256
- #define SYCL_HARDSIGMOID_BLOCK_SIZE 256
- #define SYCL_HARDSWISH_BLOCK_SIZE 256
- #define SYCL_EXP_BLOCK_SIZE 256
- #define SYCL_NEG_BLOCK_SIZE 256
- #define SYCL_SIGMOID_BLOCK_SIZE 256
- #define SYCL_SQRT_BLOCK_SIZE 256
- #define SYCL_SIN_BLOCK_SIZE 256
- #define SYCL_SQR_BLOCK_SIZE 256
- #define SYCL_CPY_BLOCK_SIZE 32
- #define SYCL_SCALE_BLOCK_SIZE 256
- #define SYCL_CLAMP_BLOCK_SIZE 256
- #define SYCL_ROPE_BLOCK_SIZE 256
- #define SYCL_ALIBI_BLOCK_SIZE 32
- #define SYCL_DIAG_MASK_INF_BLOCK_SIZE 32
- #define SYCL_QUANTIZE_BLOCK_SIZE 256
- #define SYCL_DEQUANTIZE_BLOCK_SIZE 256
- #define SYCL_GET_ROWS_BLOCK_SIZE 256
- #define SYCL_UPSCALE_BLOCK_SIZE 256
- #define SYCL_CONCAT_BLOCK_SIZE 256
- #define SYCL_PAD_BLOCK_SIZE 256
- #define SYCL_ACC_BLOCK_SIZE 256
- #define SYCL_IM2COL_BLOCK_SIZE 256
- #define SYCL_POOL2D_BLOCK_SIZE 256
- #define SYCL_ARGMAX_BLOCK_SIZE 256
- #define SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE 256
- #define SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE 256
-
- // dmmv = dequantize_mul_mat_vec
- #ifndef GGML_SYCL_DMMV_X
- #define GGML_SYCL_DMMV_X 32
- #endif
- #ifndef GGML_SYCL_MMV_Y
- #define GGML_SYCL_MMV_Y 1
- #endif
-
- #ifndef K_QUANTS_PER_ITERATION
- #define K_QUANTS_PER_ITERATION 2
- #else
- static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUANTS_PER_ITERATION must be 1 or 2");
- #endif
-
- #ifndef GGML_SYCL_PEER_MAX_BATCH_SIZE
- #define GGML_SYCL_PEER_MAX_BATCH_SIZE 128
- #endif // GGML_SYCL_PEER_MAX_BATCH_SIZE
-
- #define MUL_MAT_SRC1_COL_STRIDE 128
-
- #define QK_WARP_SIZE 32
- #endif // GGML_SYCL_PRESETS_HPP