@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,100 +0,0 @@
1
- include(CheckCSourceRuns)
2
-
3
- set(AVX_CODE "
4
- #include <immintrin.h>
5
- int main()
6
- {
7
- __m256 a;
8
- a = _mm256_set1_ps(0);
9
- return 0;
10
- }
11
- ")
12
-
13
- set(AVX512_CODE "
14
- #include <immintrin.h>
15
- int main()
16
- {
17
- __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
18
- 0, 0, 0, 0, 0, 0, 0, 0,
19
- 0, 0, 0, 0, 0, 0, 0, 0,
20
- 0, 0, 0, 0, 0, 0, 0, 0,
21
- 0, 0, 0, 0, 0, 0, 0, 0,
22
- 0, 0, 0, 0, 0, 0, 0, 0,
23
- 0, 0, 0, 0, 0, 0, 0, 0,
24
- 0, 0, 0, 0, 0, 0, 0, 0);
25
- __m512i b = a;
26
- __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
27
- return 0;
28
- }
29
- ")
30
-
31
- set(AVX2_CODE "
32
- #include <immintrin.h>
33
- int main()
34
- {
35
- __m256i a = {0};
36
- a = _mm256_abs_epi16(a);
37
- __m256i x;
38
- _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
39
- return 0;
40
- }
41
- ")
42
-
43
- set(FMA_CODE "
44
- #include <immintrin.h>
45
- int main()
46
- {
47
- __m256 acc = _mm256_setzero_ps();
48
- const __m256 d = _mm256_setzero_ps();
49
- const __m256 p = _mm256_setzero_ps();
50
- acc = _mm256_fmadd_ps( d, p, acc );
51
- return 0;
52
- }
53
- ")
54
-
55
- macro(check_sse type flags)
56
- set(__FLAG_I 1)
57
- set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
58
- foreach (__FLAG ${flags})
59
- if (NOT ${type}_FOUND)
60
- set(CMAKE_REQUIRED_FLAGS ${__FLAG})
61
- check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
62
- if (HAS_${type}_${__FLAG_I})
63
- set(${type}_FOUND TRUE CACHE BOOL "${type} support")
64
- set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
65
- endif()
66
- math(EXPR __FLAG_I "${__FLAG_I}+1")
67
- endif()
68
- endforeach()
69
- set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
70
-
71
- if (NOT ${type}_FOUND)
72
- set(${type}_FOUND FALSE CACHE BOOL "${type} support")
73
- set(${type}_FLAGS "" CACHE STRING "${type} flags")
74
- endif()
75
-
76
- mark_as_advanced(${type}_FOUND ${type}_FLAGS)
77
- endmacro()
78
-
79
- # flags are for MSVC only!
80
- check_sse("AVX" " ;/arch:AVX")
81
- if (NOT ${AVX_FOUND})
82
- set(GGML_AVX OFF)
83
- else()
84
- set(GGML_AVX ON)
85
- endif()
86
-
87
- check_sse("AVX2" " ;/arch:AVX2")
88
- check_sse("FMA" " ;/arch:AVX2")
89
- if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
90
- set(GGML_AVX2 OFF)
91
- else()
92
- set(GGML_AVX2 ON)
93
- endif()
94
-
95
- check_sse("AVX512" " ;/arch:AVX512")
96
- if (NOT ${AVX512_FOUND})
97
- set(GGML_AVX512 OFF)
98
- else()
99
- set(GGML_AVX512 ON)
100
- endif()
@@ -1,184 +0,0 @@
1
- cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
2
-
3
- find_package(CUDAToolkit)
4
-
5
- if (CUDAToolkit_FOUND)
6
- message(STATUS "CUDA Toolkit found")
7
-
8
- if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
9
- # native == GPUs available at build time
10
- # 50 == Maxwell, lowest CUDA 12 standard
11
- # 60 == P100, FP16 CUDA intrinsics
12
- # 61 == Pascal, __dp4a instruction (per-byte integer dot product)
13
- # 70 == V100, FP16 tensor cores
14
- # 75 == Turing, int8 tensor cores
15
- # 80 == Ampere, asynchronous data loading, faster tensor core instructions
16
- # 86 == RTX 3000, needs CUDA v11.1
17
- # 89 == RTX 4000, needs CUDA v11.8
18
- #
19
- # XX-virtual == compile CUDA code as PTX, do JIT compilation to binary code on first run
20
- # XX-real == compile CUDA code as device code for this specific architecture
21
- # no suffix == compile as both PTX and device code
22
- #
23
- # The default behavior for a non-native is to build virtual architectures as needed to cover all features needed
24
- # for best performance and to also build real architectures for the most commonly used GPUs.
25
- if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
26
- set(CMAKE_CUDA_ARCHITECTURES "native")
27
- elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
28
- if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
29
- set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real")
30
- else()
31
- set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real")
32
- endif()
33
- else()
34
- if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
35
- set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real")
36
- else()
37
- set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real")
38
- endif()
39
- endif()
40
- endif()
41
- message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
42
-
43
- enable_language(CUDA)
44
-
45
- file(GLOB GGML_HEADERS_CUDA "*.cuh")
46
- list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")
47
-
48
- file(GLOB GGML_SOURCES_CUDA "*.cu")
49
- file(GLOB SRCS "template-instances/fattn-mma*.cu")
50
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
51
- file(GLOB SRCS "template-instances/mmq*.cu")
52
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
53
-
54
- if (GGML_CUDA_FA_ALL_QUANTS)
55
- file(GLOB SRCS "template-instances/fattn-vec*.cu")
56
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
57
- add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
58
- else()
59
- file(GLOB SRCS "template-instances/fattn-vec*q4_0-q4_0.cu")
60
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
61
- file(GLOB SRCS "template-instances/fattn-vec*q8_0-q8_0.cu")
62
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
63
- file(GLOB SRCS "template-instances/fattn-vec*f16-f16.cu")
64
- list(APPEND GGML_SOURCES_CUDA ${SRCS})
65
- endif()
66
-
67
- ggml_add_backend_library(ggml-cuda
68
- ${GGML_HEADERS_CUDA}
69
- ${GGML_SOURCES_CUDA}
70
- )
71
-
72
- add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
73
-
74
- if (GGML_CUDA_GRAPHS)
75
- add_compile_definitions(GGML_CUDA_USE_GRAPHS)
76
- endif()
77
-
78
- if (GGML_CUDA_FORCE_MMQ)
79
- add_compile_definitions(GGML_CUDA_FORCE_MMQ)
80
- endif()
81
-
82
- if (GGML_CUDA_FORCE_CUBLAS)
83
- add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
84
- endif()
85
-
86
- if (GGML_CUDA_NO_VMM)
87
- add_compile_definitions(GGML_CUDA_NO_VMM)
88
- endif()
89
-
90
- if (NOT GGML_CUDA_FA)
91
- add_compile_definitions(GGML_CUDA_NO_FA)
92
- endif()
93
-
94
- if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
95
- add_compile_definitions(GGML_CUDA_F16)
96
- endif()
97
-
98
- if (GGML_CUDA_NO_PEER_COPY)
99
- add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
100
- endif()
101
-
102
- if (GGML_STATIC)
103
- if (WIN32)
104
- # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
105
- target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
106
- else ()
107
- target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
108
- endif()
109
- else()
110
- target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
111
- endif()
112
-
113
- if (GGML_CUDA_NO_VMM)
114
- # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
115
- else()
116
- target_link_libraries(ggml-cuda PRIVATE CUDA::cuda_driver)
117
- endif()
118
-
119
- set(CUDA_CXX_FLAGS "")
120
-
121
- set(CUDA_FLAGS -use_fast_math -extended-lambda)
122
-
123
- if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
124
- # Options are:
125
- # - none (not recommended)
126
- # - speed (nvcc's default)
127
- # - balance
128
- # - size
129
- list(APPEND CUDA_FLAGS -compress-mode=${GGML_CUDA_COMPRESSION_MODE})
130
- endif()
131
-
132
- if (GGML_FATAL_WARNINGS)
133
- list(APPEND CUDA_FLAGS -Werror all-warnings)
134
- endif()
135
-
136
- if (GGML_ALL_WARNINGS AND NOT MSVC)
137
- set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
138
- if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
139
- list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
140
- endif()
141
-
142
- execute_process(
143
- COMMAND ${NVCC_CMD} -Xcompiler --version
144
- OUTPUT_VARIABLE CUDA_CCFULLVER
145
- ERROR_QUIET
146
- )
147
-
148
- if (NOT CUDA_CCFULLVER MATCHES clang)
149
- set(CUDA_CCID "GNU")
150
- execute_process(
151
- COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
152
- OUTPUT_VARIABLE CUDA_CCVER
153
- ERROR_QUIET
154
- OUTPUT_STRIP_TRAILING_WHITESPACE
155
- )
156
- else()
157
- if (CUDA_CCFULLVER MATCHES Apple)
158
- set(CUDA_CCID "AppleClang")
159
- else()
160
- set(CUDA_CCID "Clang")
161
- endif()
162
- string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
163
- endif()
164
-
165
- message(STATUS "CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
166
-
167
- ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
168
- list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
169
- endif()
170
-
171
- if (NOT MSVC)
172
- list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
173
- endif()
174
-
175
- list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
176
-
177
- if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
178
- list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
179
- endif()
180
-
181
- target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
182
- else()
183
- message(FATAL_ERROR "CUDA Toolkit not found")
184
- endif()
@@ -1,15 +0,0 @@
1
- #pragma once
2
-
3
- #include <cuda_runtime.h>
4
- #include <cuda.h>
5
- #include <cublas_v2.h>
6
- #include <cuda_bf16.h>
7
- #include <cuda_fp16.h>
8
-
9
- #if CUDART_VERSION < 11020
10
- #define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED
11
- #define CUBLAS_TF32_TENSOR_OP_MATH CUBLAS_TENSOR_OP_MATH
12
- #define CUBLAS_COMPUTE_16F CUDA_R_16F
13
- #define CUBLAS_COMPUTE_32F CUDA_R_32F
14
- #define cublasComputeType_t cudaDataType_t
15
- #endif // CUDART_VERSION < 11020
@@ -1,243 +0,0 @@
1
- #pragma once
2
-
3
- #define HIP_ENABLE_WARP_SYNC_BUILTINS 1
4
- #include <hip/hip_runtime.h>
5
- #include <hipblas/hipblas.h>
6
- #include <hip/hip_fp16.h>
7
- #include <hip/hip_bfloat16.h>
8
- #ifdef __HIP_PLATFORM_AMD__
9
- // for rocblas_initialize()
10
- #include "rocblas/rocblas.h"
11
- #endif // __HIP_PLATFORM_AMD__
12
-
13
- #define CUBLAS_COMPUTE_16F HIPBLAS_R_16F
14
- #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
15
- #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
16
- #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
17
- #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
18
- #define CUBLAS_OP_N HIPBLAS_OP_N
19
- #define CUBLAS_OP_T HIPBLAS_OP_T
20
- #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
21
- #define CUBLAS_TF32_TENSOR_OP_MATH 0
22
- #define CUDA_R_16F HIPBLAS_R_16F
23
- #define CUDA_R_16BF HIPBLAS_R_16B
24
- #define CUDA_R_32F HIPBLAS_R_32F
25
- #define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
26
- #define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
27
- #define CU_MEM_ALLOCATION_TYPE_PINNED hipMemAllocationTypePinned
28
- #define CU_MEM_LOCATION_TYPE_DEVICE hipMemLocationTypeDevice
29
- #define CU_MEM_ACCESS_FLAGS_PROT_READWRITE hipMemAccessFlagsProtReadWrite
30
- #define CU_CHECK(fn) {hipError_t err = fn; if(err != hipSuccess) { GGML_ABORT("HipVMM Failure: %s\n", hipGetErrorString(err)); }}
31
- #define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width)
32
- #define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
33
- #define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
34
- #define cublasCreate hipblasCreate
35
- #define cublasDestroy hipblasDestroy
36
- #define cublasGemmEx hipblasGemmEx
37
- #define cublasGemmBatchedEx hipblasGemmBatchedEx
38
- #define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
39
- #define cublasHandle_t hipblasHandle_t
40
- #define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS
41
- #define cublasSetStream hipblasSetStream
42
- #define cublasSgemm hipblasSgemm
43
- #define cublasStatus_t hipblasStatus_t
44
- #define cublasOperation_t hipblasOperation_t
45
- #define cudaDataType_t hipblasDatatype_t //deprecated, new hipblasDatatype not in 5.6
46
- #define cudaDeviceCanAccessPeer hipDeviceCanAccessPeer
47
- #define cudaDeviceDisablePeerAccess hipDeviceDisablePeerAccess
48
- #define cudaDeviceEnablePeerAccess hipDeviceEnablePeerAccess
49
- #define cudaDeviceProp hipDeviceProp_t
50
- #define cudaDeviceSynchronize hipDeviceSynchronize
51
- #define cudaError_t hipError_t
52
- #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
53
- #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
54
- #define cudaEventCreateWithFlags hipEventCreateWithFlags
55
- #define cudaEventDisableTiming hipEventDisableTiming
56
- #define cudaEventRecord hipEventRecord
57
- #define cudaEventSynchronize hipEventSynchronize
58
- #define cudaEvent_t hipEvent_t
59
- #define cudaEventDestroy hipEventDestroy
60
- #define cudaFree hipFree
61
- #define cudaFreeHost hipHostFree
62
- #define cudaGetDevice hipGetDevice
63
- #define cudaGetDeviceCount hipGetDeviceCount
64
- #define cudaGetDeviceProperties hipGetDeviceProperties
65
- #define cudaGetErrorString hipGetErrorString
66
- #define cudaGetLastError hipGetLastError
67
- #define cudaHostRegister hipHostRegister
68
- #define cudaHostRegisterPortable hipHostRegisterPortable
69
- #define cudaHostRegisterReadOnly hipHostRegisterReadOnly
70
- #define cudaHostUnregister hipHostUnregister
71
- #define cudaLaunchHostFunc hipLaunchHostFunc
72
- #define cudaMalloc hipMalloc
73
- #define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
74
- #define cudaMallocManaged hipMallocManaged
75
- #define cudaMemAdvise hipMemAdvise
76
- #define cudaMemcpy hipMemcpy
77
- #define cudaMemcpyAsync hipMemcpyAsync
78
- #define cudaMemcpyPeerAsync hipMemcpyPeerAsync
79
- #define cudaMemcpy2DAsync hipMemcpy2DAsync
80
- #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
81
- #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
82
- #define cudaMemcpyHostToDevice hipMemcpyHostToDevice
83
- #define cudaMemcpyKind hipMemcpyKind
84
- #define cudaMemset hipMemset
85
- #define cudaMemsetAsync hipMemsetAsync
86
- #define cudaMemGetInfo hipMemGetInfo
87
- #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
88
- #define cudaSetDevice hipSetDevice
89
- #define cuDeviceGet hipDeviceGet
90
- #define CUdevice hipDevice_t
91
- #define CUdeviceptr hipDeviceptr_t
92
- #define cuMemUnmap hipMemUnmap
93
- #define CUmemAccessDesc hipMemAccessDesc
94
- #define cuMemAddressFree hipMemAddressFree
95
- #define cuMemRelease hipMemRelease
96
- #define CUmemGenericAllocationHandle hipMemGenericAllocationHandle_t
97
- #define cuMemCreate hipMemCreate
98
- #define cuMemAddressReserve hipMemAddressReserve
99
- #define cuMemMap hipMemMap
100
- #define cuMemSetAccess hipMemSetAccess
101
- #define cuMemGetAllocationGranularity hipMemGetAllocationGranularity
102
- #define CUmemAllocationProp hipMemAllocationProp
103
- #define cuDeviceGetAttribute hipDeviceGetAttribute
104
- #define cudaStreamCreateWithFlags hipStreamCreateWithFlags
105
- #define cudaStreamDestroy hipStreamDestroy
106
- #define cudaStreamFireAndForget hipStreamFireAndForget
107
- #define cudaStreamNonBlocking hipStreamNonBlocking
108
- #define cudaStreamPerThread hipStreamPerThread
109
- #define cudaStreamSynchronize hipStreamSynchronize
110
- #define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
111
- #define cudaGraphExec_t hipGraphExec_t
112
- #define cudaGraphNode_t hipGraphNode_t
113
- #define cudaKernelNodeParams hipKernelNodeParams
114
- #define cudaKernelNodeParams hipKernelNodeParams
115
- #define cudaGraphExecDestroy hipGraphExecDestroy
116
- #define cudaGraphLaunch hipGraphLaunch
117
- #define cudaErrorGraphExecUpdateFailure hipErrorGraphExecUpdateFailure
118
- #define cudaGraphExecUpdateResult hipGraphExecUpdateResult
119
- #define cudaGraphNodeType hipGraphNodeType
120
- #define cudaGraphNodeTypeKernel hipGraphNodeTypeKernel
121
- #define cudaGraphInstantiate hipGraphInstantiate
122
- #define cudaStreamEndCapture hipStreamEndCapture
123
- #define cudaGraphDestroy hipGraphDestroy
124
- #define cudaGraphKernelNodeSetParams hipGraphKernelNodeSetParams
125
- #define cudaErrorInvalidDeviceFunction hipErrorInvalidDeviceFunction
126
- #define cudaGraphKernelNodeGetParams hipGraphKernelNodeGetParams
127
- #define cudaGraphNodeGetType hipGraphNodeGetType
128
- #define cudaGraphGetNodes hipGraphGetNodes
129
- #define cudaGraphExecUpdate hipGraphExecUpdate
130
- #define cudaStreamCaptureModeRelaxed hipStreamCaptureModeRelaxed
131
- #define cudaStreamBeginCapture hipStreamBeginCapture
132
- #define cudaGraph_t hipGraph_t
133
- #define cudaStream_t hipStream_t
134
- #define cudaSuccess hipSuccess
135
- #define cudaOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessor
136
- #define __trap() do { abort(); __builtin_unreachable(); } while(0)
137
- #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
138
- #define CUBLAS_STATUS_NOT_INITIALIZED HIPBLAS_STATUS_NOT_INITIALIZED
139
- #define CUBLAS_STATUS_ALLOC_FAILED HIPBLAS_STATUS_ALLOC_FAILED
140
- #define CUBLAS_STATUS_INVALID_VALUE HIPBLAS_STATUS_INVALID_VALUE
141
- #define CUBLAS_STATUS_ARCH_MISMATCH HIPBLAS_STATUS_ARCH_MISMATCH
142
- #define CUBLAS_STATUS_MAPPING_ERROR HIPBLAS_STATUS_MAPPING_ERROR
143
- #define CUBLAS_STATUS_EXECUTION_FAILED HIPBLAS_STATUS_EXECUTION_FAILED
144
- #define CUBLAS_STATUS_INTERNAL_ERROR HIPBLAS_STATUS_INTERNAL_ERROR
145
- #define CUBLAS_STATUS_NOT_SUPPORTED HIPBLAS_STATUS_NOT_SUPPORTED
146
-
147
- #define __CUDA_ARCH__ 1300
148
-
149
- #if defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__)
150
- #define GCN
151
- #endif
152
-
153
- #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__)
154
- #define CDNA
155
- #endif
156
-
157
- #if defined(__GFX12__)
158
- #define RDNA4
159
- #endif
160
-
161
- #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || \
162
- defined(__gfx1150__) || defined(__gfx1151__)
163
- #define RDNA3
164
- #endif
165
-
166
- #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || defined(__gfx1033__) || \
167
- defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) || defined(__gfx1037__)
168
- #define RDNA2
169
- #endif
170
-
171
- #if defined(__gfx1010__) || defined(__gfx1012__)
172
- #define RDNA1
173
- #endif
174
-
175
- #ifndef __has_builtin
176
- #define __has_builtin(x) 0
177
- #endif
178
-
179
- typedef hip_bfloat16 nv_bfloat16;
180
-
181
- typedef int8_t int8x4_t __attribute__((ext_vector_type(4)));
182
- typedef uint8_t uint8x4_t __attribute__((ext_vector_type(4)));
183
- static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
184
- const int8x4_t va = reinterpret_cast<const int8x4_t&>(a);
185
- const int8x4_t vb = reinterpret_cast<const int8x4_t&>(b);
186
- #if __has_builtin(__builtin_elementwise_sub_sat)
187
- const int8x4_t c = __builtin_elementwise_sub_sat(va, vb);
188
- return reinterpret_cast<const int &>(c);
189
- #else
190
- int8x4_t c;
191
- int16_t tmp;
192
- #pragma unroll
193
- for (int i = 0; i < 4; i++) {
194
- tmp = va[i] - vb[i];
195
- if(tmp > std::numeric_limits<int8_t>::max()) tmp = std::numeric_limits<int8_t>::max();
196
- if(tmp < std::numeric_limits<int8_t>::min()) tmp = std::numeric_limits<int8_t>::min();
197
- c[i] = tmp;
198
- }
199
- return reinterpret_cast<int &>(c);
200
- #endif // __has_builtin(__builtin_elementwise_sub_sat)
201
- }
202
-
203
- static __device__ __forceinline__ int __vsub4(const int a, const int b) {
204
- return __vsubss4(a, b);
205
- }
206
-
207
- static __device__ __forceinline__ unsigned int __vcmpeq4(unsigned int a, unsigned int b) {
208
- const uint8x4_t& va = reinterpret_cast<const uint8x4_t&>(a);
209
- const uint8x4_t& vb = reinterpret_cast<const uint8x4_t&>(b);
210
- unsigned int c;
211
- uint8x4_t& vc = reinterpret_cast<uint8x4_t&>(c);
212
- #pragma unroll
213
- for (int i = 0; i < 4; ++i) {
214
- vc[i] = va[i] == vb[i] ? 0xff : 0x00;
215
- }
216
- return c;
217
- }
218
-
219
- static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigned int b) {
220
- const uint8x4_t& va = reinterpret_cast<const uint8x4_t&>(a);
221
- const uint8x4_t& vb = reinterpret_cast<const uint8x4_t&>(b);
222
- unsigned int c;
223
- uint8x4_t& vc = reinterpret_cast<uint8x4_t&>(c);
224
- #pragma unroll
225
- for (int i = 0; i < 4; ++i) {
226
- vc[i] = va[i] == vb[i] ? 0x00 : 0xff;
227
- }
228
- return c;
229
- }
230
-
231
- #if defined(__HIP_PLATFORM_AMD__) && HIP_VERSION < 50600000
232
- // __shfl_xor() for half2 was added in ROCm 5.6
233
- static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) {
234
- typedef union half2_b32 {
235
- half2 val;
236
- int b32;
237
- } half2_b32_t;
238
- half2_b32_t tmp;
239
- tmp.val = var;
240
- tmp.b32 = __shfl_xor(tmp.b32, laneMask, width);
241
- return tmp.val;
242
- }
243
- #endif // defined(__HIP_PLATFORM_AMD__) && HIP_VERSION < 50600000
@@ -1,140 +0,0 @@
1
- #pragma once
2
-
3
- #include <musa_runtime.h>
4
- #include <musa.h>
5
- #include <mublas.h>
6
- #include <musa_bf16.h>
7
- #include <musa_fp16.h>
8
- #define CUBLAS_COMPUTE_16F CUDA_R_16F
9
- #define CUBLAS_COMPUTE_32F CUDA_R_32F
10
- #define CUBLAS_COMPUTE_32F_FAST_16F MUBLAS_COMPUTE_32F_FAST_16F
11
- #define CUBLAS_GEMM_DEFAULT MUBLAS_GEMM_DEFAULT
12
- #define CUBLAS_GEMM_DEFAULT_TENSOR_OP MUBLAS_GEMM_DEFAULT
13
- #define CUBLAS_OP_N MUBLAS_OP_N
14
- #define CUBLAS_OP_T MUBLAS_OP_T
15
- #define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
16
- #define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_MATH_MODE_DEFAULT
17
- #define CUDA_R_16F MUSA_R_16F
18
- #define CUDA_R_16BF MUSA_R_16BF
19
- #define CUDA_R_32F MUSA_R_32F
20
- #define cublasComputeType_t cudaDataType_t
21
- #define cublasCreate mublasCreate
22
- #define cublasDestroy mublasDestroy
23
- #define cublasGemmEx mublasGemmEx
24
- #define cublasGemmBatchedEx mublasGemmBatchedEx
25
- #define cublasGemmStridedBatchedEx mublasGemmStridedBatchedEx
26
- #define cublasHandle_t mublasHandle_t
27
- #define cublasSetMathMode mublasSetMathMode
28
- #define cublasSetStream mublasSetStream
29
- #define cublasSgemm mublasSgemm
30
- #define cublasStatus_t mublasStatus_t
31
- #define cublasOperation_t mublasOperation_t
32
- #define cublasGetStatusString mublasStatus_to_string
33
- #define cudaDataType_t musaDataType_t
34
- #define cudaDeviceCanAccessPeer musaDeviceCanAccessPeer
35
- #define cudaDeviceDisablePeerAccess musaDeviceDisablePeerAccess
36
- #define cudaDeviceEnablePeerAccess musaDeviceEnablePeerAccess
37
- #define cudaDeviceProp musaDeviceProp
38
- #define cudaDeviceSynchronize musaDeviceSynchronize
39
- #define cudaError_t musaError_t
40
- #define cudaErrorPeerAccessAlreadyEnabled musaErrorPeerAccessAlreadyEnabled
41
- #define cudaErrorPeerAccessNotEnabled musaErrorPeerAccessNotEnabled
42
- #define cudaEventCreateWithFlags musaEventCreateWithFlags
43
- #define cudaEventDisableTiming musaEventDisableTiming
44
- #define cudaEventRecord musaEventRecord
45
- #define cudaEventSynchronize musaEventSynchronize
46
- #define cudaEvent_t musaEvent_t
47
- #define cudaEventDestroy musaEventDestroy
48
- #define cudaFree musaFree
49
- #define cudaFreeHost musaFreeHost
50
- #define cudaGetDevice musaGetDevice
51
- #define cudaGetDeviceCount musaGetDeviceCount
52
- #define cudaGetDeviceProperties musaGetDeviceProperties
53
- #define cudaGetErrorString musaGetErrorString
54
- #define cudaGetLastError musaGetLastError
55
- #define cudaHostRegister musaHostRegister
56
- #define cudaHostRegisterPortable musaHostRegisterPortable
57
- #define cudaHostRegisterReadOnly musaHostRegisterReadOnly
58
- #define cudaHostUnregister musaHostUnregister
59
- #define cudaLaunchHostFunc musaLaunchHostFunc
60
- #define cudaMalloc musaMalloc
61
- #define cudaMallocHost musaMallocHost
62
- #define cudaMallocManaged musaMallocManaged
63
- #define cudaMemcpy musaMemcpy
64
- #define cudaMemcpyAsync musaMemcpyAsync
65
- #define cudaMemcpyPeerAsync musaMemcpyPeerAsync
66
- #define cudaMemcpy2DAsync musaMemcpy2DAsync
67
- #define cudaMemcpyDeviceToDevice musaMemcpyDeviceToDevice
68
- #define cudaMemcpyDeviceToHost musaMemcpyDeviceToHost
69
- #define cudaMemcpyHostToDevice musaMemcpyHostToDevice
70
- #define cudaMemcpyKind musaMemcpyKind
71
- #define cudaMemset musaMemset
72
- #define cudaMemsetAsync musaMemsetAsync
73
- #define cudaMemGetInfo musaMemGetInfo
74
- #define cudaOccupancyMaxPotentialBlockSize musaOccupancyMaxPotentialBlockSize
75
- #define cudaSetDevice musaSetDevice
76
- #define cudaStreamCreateWithFlags musaStreamCreateWithFlags
77
- #define cudaStreamDestroy musaStreamDestroy
78
- #define cudaStreamFireAndForget musaStreamFireAndForget
79
- #define cudaStreamNonBlocking musaStreamNonBlocking
80
- #define cudaStreamPerThread musaStreamPerThread
81
- #define cudaStreamSynchronize musaStreamSynchronize
82
- #define cudaStreamWaitEvent musaStreamWaitEvent
83
- #define cudaStream_t musaStream_t
84
- #define cudaSuccess musaSuccess
85
-
86
- // Additional mappings for MUSA virtual memory pool
87
- #define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED MU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED
88
- #define CU_MEM_ACCESS_FLAGS_PROT_READWRITE MU_MEM_ACCESS_FLAGS_PROT_READWRITE
89
- #define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED MU_MEM_ALLOC_GRANULARITY_RECOMMENDED
90
- #define CU_MEM_ALLOCATION_TYPE_PINNED MU_MEM_ALLOCATION_TYPE_PINNED
91
- #define CU_MEM_LOCATION_TYPE_DEVICE MU_MEM_LOCATION_TYPE_DEVICE
92
- #define CUdevice MUdevice
93
- #define CUdeviceptr MUdeviceptr
94
- #define CUmemAccessDesc MUmemAccessDesc
95
- #define CUmemAllocationProp MUmemAllocationProp
96
- #define CUmemGenericAllocationHandle MUmemGenericAllocationHandle
97
- #define cuDeviceGet muDeviceGet
98
- #define cuDeviceGetAttribute muDeviceGetAttribute
99
- #define cuMemAddressFree muMemAddressFree
100
- #define cuMemAddressReserve muMemAddressReserve
101
- #define cuMemCreate muMemCreate
102
- #define cuMemGetAllocationGranularity muMemGetAllocationGranularity
103
- #define cuMemMap muMemMap
104
- #define cuMemRelease muMemRelease
105
- #define cuMemSetAccess muMemSetAccess
106
- #define cuMemUnmap muMemUnmap
107
- #define cudaFuncAttributeMaxDynamicSharedMemorySize musaFuncAttributeMaxDynamicSharedMemorySize
108
- #define cudaFuncSetAttribute musaFuncSetAttribute
109
- #define cudaMemcpy3DPeerParms musaMemcpy3DPeerParms
110
- #define make_cudaExtent make_musaExtent
111
- #define make_cudaPitchedPtr make_musaPitchedPtr
112
-
113
- // Additional mappings for MUSA graphs
114
- #define CUDA_SUCCESS MUSA_SUCCESS
115
- #define CUresult MUresult
116
- #define cuGetErrorString muGetErrorString
117
- #define cudaErrorGraphExecUpdateFailure musaErrorGraphExecUpdateFailure
118
- #define cudaErrorInvalidDeviceFunction musaErrorInvalidDeviceFunction
119
- #define cudaGraphDestroy musaGraphDestroy
120
- #define cudaGraphExecDestroy musaGraphExecDestroy
121
- #define cudaGraphExec_t musaGraphExec_t
122
- #define cudaGraphExecUpdate musaGraphExecUpdate
123
- #define cudaGraphExecUpdateResult musaGraphExecUpdateResult
124
- #define cudaGraphGetNodes musaGraphGetNodes
125
- #define cudaGraphInstantiate musaGraphInstantiate
126
- #define cudaGraphKernelNodeGetParams musaGraphKernelNodeGetParams
127
- #define cudaGraphKernelNodeSetParams musaGraphKernelNodeSetParams
128
- #define cudaGraphLaunch musaGraphLaunch
129
- #define cudaGraphNodeGetType musaGraphNodeGetType
130
- #define cudaGraphNode_t musaGraphNode_t
131
- #define cudaGraphNodeType musaGraphNodeType
132
- #define cudaGraphNodeTypeKernel musaGraphNodeTypeKernel
133
- #define cudaGraph_t musaGraph_t
134
- #define cudaKernelNodeParams musaKernelNodeParams
135
- #define cudaStreamCaptureModeRelaxed musaStreamCaptureModeRelaxed
136
- #define cudaStreamBeginCapture musaStreamBeginCapture
137
- #define cudaStreamEndCapture musaStreamEndCapture
138
- #define cudaOccupancyMaxActiveBlocksPerMultiprocessor musaOccupancyMaxActiveBlocksPerMultiprocessor
139
-
140
- typedef mt_bfloat16 nv_bfloat16;