@fugood/llama.node 0.6.3 → 1.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +40 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,751 +0,0 @@
1
-
2
-
3
- #include <iostream>
4
- #include <fstream>
5
- #include <sstream>
6
- #include <string>
7
- #include <stdexcept>
8
- #include <array>
9
- #include <vector>
10
- #include <map>
11
- #include <thread>
12
- #include <mutex>
13
- #include <future>
14
- #include <queue>
15
- #include <condition_variable>
16
- #include <cstdio>
17
- #include <cstring>
18
- #include <cstdlib>
19
- #include <cassert>
20
- #include <algorithm>
21
- #include <sys/stat.h>
22
- #include <sys/types.h>
23
-
24
- #ifdef _WIN32
25
- #include <windows.h>
26
- #include <direct.h> // For _mkdir on Windows
27
- #else
28
- #include <unistd.h>
29
- #include <sys/wait.h>
30
- #include <fcntl.h>
31
- #endif
32
-
33
- #define ASYNCIO_CONCURRENCY 64
34
-
35
- std::mutex lock; // guards shader_fnames; taken in string_to_spv_func before push_back
36
- std::vector<std::pair<std::string, std::string>> shader_fnames; // (shader name, path of the generated .spv file) for each successful compile
37
-
38
- std::string GLSLC = "glslc"; // first word of the compile command line built in string_to_spv_func
39
- std::string input_dir = "vulkan-shaders"; // directory joined in front of each shader source file name
40
- std::string output_dir = "/tmp"; // directory the generated .spv files are written to
41
- std::string target_hpp = "ggml-vulkan-shaders.hpp"; // NOTE(review): not used in the visible portion; presumably the generated header path - confirm
42
- std::string target_cpp = "ggml-vulkan-shaders.cpp"; // NOTE(review): not used in the visible portion; presumably the generated source path - confirm
43
- bool no_clean = false; // NOTE(review): consumer is outside this view; presumably keeps intermediate files when true - confirm
44
-
45
- const std::vector<std::string> type_names = { // NOTE(review): consumers are outside this view; presumably the tensor type suffixes shaders are generated for - confirm
46
- "f32",
47
- "f16",
48
- "q4_0",
49
- "q4_1",
50
- "q5_0",
51
- "q5_1",
52
- "q8_0",
53
- "q2_k",
54
- "q3_k",
55
- "q4_k",
56
- "q5_k",
57
- "q6_k",
58
- "iq1_s",
59
- "iq1_m",
60
- "iq2_xxs",
61
- "iq2_xs",
62
- "iq2_s",
63
- "iq3_xxs",
64
- "iq3_s",
65
- "iq4_xs",
66
- "iq4_nl",
67
- "bf16",
68
- };
69
-
70
- namespace {
71
// Runs `command` and captures everything the child writes to stdout and stderr.
//
// On Windows the command line is passed to CreateProcessA; elsewhere it is run
// through `/bin/sh -c`. Captured output is appended to `stdout_str` and
// `stderr_str` (existing contents are kept). The call blocks until the child
// has closed both streams and has exited.
//
// Throws std::runtime_error when the pipes or the child process cannot be
// created; every pipe end opened so far is closed before throwing.
//
// stderr is drained on a helper task while the calling thread drains stdout.
// Reading one stream to EOF before touching the other can deadlock: a child
// that fills the unread pipe blocks in write() and never closes the stream the
// parent is waiting on.
void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) {
#ifdef _WIN32
    HANDLE stdout_read, stdout_write;
    HANDLE stderr_read, stderr_write;
    SECURITY_ATTRIBUTES sa = { sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };

    if (!CreatePipe(&stdout_read, &stdout_write, &sa, 0)) {
        throw std::runtime_error("Failed to create stdout pipe");
    }
    // The read end stays in the parent only; the child must not inherit it.
    if (!SetHandleInformation(stdout_read, HANDLE_FLAG_INHERIT, 0)) {
        CloseHandle(stdout_read);
        CloseHandle(stdout_write);
        throw std::runtime_error("Failed to create stdout pipe");
    }

    if (!CreatePipe(&stderr_read, &stderr_write, &sa, 0)) {
        CloseHandle(stdout_read);
        CloseHandle(stdout_write);
        throw std::runtime_error("Failed to create stderr pipe");
    }
    if (!SetHandleInformation(stderr_read, HANDLE_FLAG_INHERIT, 0)) {
        CloseHandle(stdout_read);
        CloseHandle(stdout_write);
        CloseHandle(stderr_read);
        CloseHandle(stderr_write);
        throw std::runtime_error("Failed to create stderr pipe");
    }

    PROCESS_INFORMATION pi;
    STARTUPINFOA si = {};
    si.cb = sizeof(STARTUPINFOA);
    si.dwFlags = STARTF_USESTDHANDLES;
    si.hStdOutput = stdout_write;
    si.hStdError = stderr_write;

    // CreateProcessA may modify the command line, so hand it a writable copy.
    std::vector<char> cmd(command.begin(), command.end());
    cmd.push_back('\0');

    if (!CreateProcessA(NULL, cmd.data(), NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi)) {
        CloseHandle(stdout_read);
        CloseHandle(stdout_write);
        CloseHandle(stderr_read);
        CloseHandle(stderr_write);
        throw std::runtime_error("Failed to create process");
    }

    // Drop the parent's copies of the write ends so the reads below see EOF
    // once the child exits.
    CloseHandle(stdout_write);
    CloseHandle(stderr_write);

    const auto drain = [](HANDLE pipe_handle) {
        std::string captured;
        std::array<char, 128> buffer;
        DWORD bytes_read;
        while (ReadFile(pipe_handle, buffer.data(), (DWORD)buffer.size(), &bytes_read, NULL) && bytes_read > 0) {
            captured.append(buffer.data(), bytes_read);
        }
        return captured;
    };

    std::future<std::string> stderr_task = std::async(std::launch::async, drain, stderr_read);
    stdout_str += drain(stdout_read);
    stderr_str += stderr_task.get();

    CloseHandle(stdout_read);
    CloseHandle(stderr_read);
    WaitForSingleObject(pi.hProcess, INFINITE);
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);
#else
    int stdout_pipe[2];
    int stderr_pipe[2];

    if (pipe(stdout_pipe) != 0) {
        throw std::runtime_error("Failed to create pipes");
    }
    if (pipe(stderr_pipe) != 0) {
        close(stdout_pipe[0]);
        close(stdout_pipe[1]);
        throw std::runtime_error("Failed to create pipes");
    }

    pid_t pid = fork();
    if (pid < 0) {
        close(stdout_pipe[0]);
        close(stdout_pipe[1]);
        close(stderr_pipe[0]);
        close(stderr_pipe[1]);
        throw std::runtime_error("Failed to fork process");
    }

    if (pid == 0) {
        // Child: route stdout/stderr into the pipes, then replace the process image.
        close(stdout_pipe[0]);
        close(stderr_pipe[0]);
        dup2(stdout_pipe[1], STDOUT_FILENO);
        dup2(stderr_pipe[1], STDERR_FILENO);
        close(stdout_pipe[1]);
        close(stderr_pipe[1]);
        execl("/bin/sh", "sh", "-c", command.c_str(), (char*) nullptr);
        _exit(EXIT_FAILURE); // only reached when execl itself failed
    }

    // Parent: close the write ends so read() reports EOF once the child is done.
    close(stdout_pipe[1]);
    close(stderr_pipe[1]);

    const auto drain = [](int fd) {
        std::string captured;
        std::array<char, 128> buffer;
        ssize_t bytes_read;
        while ((bytes_read = read(fd, buffer.data(), buffer.size())) > 0) {
            captured.append(buffer.data(), static_cast<size_t>(bytes_read));
        }
        return captured;
    };

    std::future<std::string> stderr_task = std::async(std::launch::async, drain, stderr_pipe[0]);
    stdout_str += drain(stdout_pipe[0]);
    stderr_str += stderr_task.get();

    close(stdout_pipe[0]);
    close(stderr_pipe[0]);
    waitpid(pid, nullptr, 0);
#endif
}
163
-
164
// Returns true when `path` can be stat()'ed and refers to a directory.
// Returns false when the path does not exist, cannot be accessed, or is any
// other kind of file.
bool directory_exists(const std::string& path) {
    struct stat info;
    if (stat(path.c_str(), &info) != 0) {
        return false; // Path doesn't exist or can't be accessed
    }
    // Compare the whole file-type field rather than testing one bit: on common
    // Unix systems the S_IFDIR bit is also set in S_IFBLK and S_IFSOCK, so a
    // plain `& S_IFDIR` reports block devices and sockets as directories.
    return (info.st_mode & S_IFMT) == S_IFDIR;
}
171
-
172
// Creates the directory `path` (single level, mode 0755 on non-Windows).
// Returns true when the directory was created or already exists as a
// directory; returns false on any other failure, including when `path`
// already exists but is not a directory.
bool create_directory(const std::string& path) {
#ifdef _WIN32
    const int rc = _mkdir(path.c_str());
#else
    const int rc = mkdir(path.c_str(), 0755); // 0755 is the directory permissions
#endif
    if (rc == 0) {
        return true;
    }
    if (errno != EEXIST) {
        return false;
    }
    // EEXIST only says that *something* has this name; make sure it is a
    // directory so callers do not go on to write files "inside" a regular file.
    struct stat info;
    return stat(path.c_str(), &info) == 0 && (info.st_mode & S_IFMT) == S_IFDIR;
}
179
-
180
// Returns a copy of `input` with every character passed through std::toupper.
std::string to_uppercase(const std::string& input) {
    std::string result = input;
    for (char& c : result) {
        // std::toupper requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
    }
    return result;
}
187
-
188
// True when `str` begins with `prefix`. An empty prefix matches every string.
bool string_starts_with(const std::string& str, const std::string& prefix) {
    return prefix.size() <= str.size() && str.compare(0, prefix.size(), prefix) == 0;
}
194
-
195
// True when `str` finishes with `suffix`. An empty suffix matches every string.
bool string_ends_with(const std::string& str, const std::string& suffix) {
    if (str.size() < suffix.size()) {
        return false;
    }
    const std::size_t tail_start = str.size() - suffix.size();
    return str.compare(tail_start, suffix.size(), suffix) == 0;
}
201
-
202
// Separator placed between the two components by join_paths.
static const char path_separator = '/';

// Concatenates `path1`, the separator and `path2`, exactly as given: no
// normalisation is done and no duplicate separators are collapsed.
std::string join_paths(const std::string& path1, const std::string& path2) {
    std::string joined;
    joined.reserve(path1.size() + 1 + path2.size());
    joined += path1;
    joined += path_separator;
    joined += path2;
    return joined;
}
207
-
208
// Returns the part of `path` after the last '/' or '\\'. A path without any
// separator is returned unchanged; a path ending in a separator yields "".
std::string basename(const std::string &path) {
    const std::size_t last_sep = path.find_last_of("/\\");
    if (last_sep == std::string::npos) {
        return path;
    }
    return path.substr(last_sep + 1);
}
211
-
212
- // variables to track number of compiles in progress
213
- static uint32_t compile_count = 0;
214
- static std::mutex compile_count_mutex;
215
- static std::condition_variable compile_count_cond;
216
-
217
- void string_to_spv_func(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
218
- std::string name = _name + (f16acc ? "_f16acc" : "") + (coopmat ? "_cm1" : "") + (coopmat2 ? "_cm2" : (fp16 ? "" : "_fp32"));
219
- std::string out_fname = join_paths(output_dir, name + ".spv");
220
- std::string in_path = join_paths(input_dir, in_fname);
221
-
222
- std::string target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2";
223
-
224
- // disable spirv-opt for coopmat shaders for https://github.com/ggerganov/llama.cpp/issues/10734
225
- std::string opt_level = coopmat ? "" : "-O";
226
-
227
- #ifdef _WIN32
228
- std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, "\"" + in_path + "\"", "-o", "\"" + out_fname + "\""};
229
- #else
230
- std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, in_path, "-o", out_fname};
231
- #endif
232
-
233
- #ifdef GGML_VULKAN_SHADER_DEBUG_INFO
234
- cmd.push_back("-g");
235
- #endif
236
-
237
- for (const auto& define : defines) {
238
- cmd.push_back("-D" + define.first + "=" + define.second);
239
- }
240
-
241
- std::string command;
242
- for (const auto& part : cmd) {
243
- command += part + " ";
244
- }
245
-
246
- std::string stdout_str, stderr_str;
247
- try {
248
- // std::cout << "Executing command: ";
249
- // for (const auto& part : cmd) {
250
- // std::cout << part << " ";
251
- // }
252
- // std::cout << std::endl;
253
-
254
- execute_command(command, stdout_str, stderr_str);
255
- if (!stderr_str.empty()) {
256
- std::cerr << "cannot compile " << name << "\n\n" << command << "\n\n" << stderr_str << std::endl;
257
- return;
258
- }
259
-
260
- std::lock_guard<std::mutex> guard(lock);
261
- shader_fnames.push_back(std::make_pair(name, out_fname));
262
- } catch (const std::exception& e) {
263
- std::cerr << "Error executing command for " << name << ": " << e.what() << std::endl;
264
- }
265
- {
266
- std::lock_guard<std::mutex> guard(compile_count_mutex);
267
- assert(compile_count > 0);
268
- compile_count--;
269
- }
270
- compile_count_cond.notify_all();
271
- }
272
-
273
- std::map<std::string, std::string> merge_maps(const std::map<std::string, std::string>& a, const std::map<std::string, std::string>& b) {
274
- std::map<std::string, std::string> result = a;
275
- result.insert(b.begin(), b.end());
276
- return result;
277
- }
278
-
279
- static std::vector<std::future<void>> compiles;
280
- void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
281
- {
282
- // wait until fewer than N compiles are in progress.
283
- // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
284
- uint32_t N = 16;
285
- std::unique_lock<std::mutex> guard(compile_count_mutex);
286
- while (compile_count >= N) {
287
- compile_count_cond.wait(guard);
288
- }
289
- compile_count++;
290
- }
291
- compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
292
- }
293
-
294
- void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) {
295
- std::string load_vec = coopmat2 ? "1" : fp16 ? "8" : "4";
296
- std::string aligned_b_type_f32 = coopmat2 ? "float" : fp16 ? "mat2x4" : "vec4";
297
- std::string aligned_b_type_f16 = coopmat2 ? "float16_t" : fp16 ? "f16mat2x4" : "f16vec4";
298
-
299
- std::map<std::string, std::string> base_dict = {
300
- {"FLOAT_TYPE_VEC2", (coopmat2 || fp16) ? "f16vec2" : "vec2"},
301
- };
302
- std::string shader_name = "matmul";
303
-
304
- if (matmul_id) {
305
- base_dict["MUL_MAT_ID"] = "1";
306
- shader_name = "matmul_id";
307
- }
308
-
309
- if (fp16) {
310
- base_dict["FLOAT16"] = "1";
311
- }
312
-
313
- base_dict["ACC_TYPE"] = f16acc ? "float16_t" : "float";
314
-
315
- if (coopmat) {
316
- base_dict["COOPMAT"] = "1";
317
- }
318
-
319
- const std::string source_name = coopmat2 ? "mul_mm_cm2.comp" : "mul_mm.comp";
320
-
321
- auto const &FLOAT_TYPE = [&](const std::string &t) -> std::string {
322
- if (t == "bf16") {
323
- // scalar path promotes to float
324
- if (!coopmat && !coopmat2) {
325
- return "float";
326
- }
327
- return "bfloat16_t";
328
- }
329
- if (coopmat2 || fp16) {
330
- return "float16_t";
331
- }
332
- return "float";
333
- };
334
-
335
- // Shaders with f16 B_TYPE
336
- string_to_spv(shader_name + "_f32_f16", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F32", "1"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}, }), fp16, coopmat, coopmat2, f16acc);
337
- string_to_spv(shader_name + "_f32_f16_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F32", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
338
-
339
- string_to_spv(shader_name + "_f16_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F16", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
340
- string_to_spv(shader_name + "_f16", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("f16")}, {"DATA_A_F16", "1"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
341
-
342
- // bf16
343
- {
344
- std::string load_vec_a_unaligned = "1";
345
- // For aligned matmul loads
346
- std::string load_vec_a = coopmat2 ? "1" : "4";
347
-
348
- // scalar path promotes to float
349
- std::string to_float_type = (coopmat || coopmat2) ? "uintBitsToBFloat16EXT" : "bf16_to_fp32";
350
-
351
- // If bfloat16 is not supported, then only compile the scalar (promote to fp32) shader
352
- #if !defined(GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT)
353
- if (!(coopmat || coopmat2))
354
- #endif
355
- {
356
- string_to_spv(shader_name + "_bf16_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("bf16")}, {"TO_FLOAT_TYPE", to_float_type}, {"DATA_A_BF16", "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", "4"}, {"B_TYPE", coopmat2 ? "bfloat16_t" : "u16vec4"}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
357
- string_to_spv(shader_name + "_bf16", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE("bf16")}, {"TO_FLOAT_TYPE", to_float_type}, {"DATA_A_BF16", "1"}, {"LOAD_VEC_A", load_vec_a_unaligned}, {"B_TYPE", coopmat2 ? "bfloat16_t" : "uint16_t"}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}}), fp16, coopmat, coopmat2, f16acc);
358
- }
359
- }
360
-
361
- for (const auto& tname : type_names) {
362
- std::string load_vec_quant = "2";
363
- if ((tname == "q4_0") || (tname == "q4_1"))
364
- load_vec_quant = "8";
365
- else if ((tname == "q5_0") || (tname == "q5_1") || (tname == "q8_0") || (tname == "iq4_nl"))
366
- load_vec_quant = "4";
367
-
368
- if (tname == "bf16") {
369
- continue;
370
- }
371
-
372
- std::string data_a_key = "DATA_A_" + to_uppercase(tname);
373
- // For unaligned, load one at a time for f32/f16, or two at a time for quants
374
- std::string load_vec_a_unaligned = (coopmat2 || tname == "f32" || tname == "f16" || tname == "bf16") ? "1" : load_vec_quant;
375
- // For aligned matmul loads
376
- std::string load_vec_a = (coopmat2 || tname == "f32" || tname == "f16" || tname == "bf16") ? load_vec : load_vec_quant;
377
-
378
- // don't generate f32 variants for coopmat2
379
- if (!coopmat2) {
380
- string_to_spv(shader_name + "_" + tname + "_f32", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
381
- string_to_spv(shader_name + "_" + tname + "_f32_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
382
- }
383
-
384
- if (tname != "f16" && tname != "f32") {
385
- string_to_spv(shader_name + "_" + tname + "_f16", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}), fp16, coopmat, coopmat2, f16acc);
386
- string_to_spv(shader_name + "_" + tname + "_f16_aligned", source_name, merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
387
- }
388
-
389
- #if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT)
390
- if (!coopmat && !coopmat2 && !matmul_id && (tname == "q4_0" || tname == "q4_1" || tname == "q5_0" || tname == "q5_1" || tname == "q8_0")) {
391
- string_to_spv(shader_name + "_" + tname + "_q8_1", "mul_mmq.comp", merge_maps(base_dict, {{"FLOAT_TYPE", FLOAT_TYPE(tname)}, {data_a_key, "1"}, {"D_TYPE", "float"},}), fp16, coopmat, coopmat2, f16acc);
392
- }
393
- #endif
394
- }
395
- }
396
-
397
- void process_shaders() {
398
- std::cout << "ggml_vulkan: Generating and compiling shaders to SPIR-V" << std::endl;
399
- std::map<std::string, std::string> base_dict = {{"FLOAT_TYPE", "float"}};
400
-
401
- // matmul
402
- for (const auto& matmul_id : {false, true}) {
403
- // No coopmats
404
- // fp32
405
- matmul_shaders(false, matmul_id, false, false, false);
406
-
407
- // fp16, fp32acc and fp16acc
408
- matmul_shaders(true, matmul_id, false, false, false);
409
- matmul_shaders(true, matmul_id, false, false, true);
410
-
411
- #if defined(GGML_VULKAN_COOPMAT_GLSLC_SUPPORT)
412
- // Coopmat, fp32acc and fp16acc
413
- matmul_shaders(true, matmul_id, true, false, false);
414
- matmul_shaders(true, matmul_id, true, false, true);
415
- #endif
416
-
417
- #if defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
418
- // Coopmat2, fp32acc and fp16acc
419
- matmul_shaders(true, matmul_id, false, true, false);
420
- matmul_shaders(true, matmul_id, false, true, true);
421
- #endif
422
- }
423
-
424
- // flash attention
425
- for (const auto& f16acc : {false, true}) {
426
- std::string acctype = f16acc ? "float16_t" : "float";
427
- std::string acctypev4 = f16acc ? "f16vec4" : "vec4";
428
-
429
- for (const auto& tname : type_names) {
430
- if (tname == "f32") {
431
- continue;
432
- }
433
- if (tname == "bf16") continue;
434
-
435
- #if defined(GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT)
436
- if (tname == "f16") {
437
- string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn_cm2.comp",
438
- merge_maps(base_dict, {{"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}}), true, false, true, f16acc);
439
- } else {
440
- std::string data_a_key = "DATA_A_" + to_uppercase(tname);
441
- string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn_cm2.comp",
442
- merge_maps(base_dict, {{data_a_key, "1"}, {"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}, {"DEQUANTFUNC", "dequantFunc"+to_uppercase(tname) }, {"BLOCK_SIZE", "QUANT_K_"+to_uppercase(tname) }}), true, false, true, f16acc);
443
- }
444
- #endif
445
- #if defined(GGML_VULKAN_COOPMAT_GLSLC_SUPPORT)
446
- if (tname == "f16") {
447
- string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn_cm1.comp",
448
- merge_maps(base_dict, {{"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}, {"ACC_TYPEV4", acctypev4}, {"COOPMAT", "1"}}), true, true, false, f16acc);
449
- } else if (tname == "q4_0" || tname == "q8_0") {
450
- std::string data_a_key = "DATA_A_" + to_uppercase(tname);
451
- string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn_cm1.comp",
452
- merge_maps(base_dict, {{data_a_key, "1"}, {"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}, {"ACC_TYPEV4", acctypev4}, {"BLOCK_SIZE", "QUANT_K_"+to_uppercase(tname)}, {"COOPMAT", "1"}}), true, true, false, f16acc);
453
- }
454
- #endif
455
- if (tname == "f16") {
456
- string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn.comp",
457
- merge_maps(base_dict, {{"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}}), true, false, false, f16acc);
458
- } else if (tname == "q4_0" || tname == "q8_0") {
459
- std::string data_a_key = "DATA_A_" + to_uppercase(tname);
460
- string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn.comp",
461
- merge_maps(base_dict, {{data_a_key, "1"}, {"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}, {"BLOCK_SIZE", "QUANT_K_"+to_uppercase(tname) }}), true, false, false, f16acc);
462
- }
463
- }
464
- }
465
-
466
- for (const auto& tname : type_names) {
467
- // mul mat vec
468
- std::string data_a_key = "DATA_A_" + to_uppercase(tname);
469
- std::string shader = (string_ends_with(tname, "_k") || string_starts_with(tname, "iq1_") || string_starts_with(tname, "iq2_") || string_starts_with(tname, "iq3_")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
470
-
471
- string_to_spv("mul_mat_vec_" + tname + "_f32_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}}));
472
- string_to_spv("mul_mat_vec_" + tname + "_f16_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"B_TYPE_VEC2", "f16vec2"}, {"B_TYPE_VEC4", "f16vec4"}, {"D_TYPE", "float"}}));
473
-
474
- string_to_spv("mul_mat_vec_id_" + tname + "_f32", shader, merge_maps(base_dict, {{"MUL_MAT_ID", "1"}, {data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}}));
475
-
476
- // Dequant shaders
477
- if (tname != "f16" && tname != "bf16") {
478
- string_to_spv("dequant_" + tname, "dequant_" + tname + ".comp", merge_maps(base_dict, {{data_a_key, "1"}, {"D_TYPE", "float16_t"}}));
479
- }
480
-
481
- if (!string_ends_with(tname, "_k")) {
482
- shader = (tname == "f32" || tname == "f16" || tname == "bf16") ? "get_rows.comp" : "get_rows_quant.comp";
483
-
484
- if (tname == "f16") {
485
- string_to_spv("get_rows_" + tname, shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}}));
486
- } else {
487
- string_to_spv("get_rows_" + tname, shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}}));
488
- }
489
- string_to_spv("get_rows_" + tname + "_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float"}}));
490
- }
491
- }
492
-
493
- string_to_spv("mul_mat_vec_p021_f16_f32_subgroup_add", "mul_mat_vec_p021.comp", {{"A_TYPE", "float16_t"}, {"A_TYPE_VEC4", "f16vec4"}, {"B_TYPE", "float"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}, {"USE_SUBGROUP_ADD", "1"}});
494
- string_to_spv("mul_mat_vec_p021_f16_f32", "mul_mat_vec_p021.comp", {{"A_TYPE", "float16_t"}, {"A_TYPE_VEC4", "f16vec4"}, {"B_TYPE", "float"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}});
495
- string_to_spv("mul_mat_vec_nc_f16_f32", "mul_mat_vec_nc.comp", {{"A_TYPE", "float16_t"}, {"A_TYPE_VEC4", "f16vec4"}, {"B_TYPE", "float"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}});
496
-
497
- // Norms
498
- string_to_spv("norm_f32", "norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
499
- string_to_spv("group_norm_f32", "group_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
500
- string_to_spv("rms_norm_f32", "rms_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
501
- string_to_spv("rms_norm_back_f32", "rms_norm_back.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
502
- string_to_spv("l2_norm_f32", "l2_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
503
-
504
- string_to_spv("cpy_f32_f32", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
505
- string_to_spv("cpy_f32_f16", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}});
506
- string_to_spv("cpy_f16_f16", "copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
507
- string_to_spv("cpy_f16_f32", "copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
508
- string_to_spv("cpy_f32_bf16","copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "uint16_t"}, {"DATA_D_BF16", "1"}});
509
- string_to_spv("contig_cpy_f32_f32", "contig_copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
510
- string_to_spv("contig_cpy_f32_f16", "contig_copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}});
511
- string_to_spv("contig_cpy_f16_f16", "contig_copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
512
- string_to_spv("contig_cpy_f16_f32", "contig_copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
513
- string_to_spv("contig_cpy_f32_bf16","contig_copy.comp",{{"A_TYPE", "float"}, {"D_TYPE", "uint16_t"}, {"DATA_D_BF16", "1"}});
514
-
515
- for (std::string t : {"q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) {
516
- string_to_spv("cpy_f32_" + t, "copy_to_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
517
- string_to_spv("cpy_f32_" + t + "_rte", "copy_to_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}, {"RTE16", "1"}});
518
- string_to_spv("cpy_" + t + "_f32", "copy_from_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
519
- }
520
-
521
- auto get_type_str = [](bool f16) {
522
- return f16 ? "float16_t" : "float";
523
- };
524
- auto get_suffix = [](bool src0_f16, bool src1_f16, bool dst_f16) {
525
- std::string s;
526
- s += std::string(src0_f16 ? "_f16" : "_f32");
527
- s += std::string(src1_f16 ? "_f16" : "_f32");
528
- s += std::string(dst_f16 ? "_f16" : "_f32");
529
- return s;
530
- };
531
- for (std::string op : {"add", "sub", "mul", "div"}) {
532
- for (auto src0_f16 : {false, true}) {
533
- for (auto src1_f16 : {false, true}) {
534
- for (auto dst_f16 : {false, true}) {
535
- auto name = op + get_suffix(src0_f16, src1_f16, dst_f16);
536
- string_to_spv(name.c_str(), op + ".comp", {{"A_TYPE", get_type_str(src0_f16)}, {"B_TYPE", get_type_str(src1_f16)}, {"D_TYPE", get_type_str(dst_f16)}, {"FLOAT_TYPE", "float"}});
537
- }
538
- }
539
- }
540
- }
541
-
542
- string_to_spv("sub_f32", "sub.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
543
-
544
- string_to_spv("acc_f32", "acc.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
545
-
546
- string_to_spv("split_k_reduce", "mul_mat_split_k_reduce.comp", {});
547
- string_to_spv("fa_split_k_reduce", "flash_attn_split_k_reduce.comp", {});
548
- string_to_spv("quantize_q8_1", "quantize_q8_1.comp", {});
549
-
550
- string_to_spv("mul_f32", "mul.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
551
-
552
- string_to_spv("div_f32", "div.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
553
-
554
- string_to_spv("repeat_f32", "repeat.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
555
- string_to_spv("repeat_back_f32", "repeat_back.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
556
-
557
- string_to_spv("scale_f32", "scale.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
558
-
559
- string_to_spv("sqr_f32", "square.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
560
-
561
- string_to_spv("sin_f32", "sin.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
562
-
563
- string_to_spv("cos_f32", "cos.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
564
-
565
- string_to_spv("clamp_f32", "clamp.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
566
-
567
- string_to_spv("pad_f32", "pad.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
568
-
569
- string_to_spv("concat_f32", "concat.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
570
- string_to_spv("concat_f16", "concat.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
571
- string_to_spv("concat_i32", "concat.comp", {{"A_TYPE", "int"}, {"B_TYPE", "int"}, {"D_TYPE", "int"}});
572
-
573
- string_to_spv("upscale_f32", "upscale.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
574
-
575
- string_to_spv("gelu_f16", "gelu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
576
- string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
577
- string_to_spv("gelu_quick_f16", "gelu_quick.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
578
- string_to_spv("gelu_quick_f32", "gelu_quick.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
579
- string_to_spv("silu_f16", "silu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
580
- string_to_spv("silu_f32", "silu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
581
- string_to_spv("relu_f16", "relu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
582
- string_to_spv("relu_f32", "relu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
583
- string_to_spv("tanh_f16", "tanh.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
584
- string_to_spv("tanh_f32", "tanh.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
585
- string_to_spv("sigmoid_f16", "sigmoid.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
586
- string_to_spv("sigmoid_f32", "sigmoid.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
587
-
588
- string_to_spv("leaky_relu_f32", "leaky_relu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
589
- string_to_spv("silu_back_f32", "silu_back.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
590
-
591
- string_to_spv("diag_mask_inf_f32", "diag_mask_inf.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
592
-
593
- string_to_spv("soft_max_f32", "soft_max.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
594
- string_to_spv("soft_max_f32_f16", "soft_max.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}));
595
- string_to_spv("soft_max_back_f32", "soft_max_back.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
596
-
597
- string_to_spv("rope_norm_f32", "rope_norm.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
598
- string_to_spv("rope_norm_f16", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
599
- string_to_spv("rope_norm_f16_rte", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
600
-
601
- string_to_spv("rope_neox_f32", "rope_neox.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
602
- string_to_spv("rope_neox_f16", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
603
- string_to_spv("rope_neox_f16_rte", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
604
-
605
- string_to_spv("rope_multi_f32", "rope_multi.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
606
- string_to_spv("rope_multi_f16", "rope_multi.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
607
- string_to_spv("rope_multi_f16_rte", "rope_multi.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
608
-
609
- string_to_spv("rope_vision_f32", "rope_vision.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
610
- string_to_spv("rope_vision_f16", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
611
- string_to_spv("rope_vision_f16_rte", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
612
-
613
- string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}});
614
-
615
- string_to_spv("argmax_f32", "argmax.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "int"}}));
616
- string_to_spv("sum_rows_f32", "sum_rows.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
617
- string_to_spv("count_equal_i32", "count_equal.comp", merge_maps(base_dict, {{"A_TYPE", "int"}, {"B_TYPE", "int"}, {"D_TYPE", "int"}}));
618
-
619
- string_to_spv("im2col_f32", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
620
- string_to_spv("im2col_f32_f16", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}}));
621
- string_to_spv("im2col_f32_f16_rte", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}}));
622
-
623
- string_to_spv("timestep_embedding_f32", "timestep_embedding.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
624
-
625
- string_to_spv("pool2d_f32", "pool2d.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
626
-
627
- string_to_spv("rwkv_wkv6_f32", "wkv6.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
628
-
629
- string_to_spv("rwkv_wkv7_f32", "wkv7.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
630
-
631
- string_to_spv("opt_step_adamw_f32", "opt_step_adamw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
632
-
633
- string_to_spv("conv2d_dw_whcn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"WHCN", "1"}}));
634
- string_to_spv("conv2d_dw_cwhn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"CWHN", "1"}}));
635
-
636
- for (auto &c : compiles) {
637
- c.wait();
638
- }
639
- }
640
-
641
- void write_output_files() {
642
- FILE* hdr = fopen(target_hpp.c_str(), "w");
643
- FILE* src = fopen(target_cpp.c_str(), "w");
644
-
645
- fprintf(hdr, "#include <cstdint>\n\n");
646
- fprintf(src, "#include \"%s\"\n\n", basename(target_hpp).c_str());
647
-
648
- std::sort(shader_fnames.begin(), shader_fnames.end());
649
- for (const auto& pair : shader_fnames) {
650
- const std::string& name = pair.first;
651
- #ifdef _WIN32
652
- std::string path = pair.second;
653
- std::replace(path.begin(), path.end(), '/', '\\' );
654
- #else
655
- const std::string& path = pair.second;
656
- #endif
657
-
658
- FILE* spv = fopen(path.c_str(), "rb");
659
- if (!spv) {
660
- std::cerr << "Error opening SPIR-V file: " << path << " (" << strerror(errno) << ")\n";
661
- continue;
662
- }
663
-
664
- fseek(spv, 0, SEEK_END);
665
- size_t size = ftell(spv);
666
- fseek(spv, 0, SEEK_SET);
667
-
668
- std::vector<unsigned char> data(size);
669
- size_t read_size = fread(data.data(), 1, size, spv);
670
- fclose(spv);
671
- if (read_size != size) {
672
- std::cerr << "Error reading SPIR-V file: " << path << " (" << strerror(errno) << ")\n";
673
- continue;
674
- }
675
-
676
- fprintf(hdr, "extern unsigned char %s_data[%zu];\n", name.c_str(), size);
677
- fprintf(hdr, "const uint64_t %s_len = %zu;\n\n", name.c_str(), size);
678
-
679
- fprintf(src, "unsigned char %s_data[%zu] = {\n", name.c_str(), size);
680
- for (size_t i = 0; i < size; ++i) {
681
- fprintf(src, "0x%02x,", data[i]);
682
- if ((i + 1) % 12 == 0) fprintf(src, "\n");
683
- }
684
- fprintf(src, "\n};\n\n");
685
-
686
- if (!no_clean) {
687
- std::remove(path.c_str());
688
- }
689
- }
690
- for (const char *op : {"add", "sub", "mul", "div"}) {
691
- fprintf(hdr, "extern unsigned char *%s_data[2][2][2];\n", op);
692
- fprintf(hdr, "extern uint64_t %s_len[2][2][2];\n", op);
693
- fprintf(src, "unsigned char *%s_data[2][2][2] = {{{%s_f32_f32_f32_data, %s_f32_f32_f16_data}, {%s_f32_f16_f32_data, %s_f32_f16_f16_data}}, {{%s_f16_f32_f32_data, %s_f16_f32_f16_data}, {%s_f16_f16_f32_data, %s_f16_f16_f16_data}}};\n", op, op, op, op, op, op, op, op, op);
694
- fprintf(src, "uint64_t %s_len[2][2][2] = {{{%s_f32_f32_f32_len, %s_f32_f32_f16_len}, {%s_f32_f16_f32_len, %s_f32_f16_f16_len}}, {{%s_f16_f32_f32_len, %s_f16_f32_f16_len}, {%s_f16_f16_f32_len, %s_f16_f16_f16_len}}};\n", op, op, op, op, op, op, op, op, op);
695
- }
696
- fclose(hdr);
697
- fclose(src);
698
- }
699
- }
700
-
701
- int main(int argc, char** argv) {
702
- std::map<std::string, std::string> args;
703
- for (int i = 1; i < argc; ++i) {
704
- std::string arg = argv[i];
705
- if (arg.rfind("--", 0) == 0) {
706
- if (i + 1 < argc && argv[i + 1][0] != '-') {
707
- args[arg] = argv[i + 1];
708
- ++i;
709
- } else {
710
- args[arg] = "";
711
- }
712
- }
713
- }
714
-
715
- if (args.find("--glslc") != args.end()) {
716
- GLSLC = args["--glslc"]; // Path to glslc
717
- }
718
- if (args.find("--input-dir") != args.end()) {
719
- input_dir = args["--input-dir"]; // Directory containing shader sources
720
- }
721
- if (args.find("--output-dir") != args.end()) {
722
- output_dir = args["--output-dir"]; // Directory for containing SPIR-V output
723
- }
724
- if (args.find("--target-hpp") != args.end()) {
725
- target_hpp = args["--target-hpp"]; // Path to generated header file
726
- }
727
- if (args.find("--target-cpp") != args.end()) {
728
- target_cpp = args["--target-cpp"]; // Path to generated cpp file
729
- }
730
- if (args.find("--no-clean") != args.end()) {
731
- no_clean = true; // Keep temporary SPIR-V files in output-dir after build
732
- }
733
-
734
- if (!directory_exists(input_dir)) {
735
- std::cerr << "\"" << input_dir << "\" must be a valid directory containing shader sources" << std::endl;
736
- return EXIT_FAILURE;
737
- }
738
-
739
- if (!directory_exists(output_dir)) {
740
- if (!create_directory(output_dir)) {
741
- std::cerr << "Error creating output directory: " << output_dir << "\n";
742
- return EXIT_FAILURE;
743
- }
744
- }
745
-
746
- process_shaders();
747
-
748
- write_output_files();
749
-
750
- return EXIT_SUCCESS;
751
- }