@fugood/llama.node 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/CMakeLists.txt +6 -3
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +3 -3
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  24. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  25. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  26. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  27. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  28. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  29. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  31. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  32. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  36. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  37. package/src/llama.cpp/CMakeLists.txt +91 -1245
  38. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  39. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  40. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  41. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  42. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  43. package/src/llama.cpp/common/common.cpp +1116 -877
  44. package/src/llama.cpp/common/common.h +191 -77
  45. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  46. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  47. package/src/llama.cpp/common/log.h +1 -1
  48. package/src/llama.cpp/common/ngram-cache.h +10 -3
  49. package/src/llama.cpp/common/sampling.cpp +19 -10
  50. package/src/llama.cpp/docs/build.md +353 -0
  51. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  52. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  54. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  56. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  58. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  60. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  61. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  62. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  63. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  64. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  65. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  66. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  67. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  68. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  69. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  71. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  72. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  73. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  75. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  76. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  77. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  79. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  80. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  87. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  88. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  90. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  91. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  92. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  94. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  95. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  96. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  97. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  98. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  99. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  100. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  102. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  103. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  104. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  105. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  106. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  107. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  108. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  110. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  111. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  112. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  113. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  114. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  115. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  116. package/src/llama.cpp/examples/main/main.cpp +98 -75
  117. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  118. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  119. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  120. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  121. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  122. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  123. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  124. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  125. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  126. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  127. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  128. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  129. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  130. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  131. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  132. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  133. package/src/llama.cpp/examples/server/server.cpp +274 -671
  134. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  135. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  136. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  137. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  138. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  139. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  140. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  141. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  142. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  143. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  144. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  145. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  146. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  147. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  148. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  149. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  150. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  151. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  152. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  153. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  154. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  155. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  156. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  157. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  159. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  160. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  161. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  162. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  163. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  178. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  179. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  180. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  181. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  182. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  183. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  184. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  208. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  209. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  210. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  211. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  212. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  214. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  215. package/src/llama.cpp/models/.editorconfig +1 -0
  216. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  217. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  221. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  230. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  233. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  249. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  252. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  255. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  258. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  259. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  260. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  261. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  263. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  264. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  265. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  266. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  267. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  268. package/src/llama.cpp/requirements.txt +5 -4
  269. package/src/llama.cpp/scripts/build-info.sh +30 -0
  270. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  271. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  272. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  273. package/src/llama.cpp/src/llama-grammar.h +39 -0
  274. package/src/llama.cpp/src/llama-impl.h +26 -0
  275. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  276. package/src/llama.cpp/src/llama-sampling.h +56 -0
  277. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  278. package/src/llama.cpp/src/llama-vocab.h +130 -0
  279. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  280. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  281. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  282. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  283. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  284. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  285. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  286. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  287. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  289. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  290. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  291. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  292. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  293. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  294. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  295. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  296. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  297. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  298. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  299. package/bin/darwin/arm64/default.metallib +0 -0
  300. package/bin/darwin/x64/default.metallib +0 -0
  301. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  302. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  303. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  304. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  305. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  306. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  307. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  308. package/src/llama.cpp/ggml-opencl.h +0 -36
  309. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  310. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  311. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  314. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  315. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  316. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  317. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  318. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  319. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
@@ -0,0 +1,552 @@
1
+
2
+
3
#include <algorithm>
#include <array>
#include <cctype>
#include <cerrno>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <future>
#include <iostream>
#include <map>
#include <mutex>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include <sys/stat.h>
#include <sys/types.h>

#ifdef _WIN32
    #include <windows.h>
    #include <direct.h> // For _mkdir on Windows
#else
    #include <unistd.h>
    #include <sys/wait.h>
    #include <fcntl.h>
#endif
30
+
31
+ #define ASYNCIO_CONCURRENCY 64
32
+
33
+ // define prototypes (forward declarations, so the definitions may appear in any order)
34
+ void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str); // runs `command`, appending its stdout/stderr output to the two out-params
35
+ bool directory_exists(const std::string& path); // stat()-based check that `path` is a directory
36
+ bool create_directory(const std::string& path); // mkdir wrapper; an already-existing path counts as success
37
+ std::string to_uppercase(const std::string& input); // returns a copy with every char passed through std::toupper
38
+ bool string_ends_with(const std::string& str, const std::string& suffix); // true when `str` ends in `suffix`
39
+ std::string join_paths(const std::string& path1, const std::string& path2); // path1 + platform separator + path2
40
+ std::string basename(const std::string &path); // text after the last '/' or '\\'
41
+ void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16); // compiles one shader variant with GLSLC; the definition defaults fp16 to true
42
+ std::map<std::string, std::string> merge_maps(const std::map<std::string, std::string>& a, const std::map<std::string, std::string>& b); // copy of `a` plus the keys of `b` that `a` lacks
43
+ void matmul_shaders(std::vector<std::future<void>>& tasks, bool fp16, bool matmul_id); // queues the mul_mm.comp variants as async tasks
44
+ void process_shaders(std::vector<std::future<void>>& tasks); // queues every shader compile task into `tasks`
45
+ void write_output_files(); // defined later in the file
46
+
47
+ std::mutex lock; // held by string_to_spv while it appends to shader_fnames from concurrent tasks
48
+ std::vector<std::pair<std::string, std::string>> shader_fnames; // (shader name, .spv output path) for every shader whose compile produced no stderr output
49
+
50
+ std::string GLSLC = "glslc"; // first word of every compile command line built by string_to_spv
51
+ std::string input_dir = "vulkan-shaders"; // joined with each shader source file name to form the compiler input path
52
+ std::string output_dir = "/tmp"; // joined with "<name>.spv" to form the compiler output path
53
+ std::string target_hpp = "ggml-vulkan-shaders.hpp"; // NOTE(review): not referenced in this excerpt; presumably the generated header name - confirm
54
+ std::string target_cpp = "ggml-vulkan-shaders.cpp"; // NOTE(review): not referenced in this excerpt; presumably the generated source name - confirm
55
+ bool clean = true; // NOTE(review): not referenced in this excerpt; presumably controls removal of intermediate files - confirm
56
+
57
+ const std::vector<std::string> type_names = { // type suffixes; each is upper-cased into a DATA_A_<NAME> define and embedded in the generated shader names
58
+ "f32",
59
+ "f16",
60
+ "q4_0",
61
+ "q4_1",
62
+ "q5_0",
63
+ "q5_1",
64
+ "q8_0",
65
+ "q2_k", // names ending in "_k" select a dedicated mul_mat_vec_<name>.comp source in process_shaders
66
+ "q3_k",
67
+ "q4_k",
68
+ "q5_k",
69
+ "q6_k",
70
+ "iq4_nl"
71
+ };
72
+
73
/**
 * Runs `command` and captures everything the child writes to stdout and stderr.
 *
 * On Windows the command line is handed to CreateProcessA; elsewhere it is run
 * through `/bin/sh -c`. The call blocks until the child has closed both
 * streams and has been reaped. The child's exit status is not reported.
 *
 * The two streams are drained concurrently (stderr on a helper task, stdout on
 * the calling thread). Draining them one after the other can deadlock: a child
 * that fills the stderr pipe blocks in write() while the parent is still
 * waiting for EOF on stdout.
 *
 * @param command     command line to execute
 * @param stdout_str  receives (appended) the child's standard output
 * @param stderr_str  receives (appended) the child's standard error
 * @throws std::runtime_error if the pipes or the child process cannot be created;
 *         exceptions raised while reading are rethrown after cleanup
 */
void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) {
#ifdef _WIN32
    HANDLE stdout_read = NULL, stdout_write = NULL;
    HANDLE stderr_read = NULL, stderr_write = NULL;
    SECURITY_ATTRIBUTES sa = { sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };

    const auto close_if_open = [](HANDLE h) {
        if (h != NULL) {
            CloseHandle(h);
        }
    };

    if (!CreatePipe(&stdout_read, &stdout_write, &sa, 0) ||
        !SetHandleInformation(stdout_read, HANDLE_FLAG_INHERIT, 0)) {
        close_if_open(stdout_read);
        close_if_open(stdout_write);
        throw std::runtime_error("Failed to create stdout pipe");
    }

    if (!CreatePipe(&stderr_read, &stderr_write, &sa, 0) ||
        !SetHandleInformation(stderr_read, HANDLE_FLAG_INHERIT, 0)) {
        close_if_open(stdout_read);
        close_if_open(stdout_write);
        close_if_open(stderr_read);
        close_if_open(stderr_write);
        throw std::runtime_error("Failed to create stderr pipe");
    }

    PROCESS_INFORMATION pi;
    STARTUPINFOA si = { sizeof(STARTUPINFOA) };
    si.dwFlags = STARTF_USESTDHANDLES;
    si.hStdOutput = stdout_write;
    si.hStdError = stderr_write;

    // CreateProcessA may modify the command line, so it needs a writable copy.
    std::vector<char> cmd(command.begin(), command.end());
    cmd.push_back('\0');

    if (!CreateProcessA(NULL, cmd.data(), NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi)) {
        CloseHandle(stdout_read);
        CloseHandle(stdout_write);
        CloseHandle(stderr_read);
        CloseHandle(stderr_write);
        throw std::runtime_error("Failed to create process");
    }

    // The parent must drop its write ends, otherwise the reads never see EOF.
    CloseHandle(stdout_write);
    CloseHandle(stderr_write);

    const auto drain = [](HANDLE pipe, std::string& sink) {
        std::array<char, 128> buffer;
        DWORD bytes_read = 0;
        while (ReadFile(pipe, buffer.data(), static_cast<DWORD>(buffer.size()), &bytes_read, NULL) && bytes_read > 0) {
            sink.append(buffer.data(), bytes_read);
        }
    };

    std::exception_ptr failure;
    std::future<void> stderr_reader;
    try {
        stderr_reader = std::async(std::launch::async, [&] { drain(stderr_read, stderr_str); });
        drain(stdout_read, stdout_str);
        stderr_reader.get();
    } catch (...) {
        failure = std::current_exception();
    }

    // Single cleanup path; closing stdout first unblocks a child stuck writing to it.
    CloseHandle(stdout_read);
    if (stderr_reader.valid()) {
        stderr_reader.wait();
    }
    CloseHandle(stderr_read);
    WaitForSingleObject(pi.hProcess, INFINITE);
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);

    if (failure) {
        std::rethrow_exception(failure);
    }
#else
    int stdout_pipe[2];
    int stderr_pipe[2];

    if (pipe(stdout_pipe) != 0) {
        throw std::runtime_error("Failed to create pipes");
    }
    if (pipe(stderr_pipe) != 0) {
        close(stdout_pipe[0]);
        close(stdout_pipe[1]);
        throw std::runtime_error("Failed to create pipes");
    }

    const pid_t pid = fork();
    if (pid < 0) {
        close(stdout_pipe[0]);
        close(stdout_pipe[1]);
        close(stderr_pipe[0]);
        close(stderr_pipe[1]);
        throw std::runtime_error("Failed to fork process");
    }

    if (pid == 0) {
        // Child: route stdout/stderr into the pipes, then exec the shell.
        close(stdout_pipe[0]);
        close(stderr_pipe[0]);
        if (dup2(stdout_pipe[1], STDOUT_FILENO) < 0 || dup2(stderr_pipe[1], STDERR_FILENO) < 0) {
            _exit(EXIT_FAILURE);
        }
        close(stdout_pipe[1]);
        close(stderr_pipe[1]);
        execl("/bin/sh", "sh", "-c", command.c_str(), (char*) nullptr);
        _exit(EXIT_FAILURE); // only reached when execl itself failed
    }

    // Parent: drop the write ends so read() reports EOF once the child exits.
    close(stdout_pipe[1]);
    close(stderr_pipe[1]);

    const auto drain = [](int fd, std::string& sink) {
        std::array<char, 128> buffer;
        for (;;) {
            const ssize_t bytes_read = read(fd, buffer.data(), buffer.size());
            if (bytes_read > 0) {
                sink.append(buffer.data(), static_cast<size_t>(bytes_read));
            } else if (bytes_read == 0 || errno != EINTR) {
                break; // EOF, or an error other than an interrupted call
            }
        }
    };

    std::exception_ptr failure;
    std::future<void> stderr_reader;
    try {
        stderr_reader = std::async(std::launch::async, [&] { drain(stderr_pipe[0], stderr_str); });
        drain(stdout_pipe[0], stdout_str);
        stderr_reader.get();
    } catch (...) {
        failure = std::current_exception();
    }

    // Single cleanup path; closing stdout first unblocks a child stuck writing to it.
    close(stdout_pipe[0]);
    if (stderr_reader.valid()) {
        stderr_reader.wait();
    }
    close(stderr_pipe[0]);
    waitpid(pid, nullptr, 0);

    if (failure) {
        std::rethrow_exception(failure);
    }
#endif
}
164
+
165
/**
 * Reports whether `path` names an existing directory.
 *
 * @param path  file-system path to inspect
 * @return true only when stat() succeeds and the file type is "directory";
 *         false when the path is missing, inaccessible, or any other file type
 */
bool directory_exists(const std::string& path) {
    struct stat info;
    if (stat(path.c_str(), &info) != 0) {
        return false; // Path doesn't exist or can't be accessed
    }
    // Compare the whole file-type field: S_IFDIR shares a bit with S_IFBLK,
    // so a plain `st_mode & S_IFDIR` test also accepts block devices.
    return (info.st_mode & S_IFMT) == S_IFDIR;
}
172
+
173
/**
 * Creates the directory `path` (single level, mode 0755 on POSIX).
 *
 * @param path  directory to create
 * @return true when the directory was created or already exists as a
 *         directory; false on any other failure, including when `path`
 *         already exists but is not a directory
 */
bool create_directory(const std::string& path) {
#ifdef _WIN32
    const int rc = _mkdir(path.c_str());
#else
    const int rc = mkdir(path.c_str(), 0755); // 0755 is the directory permissions
#endif
    if (rc == 0) {
        return true;
    }
    if (errno != EEXIST) {
        return false;
    }
    // EEXIST is also reported when a regular file occupies the name, so
    // confirm that what exists really is a directory.
    struct stat info;
    return stat(path.c_str(), &info) == 0 && (info.st_mode & S_IFMT) == S_IFDIR;
}
180
+
181
/**
 * Returns a copy of `input` with every character upper-cased via std::toupper.
 *
 * @param input  text to convert
 * @return the converted copy; `input` itself is not modified
 */
std::string to_uppercase(const std::string& input) {
    std::string result = input;
    for (char& c : result) {
        // std::toupper requires a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
    }
    return result;
}
188
+
189
/**
 * Tests whether `str` ends with `suffix`.
 *
 * @return true when the last suffix.size() characters of `str` equal `suffix`
 *         (an empty suffix always matches); false when `suffix` is longer
 */
bool string_ends_with(const std::string& str, const std::string& suffix) {
    const std::size_t suffix_len = suffix.size();
    const std::size_t str_len = str.size();
    return suffix_len <= str_len &&
           str.compare(str_len - suffix_len, suffix_len, suffix) == 0;
}
195
+
196
// Directory separator for the platform this generator is built for.
#ifdef _WIN32
static const char path_separator = '\\';
#else
static const char path_separator = '/';
#endif

/**
 * Concatenates two path fragments with the platform separator in between.
 * No normalisation is performed: a separator is always inserted, even when
 * `path1` is empty or already ends with one.
 */
std::string join_paths(const std::string& path1, const std::string& path2) {
    std::string joined(path1);
    joined += path_separator;
    joined += path2;
    return joined;
}
205
+
206
/**
 * Returns the part of `path` after its last '/' or '\\'.
 * A path without any separator is returned unchanged; a path that ends in a
 * separator yields an empty string.
 */
std::string basename(const std::string &path) {
    const std::string::size_type last_sep = path.find_last_of("/\\");
    if (last_sep == std::string::npos) {
        return path;
    }
    return path.substr(last_sep + 1);
}
209
+
210
+ void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true) {
211
+ std::string name = _name + (fp16 ? "" : "_fp32");
212
+ std::string out_fname = join_paths(output_dir, name + ".spv");
213
+ std::string in_path = join_paths(input_dir, in_fname);
214
+
215
+ std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", "--target-env=vulkan1.2", "-O", in_path, "-o", out_fname};
216
+ for (const auto& define : defines) {
217
+ cmd.push_back("-D" + define.first + "=" + define.second);
218
+ }
219
+
220
+ std::string command;
221
+ for (const auto& part : cmd) {
222
+ command += part + " ";
223
+ }
224
+
225
+ std::string stdout_str, stderr_str;
226
+ try {
227
+ // std::cout << "Executing command: ";
228
+ // for (const auto& part : cmd) {
229
+ // std::cout << part << " ";
230
+ // }
231
+ // std::cout << std::endl;
232
+
233
+ execute_command(command, stdout_str, stderr_str);
234
+ if (!stderr_str.empty()) {
235
+ std::cerr << "cannot compile " << name << "\n\n" << command << "\n\n" << stderr_str << std::endl;
236
+ return;
237
+ }
238
+
239
+ std::lock_guard<std::mutex> guard(lock);
240
+ shader_fnames.push_back(std::make_pair(name, out_fname));
241
+ } catch (const std::exception& e) {
242
+ std::cerr << "Error executing command for " << name << ": " << e.what() << std::endl;
243
+ }
244
+ }
245
+
246
/**
 * Builds the union of two string maps.
 *
 * @return a copy of `a` extended with the entries of `b`; when a key occurs
 *         in both maps the value from `a` is kept
 */
std::map<std::string, std::string> merge_maps(const std::map<std::string, std::string>& a, const std::map<std::string, std::string>& b) {
    std::map<std::string, std::string> merged(a);
    for (const auto& entry : b) {
        merged.insert(entry); // insert() never overwrites an existing key
    }
    return merged;
}
251
+
252
+ void matmul_shaders(std::vector<std::future<void>>& tasks, bool fp16, bool matmul_id) {
253
+ std::string load_vec = fp16 ? "8" : "4";
254
+ std::string aligned_b_type_f32 = fp16 ? "mat2x4" : "vec4";
255
+ std::string aligned_b_type_f16 = fp16 ? "f16mat2x4" : "f16vec4";
256
+
257
+ std::map<std::string, std::string> base_dict = {{"FLOAT_TYPE", fp16 ? "float16_t" : "float"}};
258
+ std::string shader_name = "matmul";
259
+
260
+ if (matmul_id) {
261
+ base_dict["MUL_MAT_ID"] = "1";
262
+ shader_name = "matmul_id";
263
+ }
264
+
265
+ if (fp16) {
266
+ base_dict["FLOAT16"] = "1";
267
+ }
268
+
269
+ // Shaders with f16 B_TYPE
270
+ tasks.push_back(std::async(std::launch::async, [=] {
271
+ string_to_spv(shader_name + "_f32_f16", "mul_mm.comp", merge_maps(base_dict, {{"DATA_A_F32", "1"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}), fp16);
272
+ }));
273
+ tasks.push_back(std::async(std::launch::async, [=] {
274
+ string_to_spv(shader_name + "_f32_f16_aligned", "mul_mm.comp", merge_maps(base_dict, {{"DATA_A_F32", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}}), fp16);
275
+ }));
276
+
277
+ tasks.push_back(std::async(std::launch::async, [=] {
278
+ string_to_spv(shader_name + "_f16", "mul_mm.comp", merge_maps(base_dict, {{"DATA_A_F16", "1"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}), fp16);
279
+ }));
280
+ tasks.push_back(std::async(std::launch::async, [=] {
281
+ string_to_spv(shader_name + "_f16_aligned", "mul_mm.comp", merge_maps(base_dict, {{"DATA_A_F16", "1"}, {"LOAD_VEC_A", load_vec}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f16}, {"D_TYPE", "float"}}), fp16);
282
+ }));
283
+
284
+ for (const auto& tname : type_names) {
285
+ std::string data_a_key = "DATA_A_" + to_uppercase(tname);
286
+ std::string load_vec_a = (tname == "f32" || tname == "f16") ? load_vec : "2";
287
+ tasks.push_back(std::async(std::launch::async, [=] {
288
+ string_to_spv(shader_name + "_" + tname + "_f32", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}), fp16);
289
+ }));
290
+ tasks.push_back(std::async(std::launch::async, [=] {
291
+ string_to_spv(shader_name + "_" + tname + "_f32_aligned", "mul_mm.comp", merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}}), fp16);
292
+ }));
293
+ }
294
+ }
295
+
296
+ void process_shaders(std::vector<std::future<void>>& tasks) {
297
+ std::cout << "ggml_vulkan: Generating and compiling shaders to SPIR-V" << std::endl;
298
+ std::map<std::string, std::string> base_dict = {{"FLOAT_TYPE", "float"}};
299
+
300
+ for (const auto& fp16 : {false, true}) {
301
+ matmul_shaders(tasks, fp16, false);
302
+ matmul_shaders(tasks, fp16, true);
303
+ }
304
+
305
+ for (const auto& tname : type_names) {
306
+ // mul mat vec
307
+ std::string data_a_key = "DATA_A_" + to_uppercase(tname);
308
+ std::string shader = (string_ends_with(tname, "_k")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
309
+
310
+ tasks.push_back(std::async(std::launch::async, [=] {
311
+ string_to_spv("mul_mat_vec_" + tname + "_f32_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
312
+ }));
313
+ tasks.push_back(std::async(std::launch::async, [=] {
314
+ string_to_spv("mul_mat_vec_" + tname + "_f16_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}));
315
+ }));
316
+
317
+ tasks.push_back(std::async(std::launch::async, [=] {
318
+ string_to_spv("mul_mat_vec_id_" + tname + "_f32", shader, merge_maps(base_dict, {{"MUL_MAT_ID", "1"}, {data_a_key, "1"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
319
+ }));
320
+
321
+ // Dequant shaders
322
+ if (tname != "f16") {
323
+ tasks.push_back(std::async(std::launch::async, [=] {
324
+ string_to_spv("dequant_" + tname, "dequant_" + tname + ".comp", merge_maps(base_dict, {{data_a_key, "1"}, {"D_TYPE", "float16_t"}}));
325
+ }));
326
+ }
327
+
328
+ if (!string_ends_with(tname, "_k")) {
329
+ shader = (tname == "f32" || tname == "f16") ? "get_rows.comp" : "get_rows_quant.comp";
330
+
331
+ if (tname == "f16") {
332
+ tasks.push_back(std::async(std::launch::async, [=] {
333
+ string_to_spv("get_rows_" + tname, shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
334
+ }));
335
+ } else {
336
+ tasks.push_back(std::async(std::launch::async, [=] {
337
+ string_to_spv("get_rows_" + tname, shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}});
338
+ }));
339
+ }
340
+ tasks.push_back(std::async(std::launch::async, [=] {
341
+ string_to_spv("get_rows_" + tname + "_f32", shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float"}});
342
+ }));
343
+ }
344
+ }
345
+
346
+ tasks.push_back(std::async(std::launch::async, [] {
347
+ string_to_spv("mul_mat_vec_p021_f16_f32", "mul_mat_vec_p021.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
348
+ }));
349
+ tasks.push_back(std::async(std::launch::async, [] {
350
+ string_to_spv("mul_mat_vec_nc_f16_f32", "mul_mat_vec_nc.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
351
+ }));
352
+
353
+ // Norms
354
+ tasks.push_back(std::async(std::launch::async, [=] {
355
+ string_to_spv("norm_f32", "norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
356
+ }));
357
+ tasks.push_back(std::async(std::launch::async, [=] {
358
+ string_to_spv("rms_norm_f32", "rms_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
359
+ }));
360
+
361
+ tasks.push_back(std::async(std::launch::async, [] {
362
+ string_to_spv("cpy_f32_f32", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
363
+ }));
364
+ tasks.push_back(std::async(std::launch::async, [] {
365
+ string_to_spv("cpy_f32_f16", "copy.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}});
366
+ }));
367
+ tasks.push_back(std::async(std::launch::async, [] {
368
+ string_to_spv("cpy_f16_f16", "copy.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
369
+ }));
370
+
371
+ tasks.push_back(std::async(std::launch::async, [] {
372
+ string_to_spv("add_f32", "add.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
373
+ }));
374
+
375
+ tasks.push_back(std::async(std::launch::async, [] {
376
+ string_to_spv("split_k_reduce", "mul_mat_split_k_reduce.comp", {});
377
+ }));
378
+
379
+ tasks.push_back(std::async(std::launch::async, [] {
380
+ string_to_spv("mul_f32", "mul.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
381
+ }));
382
+
383
+ tasks.push_back(std::async(std::launch::async, [] {
384
+ string_to_spv("div_f32", "div.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
385
+ }));
386
+
387
+ tasks.push_back(std::async(std::launch::async, [] {
388
+ string_to_spv("scale_f32", "scale.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
389
+ }));
390
+
391
+ tasks.push_back(std::async(std::launch::async, [] {
392
+ string_to_spv("sqr_f32", "square.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
393
+ }));
394
+
395
+ tasks.push_back(std::async(std::launch::async, [] {
396
+ string_to_spv("clamp_f32", "clamp.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
397
+ }));
398
+
399
+ tasks.push_back(std::async(std::launch::async, [] {
400
+ string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
401
+ }));
402
+ tasks.push_back(std::async(std::launch::async, [] {
403
+ string_to_spv("silu_f32", "silu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
404
+ }));
405
+ tasks.push_back(std::async(std::launch::async, [] {
406
+ string_to_spv("relu_f32", "relu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
407
+ }));
408
+
409
+ tasks.push_back(std::async(std::launch::async, [] {
410
+ string_to_spv("diag_mask_inf_f32", "diag_mask_inf.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
411
+ }));
412
+
413
+ tasks.push_back(std::async(std::launch::async, [=] {
414
+ string_to_spv("soft_max_f32", "soft_max.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}));
415
+ }));
416
+ tasks.push_back(std::async(std::launch::async, [=] {
417
+ string_to_spv("soft_max_f32_f16", "soft_max.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}}));
418
+ }));
419
+
420
+ tasks.push_back(std::async(std::launch::async, [] {
421
+ string_to_spv("rope_norm_f32", "rope_norm.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
422
+ }));
423
+ tasks.push_back(std::async(std::launch::async, [] {
424
+ string_to_spv("rope_norm_f16", "rope_norm.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
425
+ }));
426
+
427
+ tasks.push_back(std::async(std::launch::async, [] {
428
+ string_to_spv("rope_neox_f32", "rope_neox.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
429
+ }));
430
+ tasks.push_back(std::async(std::launch::async, [] {
431
+ string_to_spv("rope_neox_f16", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
432
+ }));
433
+
434
+ tasks.push_back(std::async(std::launch::async, [] {
435
+ string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}});
436
+ }));
437
+
438
+ tasks.push_back(std::async(std::launch::async, [=] {
439
+ string_to_spv("sum_rows_f32", "sum_rows.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
440
+ }));
441
+ }
442
+
443
+ void write_output_files() {
444
+ FILE* hdr = fopen(target_hpp.c_str(), "w");
445
+ FILE* src = fopen(target_cpp.c_str(), "w");
446
+
447
+ fprintf(hdr, "#include <cstdint>\n\n");
448
+ fprintf(src, "#include \"%s\"\n\n", basename(target_hpp).c_str());
449
+
450
+ for (const auto& pair : shader_fnames) {
451
+ const std::string& name = pair.first;
452
+ const std::string& path = pair.second;
453
+ FILE* spv = fopen(path.c_str(), "rb");
454
+ if (!spv) {
455
+ std::cerr << "Error opening SPIR-V file: " << path << "\n";
456
+ continue;
457
+ }
458
+
459
+ fseek(spv, 0, SEEK_END);
460
+ size_t size = ftell(spv);
461
+ fseek(spv, 0, SEEK_SET);
462
+
463
+ std::vector<unsigned char> data(size);
464
+ size_t read_size = fread(data.data(), 1, size, spv);
465
+ fclose(spv);
466
+ if (read_size != size) {
467
+ std::cerr << "Error reading SPIR-V file: " << path << "\n";
468
+ continue;
469
+ }
470
+
471
+ fprintf(hdr, "extern unsigned char %s_data[%zu];\n", name.c_str(), size);
472
+ fprintf(hdr, "const uint64_t %s_len = %zu;\n\n", name.c_str(), size);
473
+
474
+ fprintf(src, "unsigned char %s_data[%zu] = {\n", name.c_str(), size);
475
+ for (size_t i = 0; i < size; ++i) {
476
+ fprintf(src, "0x%02x,", data[i]);
477
+ if ((i + 1) % 12 == 0) fprintf(src, "\n");
478
+ }
479
+ fprintf(src, "\n};\n\n");
480
+
481
+ if (clean) {
482
+ std::remove(path.c_str());
483
+ // fprintf(stderr, "Removed: %s\n", path.c_str());
484
+ }
485
+ }
486
+
487
+ fclose(hdr);
488
+ fclose(src);
489
+ }
490
+
491
+ int main(int argc, char** argv) {
492
+ std::map<std::string, std::string> args;
493
+ for (int i = 1; i < argc; i += 2) {
494
+ if (i + 1 < argc) {
495
+ args[argv[i]] = argv[i + 1];
496
+ }
497
+ }
498
+
499
+ if (argc <= 1 || args.find("--help") != args.end()) {
500
+ std::cout << "Usage:\n"
501
+ "\tvulkan-shaders-gen [options]\n\n"
502
+ "Options:\n"
503
+ "\t--glslc <path> Path to glslc executable (default: /usr/bin/glslc)\n"
504
+ "\t--input-dir Directory containing shader sources (required)\n"
505
+ "\t--output-dir Output directory for generated SPIR-V files and optional C++ headers\n"
506
+ "\t--target-hpp <path> Path to generate a header file with shader declarations in C++ format\n"
507
+ "\t--target-cpp <path> Path to generate a source code file implementing the declared shaders (optional)\n"
508
+ "\t--no-clean Keep temporary SPIR-V files after build (default: remove them)\n";
509
+ return EXIT_SUCCESS;
510
+ }
511
+ if (args.find("--glslc") != args.end()) {
512
+ GLSLC = args["--glslc"]; // Path to glslc
513
+ }
514
+ if (args.find("--input-dir") != args.end()) {
515
+ input_dir = args["--input-dir"]; // Directory containing shader sources
516
+ }
517
+ if (args.find("--output-dir") != args.end()) {
518
+ output_dir = args["--output-dir"]; // Directory for containing SPIR-V output
519
+ }
520
+ if (args.find("--target-hpp") != args.end()) {
521
+ target_hpp = args["--target-hpp"]; // Path to generated header file
522
+ }
523
+ if (args.find("--target-cpp") != args.end()) {
524
+ target_cpp = args["--target-cpp"]; // Path to generated cpp file
525
+ }
526
+ if (args.find("--no-clean") != args.end()) {
527
+ clean = false; // Keep temporary SPIR-V files in output-dir after build
528
+ }
529
+
530
+ if (!directory_exists(input_dir)) {
531
+ std::cerr << "\"" << input_dir << "\" must be a valid directory containing shader sources" << std::endl;
532
+ return EXIT_FAILURE;
533
+ }
534
+
535
+ if (!directory_exists(output_dir)) {
536
+ if (!create_directory(output_dir)) {
537
+ std::cerr << "Error creating output directory: " << output_dir << "\n";
538
+ return EXIT_FAILURE;
539
+ }
540
+ }
541
+
542
+ std::vector<std::future<void>> tasks;
543
+ process_shaders(tasks);
544
+
545
+ for (auto& task : tasks) {
546
+ task.get();
547
+ }
548
+
549
+ write_output_files();
550
+
551
+ return EXIT_SUCCESS;
552
+ }