@fugood/llama.node 0.6.2 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -10
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/patches/node-api-headers+1.1.0.patch +0 -26
  27. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  28. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  29. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  30. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  31. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  32. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  33. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  37. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  38. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  39. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  40. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  41. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  42. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  43. package/src/llama.cpp/cmake/common.cmake +0 -35
  44. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  45. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  46. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  47. package/src/llama.cpp/docs/build.md +0 -561
  48. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  49. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  50. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  51. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  53. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  54. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  55. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  56. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  57. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  58. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  59. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  61. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  62. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  63. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  64. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  68. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  70. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  71. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  72. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  73. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  74. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  75. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  76. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  77. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  79. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  80. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  81. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  82. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  83. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  84. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  85. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  86. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  87. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  88. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  89. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  90. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  91. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  92. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  93. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  94. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  95. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  96. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  97. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  98. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  99. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  100. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  101. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  102. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  103. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  104. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  105. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  106. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  107. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  108. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  109. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  110. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  111. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  112. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  113. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  114. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  115. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  116. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  117. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  118. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  119. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  120. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  121. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  122. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  123. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  125. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  127. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  128. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  129. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  130. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  131. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  132. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  135. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  136. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  137. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  138. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  139. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  140. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  141. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  142. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  143. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  144. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  145. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  146. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  147. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  148. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  149. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  150. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  151. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  153. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  155. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  157. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  159. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  161. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  163. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  167. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  169. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  170. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  172. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  173. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  175. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  177. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  181. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  183. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  185. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  186. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  187. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  189. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  191. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  193. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  195. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  196. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  198. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  199. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  200. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  204. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  205. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  206. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  207. package/src/llama.cpp/models/.editorconfig +0 -1
  208. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  212. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  214. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  217. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  228. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  231. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  233. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  234. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  242. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  250. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  253. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  256. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  258. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  261. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  262. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  263. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  264. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  265. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  266. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  267. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  268. package/src/llama.cpp/prompts/assistant.txt +0 -31
  269. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  270. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  271. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  273. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  274. package/src/llama.cpp/prompts/chat.txt +0 -28
  275. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  276. package/src/llama.cpp/prompts/dan.txt +0 -1
  277. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  278. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  279. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  280. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  281. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  284. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  285. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  286. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  287. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  289. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  290. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  291. package/src/llama.cpp/requirements.txt +0 -13
  292. package/src/llama.cpp/scripts/build-info.sh +0 -30
  293. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  294. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  295. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  296. package/src/llama.cpp/tests/get-model.cpp +0 -21
  297. package/src/llama.cpp/tests/get-model.h +0 -2
  298. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  299. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  300. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  301. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  302. package/src/llama.cpp/tests/test-c.c +0 -7
  303. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  304. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  305. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  306. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  307. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  308. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  309. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  310. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  311. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  312. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  313. package/src/llama.cpp/tests/test-log.cpp +0 -39
  314. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  315. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  316. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  317. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  318. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  319. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  320. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  321. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  322. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  323. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  324. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  325. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  326. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  327. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  328. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  329. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  330. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  331. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  332. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  333. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  334. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  335. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  336. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  337. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  338. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  339. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  340. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  341. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  342. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  343. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  344. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  345. package/src/llama.cpp/tools/main/main.cpp +0 -977
  346. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  347. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  348. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  349. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  350. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  351. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  353. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  354. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  355. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  356. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  357. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  358. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  360. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  361. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  362. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  363. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  364. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  365. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  367. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  368. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  369. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  370. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  371. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  372. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  373. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  374. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  375. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  376. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  377. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  378. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,87 +0,0 @@
1
- if (GGML_STATIC)
2
- set(BLA_STATIC ON)
3
- endif()
4
- #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
5
- # set(BLA_SIZEOF_INTEGER 8)
6
- #endif()
7
-
8
- set(BLA_VENDOR ${GGML_BLAS_VENDOR})
9
- find_package(BLAS)
10
-
11
- if (BLAS_FOUND)
12
- message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
13
-
14
- ggml_add_backend_library(ggml-blas
15
- ggml-blas.cpp
16
- )
17
-
18
- if (${GGML_BLAS_VENDOR} MATCHES "Apple")
19
- add_compile_definitions(ACCELERATE_NEW_LAPACK)
20
- add_compile_definitions(ACCELERATE_LAPACK_ILP64)
21
- add_compile_definitions(GGML_BLAS_USE_ACCELERATE)
22
- elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
23
- # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
24
- # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
25
- find_package(PkgConfig REQUIRED)
26
- if (${GGML_BLAS_VENDOR} MATCHES "Generic")
27
- pkg_check_modules(DepBLAS blas)
28
- elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
29
- # As of openblas v0.3.22, the 64-bit is named openblas64.pc
30
- pkg_check_modules(DepBLAS openblas64)
31
- if (NOT DepBLAS_FOUND)
32
- pkg_check_modules(DepBLAS openblas)
33
- endif()
34
- elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
35
- add_compile_definitions(GGML_BLAS_USE_BLIS)
36
- pkg_check_modules(DepBLAS blis)
37
- elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
38
- pkg_check_modules(DepBLAS blas-atlas)
39
- elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
40
- pkg_check_modules(DepBLAS flexiblas_api)
41
- elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
42
- add_compile_definitions(GGML_BLAS_USE_MKL)
43
- # all Intel* libraries share the same include path
44
- pkg_check_modules(DepBLAS mkl-sdl)
45
- elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
46
- # this doesn't provide pkg-config
47
- # suggest to assign BLAS_INCLUDE_DIRS on your own
48
- if ("${NVHPC_VERSION}" STREQUAL "")
49
- message(WARNING "Better to set NVHPC_VERSION")
50
- else()
51
- set(DepBLAS_FOUND ON)
52
- set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
53
- endif()
54
- endif()
55
- if (DepBLAS_FOUND)
56
- set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
57
- else()
58
- message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
59
- " detected by pkgconfig, trying to find cblas.h from possible paths...")
60
- find_path(BLAS_INCLUDE_DIRS
61
- NAMES cblas.h
62
- HINTS
63
- /usr/include
64
- /usr/local/include
65
- /usr/include/openblas
66
- /opt/homebrew/opt/openblas/include
67
- /usr/local/opt/openblas/include
68
- /usr/include/x86_64-linux-gnu/openblas/include
69
- )
70
- endif()
71
- endif()
72
-
73
- message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
74
-
75
- target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
76
-
77
- if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
78
- add_compile_definitions(GGML_BLAS_USE_MKL)
79
- endif()
80
-
81
- target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
82
- target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
83
- else()
84
- message(ERROR "BLAS not found, please refer to "
85
- "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
86
- " to set correct GGML_BLAS_VENDOR")
87
- endif()
@@ -1,517 +0,0 @@
1
- #include "ggml-impl.h"
2
- #include "ggml-blas.h"
3
- #include "ggml-backend-impl.h"
4
-
5
- #include <future>
6
- #include <vector>
7
- #include <cstring>
8
-
9
- #if defined(GGML_BLAS_USE_ACCELERATE)
10
- # include <Accelerate/Accelerate.h>
11
- #elif defined(GGML_BLAS_USE_MKL)
12
- # include <mkl.h>
13
- #elif defined(GGML_BLAS_USE_BLIS)
14
- # include <blis.h>
15
- #elif defined(GGML_BLAS_USE_NVPL)
16
- # include <nvpl_blas.h>
17
- #else
18
- # include <cblas.h>
19
- #endif
20
-
21
- struct ggml_backend_blas_context {
22
- int n_threads = GGML_DEFAULT_N_THREADS;
23
- std::unique_ptr<char[]> work_data;
24
- size_t work_size = 0;
25
- #ifndef GGML_USE_OPENMP
26
- std::vector<std::future<void>> tasks;
27
- #endif
28
- };
29
-
30
- static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
31
- const struct ggml_tensor * src0 = dst->src[0];
32
- const struct ggml_tensor * src1 = dst->src[1];
33
-
34
- GGML_TENSOR_BINARY_OP_LOCALS
35
-
36
- const enum ggml_type type = src0->type;
37
-
38
- GGML_ASSERT(ne0 == ne01);
39
- GGML_ASSERT(ne1 == ne11);
40
- GGML_ASSERT(ne2 == ne12);
41
- GGML_ASSERT(ne3 == ne13);
42
-
43
- // we don't support permuted src0 or src1
44
- GGML_ASSERT(nb00 == ggml_type_size(type));
45
- GGML_ASSERT(nb10 == ggml_type_size(src1->type));
46
-
47
- // dst cannot be transposed or permuted
48
- GGML_ASSERT(nb0 == sizeof(float));
49
- GGML_ASSERT(nb0 <= nb1);
50
- GGML_ASSERT(nb1 <= nb2);
51
- GGML_ASSERT(nb2 <= nb3);
52
-
53
- // broadcast factors
54
- const int64_t r2 = ne12/ne02;
55
- const int64_t r3 = ne13/ne03;
56
-
57
- const int64_t ne_plane = ne01*ne00;
58
- const size_t desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);
59
-
60
- if (ctx->work_size < desired_wsize) {
61
- ctx->work_data.reset(new char[desired_wsize]);
62
- ctx->work_size = desired_wsize;
63
- }
64
- void * wdata = ctx->work_data.get();
65
-
66
- // convert src0 to float
67
- if (type != GGML_TYPE_F32) {
68
- const auto * type_traits = ggml_get_type_traits(type);
69
- ggml_to_float_t const to_float = type_traits->to_float;
70
-
71
- for (int64_t i03 = 0; i03 < ne03; i03++) {
72
- for (int64_t i02 = 0; i02 < ne02; i02++) {
73
- const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
74
- float * const wplane = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
75
-
76
- const int min_cols_per_thread = 4096;
77
- const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1);
78
- const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1);
79
-
80
- #ifdef GGML_USE_OPENMP
81
- #pragma omp parallel for num_threads(n_threads)
82
- for (int64_t i01 = 0; i01 < ne01; i01++) {
83
- to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
84
- }
85
- #else
86
- for (int i = 1; i < n_threads; i++) {
87
- const int64_t start = i*ne01/n_threads;
88
- const int64_t end = (i + 1)*ne01/n_threads;
89
- if (start < end) {
90
- ctx->tasks.push_back(std::async(std::launch::async, [=]() {
91
- for (int64_t i01 = start; i01 < end; i01++) {
92
- to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
93
- }
94
- }));
95
- }
96
- }
97
- {
98
- // reuse the current thread for the first task
99
- const int64_t start = 0;
100
- const int64_t end = ne01/n_threads;
101
- for (int64_t i01 = start; i01 < end; i01++) {
102
- to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
103
- }
104
- }
105
- #endif
106
- }
107
- }
108
-
109
- #ifndef GGML_USE_OPENMP
110
- // wait for all tasks to finish
111
- for (auto & task : ctx->tasks) {
112
- task.get();
113
- }
114
- ctx->tasks.clear();
115
- #endif
116
- }
117
-
118
- #if defined(OPENBLAS_VERSION)
119
- openblas_set_num_threads(ctx->n_threads);
120
- #endif
121
-
122
- #if defined(GGML_BLAS_USE_BLIS)
123
- bli_thread_set_num_threads(ctx->n_threads);
124
- #endif
125
-
126
- #if defined(GGML_BLAS_USE_NVPL)
127
- nvpl_blas_set_num_threads(ctx->n_threads);
128
- #endif
129
-
130
- for (int64_t i13 = 0; i13 < ne13; i13++) {
131
- for (int64_t i12 = 0; i12 < ne12; i12++) {
132
- const int64_t i03 = i13/r3;
133
- const int64_t i02 = i12/r2;
134
-
135
- const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
136
- const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
137
- float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
138
-
139
- if (type != GGML_TYPE_F32) {
140
- x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
141
- }
142
-
143
- cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
144
- ne1, ne01, ne10,
145
- 1.0f, y, ne10,
146
- x, ne00,
147
- 0.0f, d, ne01);
148
- }
149
- }
150
- }
151
-
152
- static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
153
- const struct ggml_tensor * src0 = dst->src[0];
154
- const struct ggml_tensor * src1 = dst->src[1];
155
-
156
- GGML_TENSOR_BINARY_OP_LOCALS
157
-
158
- GGML_ASSERT(ne0 == ne00);
159
- GGML_ASSERT(ne1 == ne10);
160
- GGML_ASSERT(ne2 == ne02);
161
- GGML_ASSERT(ne02 == ne12);
162
- GGML_ASSERT(ne3 == ne13);
163
- GGML_ASSERT(ne03 == ne13);
164
-
165
- // we don't support permuted src0 or src1
166
- GGML_ASSERT(nb00 == sizeof(float));
167
-
168
- // dst cannot be transposed or permuted
169
- GGML_ASSERT(nb0 == sizeof(float));
170
- // GGML_ASSERT(nb0 <= nb1);
171
- // GGML_ASSERT(nb1 <= nb2);
172
- // GGML_ASSERT(nb2 <= nb3);
173
-
174
- // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
175
- // src0: (k,n)
176
- // src1: (k,m)
177
- // dst: (m,n)
178
- //
179
- // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
180
- // Also expressed as (major,minor)
181
- // a: (m,k): so src1 transposed
182
- // b: (k,n): so src0
183
- // c: (m,n)
184
- //
185
- // However, if ggml_is_transposed(src1) is true, then
186
- // src1->data already contains a transposed version, so sgemm mustn't
187
- // transpose it further.
188
-
189
- int n = src0->ne[0];
190
- int k = src0->ne[1];
191
- int m = src1->ne[0];
192
-
193
- CBLAS_TRANSPOSE transposeA;
194
- int lda;
195
-
196
- if (!ggml_is_transposed(src1)) {
197
- transposeA = CblasTrans;
198
- lda = m;
199
- } else {
200
- transposeA = CblasNoTrans;
201
- lda = k;
202
- }
203
-
204
- float * a = (float *) ((char *) src1->data);
205
- float * b = (float *) ((char *) src0->data);
206
- float * c = (float *) ((char *) dst->data);
207
-
208
- cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
209
-
210
- GGML_UNUSED(ctx);
211
- }
212
-
213
- // backend interface
214
-
215
- static const char * ggml_backend_blas_get_name(ggml_backend_t backend) {
216
- return "BLAS";
217
-
218
- GGML_UNUSED(backend);
219
- }
220
-
221
- static void ggml_backend_blas_free(ggml_backend_t backend) {
222
- ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;
223
- delete ctx;
224
- delete backend;
225
- }
226
-
227
- static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
228
- ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;
229
-
230
- for (int i = 0; i < cgraph->n_nodes; i++) {
231
- struct ggml_tensor * node = cgraph->nodes[i];
232
-
233
- switch (node->op) {
234
- case GGML_OP_MUL_MAT:
235
- ggml_backend_blas_mul_mat(ctx, node);
236
- break;
237
-
238
- case GGML_OP_OUT_PROD:
239
- ggml_backend_blas_out_prod(ctx, node);
240
- break;
241
-
242
- case GGML_OP_NONE:
243
- case GGML_OP_RESHAPE:
244
- case GGML_OP_VIEW:
245
- case GGML_OP_PERMUTE:
246
- case GGML_OP_TRANSPOSE:
247
- break;
248
-
249
- default:
250
- GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
251
- }
252
- }
253
-
254
- return GGML_STATUS_SUCCESS;
255
-
256
- GGML_UNUSED(backend);
257
- }
258
-
259
- static struct ggml_backend_i blas_backend_i = {
260
- /* .get_name = */ ggml_backend_blas_get_name,
261
- /* .free = */ ggml_backend_blas_free,
262
- /* .set_tensor_async = */ NULL,
263
- /* .get_tensor_async = */ NULL,
264
- /* .cpy_tensor_async = */ NULL,
265
- /* .synchronize = */ NULL,
266
- /* .graph_plan_create = */ NULL,
267
- /* .graph_plan_free = */ NULL,
268
- /* .graph_plan_update = */ NULL,
269
- /* .graph_plan_compute = */ NULL,
270
- /* .graph_compute = */ ggml_backend_blas_graph_compute,
271
- /* .event_record = */ NULL,
272
- /* .event_wait = */ NULL,
273
- };
274
-
275
- static ggml_guid_t ggml_backend_blas_guid(void) {
276
- static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
277
- return &guid;
278
- }
279
-
280
- ggml_backend_t ggml_backend_blas_init(void) {
281
- ggml_backend_blas_context * ctx = new ggml_backend_blas_context;
282
-
283
- ggml_backend_t backend = new ggml_backend {
284
- /* .guid = */ ggml_backend_blas_guid(),
285
- /* .interface = */ blas_backend_i,
286
- /* .device = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
287
- /* .context = */ ctx,
288
- };
289
-
290
- #if defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP)
291
- if (openblas_get_parallel() != OPENBLAS_OPENMP) {
292
- GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
293
- }
294
- #endif
295
-
296
- #if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
297
- GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
298
- #endif
299
-
300
- return backend;
301
- }
302
-
303
- bool ggml_backend_is_blas(ggml_backend_t backend) {
304
- return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid());
305
- }
306
-
307
- void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) {
308
- GGML_ASSERT(ggml_backend_is_blas(backend_blas));
309
-
310
- ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context;
311
- ctx->n_threads = n_threads;
312
- }
313
-
314
- // device interface
315
-
316
- static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) {
317
- return "BLAS";
318
-
319
- GGML_UNUSED(dev);
320
- }
321
-
322
- static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
323
- #if defined(GGML_BLAS_USE_ACCELERATE)
324
- return "Accelerate";
325
- #elif defined(GGML_BLAS_USE_MKL)
326
- return "MKL";
327
- #elif defined(GGML_BLAS_USE_BLIS)
328
- return "BLIS";
329
- #elif defined(GGML_BLAS_USE_NVPL)
330
- return "NVPL";
331
- #elif defined(OPENBLAS_VERSION)
332
- return "OpenBLAS";
333
- #else
334
- return "BLAS";
335
- #endif
336
-
337
- GGML_UNUSED(dev);
338
- }
339
-
340
- static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
341
- // TODO
342
- *free = 0;
343
- *total = 0;
344
-
345
- GGML_UNUSED(dev);
346
- }
347
-
348
- static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
349
- return GGML_BACKEND_DEVICE_TYPE_ACCEL;
350
-
351
- GGML_UNUSED(dev);
352
- }
353
-
354
- static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
355
- props->name = ggml_backend_blas_device_get_name(dev);
356
- props->description = ggml_backend_blas_device_get_description(dev);
357
- props->type = ggml_backend_blas_device_get_type(dev);
358
- ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
359
- props->caps = {
360
- /* .async = */ false,
361
- /* .host_buffer = */ false,
362
- /* .buffer_from_host_ptr = */ true,
363
- /* .events = */ false,
364
- };
365
- }
366
-
367
- static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
368
- return ggml_backend_blas_init();
369
-
370
- GGML_UNUSED(dev);
371
- GGML_UNUSED(params);
372
- }
373
-
374
- static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
375
- return ggml_backend_cpu_buffer_type();
376
-
377
- GGML_UNUSED(dev);
378
- }
379
-
380
- static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
381
- return ggml_backend_cpu_buffer_from_ptr(ptr, size);
382
-
383
- GGML_UNUSED(dev);
384
- GGML_UNUSED(max_tensor_size);
385
- }
386
-
387
- static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
388
- const struct ggml_tensor * src0 = op->src[0];
389
- const struct ggml_tensor * src1 = op->src[1];
390
-
391
- switch (op->op) {
392
- case GGML_OP_NONE:
393
- case GGML_OP_RESHAPE:
394
- case GGML_OP_VIEW:
395
- case GGML_OP_PERMUTE:
396
- case GGML_OP_TRANSPOSE:
397
- return true;
398
-
399
- case GGML_OP_MUL_MAT:
400
- {
401
- // BLAS usually is only faster for large matrices
402
- const struct ggml_tensor * src0 = op->src[0];
403
- const struct ggml_tensor * src1 = op->src[1];
404
-
405
- const int64_t ne10 = src1->ne[0];
406
-
407
- const int64_t ne0 = op->ne[0];
408
- const int64_t ne1 = op->ne[1];
409
-
410
- // TODO: find the optimal value
411
- const int64_t min_batch = 32;
412
-
413
- return ggml_is_contiguous(src0) &&
414
- ggml_is_contiguous(src1) &&
415
- src1->type == GGML_TYPE_F32 &&
416
- (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
417
- (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
418
- }
419
-
420
- case GGML_OP_OUT_PROD:
421
- return op->src[0]->type == GGML_TYPE_F32 &&
422
- op->src[1]->type == GGML_TYPE_F32 &&
423
- ggml_is_matrix(src0) &&
424
- ggml_is_matrix(src1) &&
425
- ggml_is_contiguous(src0) &&
426
- (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
427
- (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
428
-
429
- default:
430
- return false;
431
-
432
- }
433
-
434
- GGML_UNUSED(dev);
435
- }
436
-
437
- static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
438
- return ggml_backend_buft_is_host(buft);
439
-
440
- GGML_UNUSED(dev);
441
- }
442
-
443
- static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
444
- /* .get_name = */ ggml_backend_blas_device_get_name,
445
- /* .get_description = */ ggml_backend_blas_device_get_description,
446
- /* .get_memory = */ ggml_backend_blas_device_get_memory,
447
- /* .get_type = */ ggml_backend_blas_device_get_type,
448
- /* .get_props = */ ggml_backend_blas_device_get_props,
449
- /* .init_backend = */ ggml_backend_blas_device_init_backend,
450
- /* .get_buffer_type = */ ggml_backend_blas_device_get_buffer_type,
451
- /* .get_host_buffer_type = */ NULL,
452
- /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
453
- /* .supports_op = */ ggml_backend_blas_device_supports_op,
454
- /* .supports_buft = */ ggml_backend_blas_device_supports_buft,
455
- /* .offload_op = */ NULL,
456
- /* .event_new = */ NULL,
457
- /* .event_free = */ NULL,
458
- /* .event_synchronize = */ NULL,
459
- };
460
-
461
- // backend reg interface
462
-
463
- static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
464
- return "BLAS";
465
-
466
- GGML_UNUSED(reg);
467
- }
468
-
469
- static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
470
- return 1;
471
-
472
- GGML_UNUSED(reg);
473
- }
474
-
475
- static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
476
- GGML_ASSERT(index == 0);
477
-
478
- static ggml_backend_device ggml_backend_blas_device = {
479
- /* .iface = */ ggml_backend_blas_device_i,
480
- /* .reg = */ reg,
481
- /* .context = */ nullptr,
482
- };
483
-
484
- return &ggml_backend_blas_device;
485
-
486
- GGML_UNUSED(reg);
487
- GGML_UNUSED(index);
488
- }
489
-
490
- static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
491
- if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
492
- return (void *)ggml_backend_blas_set_n_threads;
493
- }
494
- return NULL;
495
-
496
- GGML_UNUSED(reg);
497
- GGML_UNUSED(name);
498
- }
499
-
500
- static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
501
- /* .get_name = */ ggml_backend_blas_reg_get_name,
502
- /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
503
- /* .get_device = */ ggml_backend_blas_reg_get_device,
504
- /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
505
- };
506
-
507
- ggml_backend_reg_t ggml_backend_blas_reg(void) {
508
- static struct ggml_backend_reg ggml_backend_blas_reg = {
509
- /* .api_version = */ GGML_BACKEND_API_VERSION,
510
- /* .iface = */ ggml_backend_blas_reg_i,
511
- /* .context = */ NULL,
512
- };
513
-
514
- return &ggml_backend_blas_reg;
515
- }
516
-
517
- GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
@@ -1,74 +0,0 @@
1
# Fall back to the ASCEND_TOOLKIT_HOME environment variable when CANN_INSTALL_DIR
# is empty or unset. The "cann" prefix keeps the comparison well-formed for an
# empty value.
if (DEFINED ENV{ASCEND_TOOLKIT_HOME} AND "cann${CANN_INSTALL_DIR}" STREQUAL "cann")
    set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
    message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
endif()
5
-
6
# Auto-detect the SoC type and version; the build is aborted if detection fails.
set(SOC_VERSION "")

# detect_ascend_soc_type(<out_var>)
#
# Runs `npu-smi info`, takes the third whitespace-separated field of output
# line 7 and stores "Ascend<field>" in <out_var> in the caller's scope.
# Stops configuration with FATAL_ERROR when the command fails or prints nothing.
#
# Example:
#   detect_ascend_soc_type(SOC_VERSION)
function(detect_ascend_soc_type out_var)
    execute_process(
        COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'"
        OUTPUT_VARIABLE npu_info
        RESULT_VARIABLE npu_result
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    # RESULT_VARIABLE holds either an exit code or an error string; comparing it
    # by variable name keeps the condition well-formed for a multi-word string.
    if("${npu_info}" STREQUAL "" OR NOT npu_result EQUAL 0)
        message(FATAL_ERROR "Auto-detect ascend soc type failed, please specify manually or check ascend device working normally.")
    endif()
    set(${out_var} "Ascend${npu_info}" PARENT_SCOPE)
endfunction()
20
-
21
# Use the user-provided SOC_TYPE when set, otherwise query the device.
if(NOT SOC_TYPE)
    detect_ascend_soc_type(SOC_VERSION)
    set(SOC_TYPE "${SOC_VERSION}")
    message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
endif()

# SOC_VERSION must be lower case. The expansion is quoted so that an empty or
# list-valued SOC_TYPE cannot change the argument count of string().
string(TOLOWER "${SOC_TYPE}" SOC_VERSION)

# Construct the SoC-specific compile option ASCEND_<major serial number>,
# such as ASCEND_910B or ASCEND_310P.
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
string(TOUPPER "${SOC_TYPE_COMPILE_OPTION}" SOC_TYPE_COMPILE_OPTION)
33
-
34
# Configure the ggml-cann backend library from the CANN toolkit installation.
if (CANN_INSTALL_DIR)
    # The toolkit is only supported on Unix-like hosts.
    if (NOT UNIX)
        message(FATAL_ERROR "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}")
    endif()

    # Supported platforms: x86-64, arm64
    if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|x86_64|amd64)$")
        message(FATAL_ERROR "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}")
    endif()

    # Headers and libraries shipped with the toolkit
    set(CANN_INCLUDE_DIRS
        ${CANN_INSTALL_DIR}/include
        ${CANN_INSTALL_DIR}/include/aclnn
        ${CANN_INSTALL_DIR}/acllib/include
    )

    list(APPEND CANN_LIBRARIES
        ascendcl
        nnopbase
        opapi
        acl_op_compiler
    )

    # CONFIGURE_DEPENDS re-runs the glob at build time so that added or removed
    # sources are picked up without a manual re-configure.
    file(GLOB GGML_SOURCES_CANN CONFIGURE_DEPENDS "*.cpp")

    ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
    target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
    target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
    target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)

    # target_compile_definitions takes the bare macro name; no -D prefix is needed
    target_compile_definitions(ggml-cann PRIVATE ${SOC_TYPE_COMPILE_OPTION})

    message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
    message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
else()
    message(FATAL_ERROR "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh?")
endif()