@novastera-oss/llamarn 0.0.1-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (989):
  1. package/INTERFACE.md +389 -0
  2. package/LICENSE +201 -0
  3. package/README.md +235 -0
  4. package/RNLlamaCpp.podspec +69 -0
  5. package/android/CMakeLists.txt +107 -0
  6. package/android/build.gradle +111 -0
  7. package/android/generated/java/com/novastera/llamarn/NativeRNLlamaCppSpec.java +47 -0
  8. package/android/generated/jni/CMakeLists.txt +36 -0
  9. package/android/generated/jni/RNLlamaCppSpec-generated.cpp +44 -0
  10. package/android/generated/jni/RNLlamaCppSpec.h +31 -0
  11. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI-generated.cpp +42 -0
  12. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI.h +336 -0
  13. package/android/gradle.properties +5 -0
  14. package/android/src/main/AndroidManifest.xml +3 -0
  15. package/android/src/main/AndroidManifestNew.xml +2 -0
  16. package/android/src/main/cpp/include/llama-cpp.h +30 -0
  17. package/android/src/main/cpp/include/llama.h +1440 -0
  18. package/android/src/main/java/com/novastera/llamarn/RNLlamaCppPackage.kt +21 -0
  19. package/android/src/main/jniLibs/arm64-v8a/libOpenCL.so +0 -0
  20. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  21. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  22. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  23. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  24. package/android/src/main/jniLibs/x86_64/libOpenCL.so +0 -0
  25. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  26. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  27. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  28. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  29. package/cpp/LlamaCppModel.cpp +984 -0
  30. package/cpp/LlamaCppModel.h +162 -0
  31. package/cpp/PureCppImpl.cpp +308 -0
  32. package/cpp/PureCppImpl.h +59 -0
  33. package/cpp/SystemUtils.cpp +180 -0
  34. package/cpp/SystemUtils.h +74 -0
  35. package/cpp/build-info.cpp +4 -0
  36. package/cpp/llama.cpp/AUTHORS +1106 -0
  37. package/cpp/llama.cpp/CMakeLists.txt +254 -0
  38. package/cpp/llama.cpp/CMakePresets.json +84 -0
  39. package/cpp/llama.cpp/CODEOWNERS +11 -0
  40. package/cpp/llama.cpp/CONTRIBUTING.md +127 -0
  41. package/cpp/llama.cpp/LICENSE +21 -0
  42. package/cpp/llama.cpp/Makefile +1608 -0
  43. package/cpp/llama.cpp/README.md +575 -0
  44. package/cpp/llama.cpp/SECURITY.md +68 -0
  45. package/cpp/llama.cpp/build-xcframework.sh +540 -0
  46. package/cpp/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  47. package/cpp/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  48. package/cpp/llama.cpp/cmake/build-info.cmake +64 -0
  49. package/cpp/llama.cpp/cmake/common.cmake +35 -0
  50. package/cpp/llama.cpp/cmake/git-vars.cmake +22 -0
  51. package/cpp/llama.cpp/cmake/llama-config.cmake.in +30 -0
  52. package/cpp/llama.cpp/cmake/llama.pc.in +10 -0
  53. package/cpp/llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
  54. package/cpp/llama.cpp/common/CMakeLists.txt +170 -0
  55. package/cpp/llama.cpp/common/arg.cpp +3337 -0
  56. package/cpp/llama.cpp/common/arg.h +89 -0
  57. package/cpp/llama.cpp/common/base64.hpp +392 -0
  58. package/cpp/llama.cpp/common/build-info.cpp.in +4 -0
  59. package/cpp/llama.cpp/common/chat.cpp +1781 -0
  60. package/cpp/llama.cpp/common/chat.h +135 -0
  61. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +24 -0
  62. package/cpp/llama.cpp/common/common.cpp +1567 -0
  63. package/cpp/llama.cpp/common/common.h +668 -0
  64. package/cpp/llama.cpp/common/console.cpp +504 -0
  65. package/cpp/llama.cpp/common/console.h +19 -0
  66. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +1027 -0
  67. package/cpp/llama.cpp/common/json-schema-to-grammar.h +21 -0
  68. package/cpp/llama.cpp/common/json.hpp +24766 -0
  69. package/cpp/llama.cpp/common/llguidance.cpp +254 -0
  70. package/cpp/llama.cpp/common/log.cpp +393 -0
  71. package/cpp/llama.cpp/common/log.h +103 -0
  72. package/cpp/llama.cpp/common/minja/chat-template.hpp +537 -0
  73. package/cpp/llama.cpp/common/minja/minja.hpp +2941 -0
  74. package/cpp/llama.cpp/common/ngram-cache.cpp +286 -0
  75. package/cpp/llama.cpp/common/ngram-cache.h +101 -0
  76. package/cpp/llama.cpp/common/sampling.cpp +580 -0
  77. package/cpp/llama.cpp/common/sampling.h +107 -0
  78. package/cpp/llama.cpp/common/speculative.cpp +278 -0
  79. package/cpp/llama.cpp/common/speculative.h +28 -0
  80. package/cpp/llama.cpp/common/stb_image.h +7988 -0
  81. package/cpp/llama.cpp/convert_hf_to_gguf.py +6195 -0
  82. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +393 -0
  83. package/cpp/llama.cpp/convert_llama_ggml_to_gguf.py +450 -0
  84. package/cpp/llama.cpp/convert_lora_to_gguf.py +461 -0
  85. package/cpp/llama.cpp/flake.lock +58 -0
  86. package/cpp/llama.cpp/flake.nix +185 -0
  87. package/cpp/llama.cpp/ggml/CMakeLists.txt +388 -0
  88. package/cpp/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  89. package/cpp/llama.cpp/ggml/cmake/common.cmake +26 -0
  90. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  91. package/cpp/llama.cpp/ggml/include/ggml-alloc.h +76 -0
  92. package/cpp/llama.cpp/ggml/include/ggml-backend.h +354 -0
  93. package/cpp/llama.cpp/ggml/include/ggml-blas.h +25 -0
  94. package/cpp/llama.cpp/ggml/include/ggml-cann.h +123 -0
  95. package/cpp/llama.cpp/ggml/include/ggml-cpp.h +39 -0
  96. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +143 -0
  97. package/cpp/llama.cpp/ggml/include/ggml-cuda.h +47 -0
  98. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +50 -0
  99. package/cpp/llama.cpp/ggml/include/ggml-metal.h +66 -0
  100. package/cpp/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  101. package/cpp/llama.cpp/ggml/include/ggml-opt.h +216 -0
  102. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +33 -0
  103. package/cpp/llama.cpp/ggml/include/ggml-sycl.h +49 -0
  104. package/cpp/llama.cpp/ggml/include/ggml-vulkan.h +29 -0
  105. package/cpp/llama.cpp/ggml/include/ggml.h +2192 -0
  106. package/cpp/llama.cpp/ggml/include/gguf.h +202 -0
  107. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +345 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +1042 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +255 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +586 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +2008 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +179 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2589 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +1083 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +420 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2554 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-common.h +1857 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +495 -0
  124. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  125. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  126. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  131. package/cpp/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  132. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
  134. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  135. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  136. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  138. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +512 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +13131 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  141. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  142. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3492 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +254 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +60 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +287 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +47 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +828 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +695 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +873 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1269 -0
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +437 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +428 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +345 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3501 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +322 -0
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  213. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +189 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  241. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  242. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  243. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  251. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  255. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  256. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  258. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  260. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  261. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  262. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  265. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  267. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  268. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  276. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  277. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  278. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  279. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  280. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  282. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  284. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  285. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  286. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  287. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  288. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  291. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  292. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  293. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  294. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  295. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  296. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  297. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  298. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  299. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  300. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  301. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  302. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  313. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  316. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  317. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  321. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  322. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  323. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  324. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  326. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  327. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  329. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  331. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  332. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  335. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  336. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  337. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  338. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  339. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  340. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  341. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  342. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  343. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  345. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  346. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  347. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  348. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  349. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  350. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  351. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  352. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  353. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  354. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  355. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  356. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  357. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  358. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  359. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  360. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  361. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  362. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  363. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  364. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  365. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  366. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +279 -0
  367. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +57 -0
  368. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  369. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  370. package/cpp/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  371. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  372. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  373. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  375. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  376. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  377. package/cpp/llama.cpp/ggml/src/ggml-impl.h +601 -0
  378. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  379. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  381. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  382. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  383. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  384. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  385. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  386. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  387. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  388. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  389. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  390. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  391. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  394. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  396. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  397. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  399. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  400. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  403. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  404. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  405. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  406. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  407. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  408. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  409. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  410. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  411. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  412. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  413. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  414. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  415. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  416. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  418. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +618 -0
  419. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +5916 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +6891 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  422. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4966 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  425. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  427. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  428. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  429. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  430. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  431. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  432. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  433. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  434. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  435. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  436. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  437. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  438. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  439. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  440. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  443. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  444. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  445. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  447. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  449. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  450. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  451. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  452. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  453. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  454. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  455. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  457. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  458. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-quants.c +5232 -0
  462. package/cpp/llama.cpp/ggml/src/ggml-quants.h +100 -0
  463. package/cpp/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  464. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
  465. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +183 -0
  466. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  467. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  470. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +493 -0
  471. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +197 -0
  472. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +100 -0
  474. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +596 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  479. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +753 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1154 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  482. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1559 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +70 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +311 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4302 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  490. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  493. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1081 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  497. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +474 -0
  498. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  499. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +46 -0
  500. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  501. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  502. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +61 -0
  503. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +362 -0
  504. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  505. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +264 -0
  506. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  507. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  508. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  509. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  510. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  511. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1189 -0
  512. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  513. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  514. package/cpp/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  515. package/cpp/llama.cpp/ggml/src/ggml-threading.h +14 -0
  516. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +202 -0
  517. package/cpp/llama.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  518. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10502 -0
  519. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +22 -0
  520. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  521. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  522. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  523. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  524. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  525. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  526. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  527. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  528. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  529. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  530. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  531. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  532. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  533. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  534. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  535. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  536. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  537. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  538. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  539. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  540. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  541. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  542. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  543. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  544. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  545. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  546. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  547. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  548. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  549. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  550. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  551. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  552. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  553. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  554. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  555. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  556. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  557. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  558. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +483 -0
  559. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +383 -0
  560. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  561. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  562. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  563. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  564. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  565. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  566. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  567. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  568. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  569. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  570. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  571. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  572. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  573. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  574. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  575. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  576. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  577. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  578. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  579. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  580. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  581. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  582. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  583. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  584. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  585. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  586. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  587. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  588. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  589. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  590. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  591. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  592. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  593. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  594. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  595. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  596. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  597. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  598. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  599. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  600. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  601. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  602. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  603. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  604. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  605. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  606. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  607. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  608. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  609. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  610. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  611. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  612. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  613. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  614. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  615. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  616. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  617. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  618. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  619. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  620. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  621. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  622. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  623. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  624. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  625. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  626. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  627. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +740 -0
  628. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  629. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  630. package/cpp/llama.cpp/ggml/src/ggml.c +6499 -0
  631. package/cpp/llama.cpp/ggml/src/gguf.cpp +1330 -0
  632. package/cpp/llama.cpp/gguf-py/LICENSE +21 -0
  633. package/cpp/llama.cpp/gguf-py/README.md +99 -0
  634. package/cpp/llama.cpp/gguf-py/examples/reader.py +49 -0
  635. package/cpp/llama.cpp/gguf-py/examples/writer.py +39 -0
  636. package/cpp/llama.cpp/gguf-py/gguf/__init__.py +9 -0
  637. package/cpp/llama.cpp/gguf-py/gguf/constants.py +2296 -0
  638. package/cpp/llama.cpp/gguf-py/gguf/gguf.py +15 -0
  639. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +367 -0
  640. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +1041 -0
  641. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +223 -0
  642. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +642 -0
  643. package/cpp/llama.cpp/gguf-py/gguf/py.typed +0 -0
  644. package/cpp/llama.cpp/gguf-py/gguf/quants.py +1269 -0
  645. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +182 -0
  646. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +454 -0
  647. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +1610 -0
  648. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_hash.py +102 -0
  649. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +207 -0
  650. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_set_metadata.py +95 -0
  651. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +1172 -0
  652. package/cpp/llama.cpp/gguf-py/gguf/utility.py +264 -0
  653. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +492 -0
  654. package/cpp/llama.cpp/gguf-py/pyproject.toml +43 -0
  655. package/cpp/llama.cpp/gguf-py/tests/__init__.py +1 -0
  656. package/cpp/llama.cpp/gguf-py/tests/test_metadata.py +238 -0
  657. package/cpp/llama.cpp/gguf-py/tests/test_quants.py +238 -0
  658. package/cpp/llama.cpp/grammars/README.md +382 -0
  659. package/cpp/llama.cpp/grammars/arithmetic.gbnf +6 -0
  660. package/cpp/llama.cpp/grammars/c.gbnf +42 -0
  661. package/cpp/llama.cpp/grammars/chess.gbnf +13 -0
  662. package/cpp/llama.cpp/grammars/english.gbnf +6 -0
  663. package/cpp/llama.cpp/grammars/japanese.gbnf +7 -0
  664. package/cpp/llama.cpp/grammars/json.gbnf +25 -0
  665. package/cpp/llama.cpp/grammars/json_arr.gbnf +34 -0
  666. package/cpp/llama.cpp/grammars/list.gbnf +4 -0
  667. package/cpp/llama.cpp/include/llama-cpp.h +30 -0
  668. package/cpp/llama.cpp/include/llama.h +1440 -0
  669. package/cpp/llama.cpp/licenses/LICENSE-curl +9 -0
  670. package/cpp/llama.cpp/licenses/LICENSE-httplib +21 -0
  671. package/cpp/llama.cpp/licenses/LICENSE-jsonhpp +21 -0
  672. package/cpp/llama.cpp/licenses/LICENSE-linenoise +26 -0
  673. package/cpp/llama.cpp/media/llama0-banner.png +0 -0
  674. package/cpp/llama.cpp/media/llama0-logo.png +0 -0
  675. package/cpp/llama.cpp/media/llama1-banner.png +0 -0
  676. package/cpp/llama.cpp/media/llama1-logo.png +0 -0
  677. package/cpp/llama.cpp/media/llama1-logo.svg +34 -0
  678. package/cpp/llama.cpp/media/matmul.png +0 -0
  679. package/cpp/llama.cpp/media/matmul.svg +1238 -0
  680. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  681. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  682. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  683. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  684. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  685. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  686. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  687. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  688. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  689. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  690. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  691. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  692. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  693. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  694. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  695. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  696. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  697. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  698. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  699. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  700. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  701. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  702. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  703. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  704. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  705. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  706. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  707. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  708. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  709. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  710. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  711. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  712. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  713. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  714. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  715. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  716. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  717. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  718. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  719. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  720. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  721. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  722. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  723. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  724. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  725. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  726. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  727. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  728. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  729. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  730. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  731. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  732. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  733. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  734. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +202 -0
  735. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +156 -0
  736. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +152 -0
  737. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +152 -0
  738. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +54 -0
  739. package/cpp/llama.cpp/models/templates/README.md +22 -0
  740. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +1 -0
  741. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +1 -0
  742. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +57 -0
  743. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +4 -0
  744. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +76 -0
  745. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +58 -0
  746. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +287 -0
  747. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +109 -0
  748. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +93 -0
  749. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +109 -0
  750. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +8 -0
  751. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +87 -0
  752. package/cpp/llama.cpp/mypy.ini +7 -0
  753. package/cpp/llama.cpp/pocs/CMakeLists.txt +14 -0
  754. package/cpp/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  755. package/cpp/llama.cpp/pocs/vdot/q8dot.cpp +173 -0
  756. package/cpp/llama.cpp/pocs/vdot/vdot.cpp +311 -0
  757. package/cpp/llama.cpp/poetry.lock +1197 -0
  758. package/cpp/llama.cpp/prompts/LLM-questions.txt +49 -0
  759. package/cpp/llama.cpp/prompts/alpaca.txt +1 -0
  760. package/cpp/llama.cpp/prompts/assistant.txt +31 -0
  761. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  762. package/cpp/llama.cpp/prompts/chat-with-bob.txt +7 -0
  763. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  764. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  765. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  766. package/cpp/llama.cpp/prompts/chat.txt +28 -0
  767. package/cpp/llama.cpp/prompts/dan-modified.txt +1 -0
  768. package/cpp/llama.cpp/prompts/dan.txt +1 -0
  769. package/cpp/llama.cpp/prompts/mnemonics.txt +93 -0
  770. package/cpp/llama.cpp/prompts/parallel-questions.txt +43 -0
  771. package/cpp/llama.cpp/prompts/reason-act.txt +18 -0
  772. package/cpp/llama.cpp/pyproject.toml +45 -0
  773. package/cpp/llama.cpp/pyrightconfig.json +22 -0
  774. package/cpp/llama.cpp/requirements/requirements-all.txt +15 -0
  775. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  776. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  777. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  778. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +5 -0
  779. package/cpp/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  780. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  781. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  782. package/cpp/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  783. package/cpp/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  784. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  785. package/cpp/llama.cpp/requirements.txt +13 -0
  786. package/cpp/llama.cpp/src/CMakeLists.txt +45 -0
  787. package/cpp/llama.cpp/src/llama-adapter.cpp +388 -0
  788. package/cpp/llama.cpp/src/llama-adapter.h +76 -0
  789. package/cpp/llama.cpp/src/llama-arch.cpp +1743 -0
  790. package/cpp/llama.cpp/src/llama-arch.h +437 -0
  791. package/cpp/llama.cpp/src/llama-batch.cpp +372 -0
  792. package/cpp/llama.cpp/src/llama-batch.h +89 -0
  793. package/cpp/llama.cpp/src/llama-chat.cpp +663 -0
  794. package/cpp/llama.cpp/src/llama-chat.h +58 -0
  795. package/cpp/llama.cpp/src/llama-context.cpp +2459 -0
  796. package/cpp/llama.cpp/src/llama-context.h +246 -0
  797. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -0
  798. package/cpp/llama.cpp/src/llama-cparams.h +39 -0
  799. package/cpp/llama.cpp/src/llama-grammar.cpp +1219 -0
  800. package/cpp/llama.cpp/src/llama-grammar.h +173 -0
  801. package/cpp/llama.cpp/src/llama-graph.cpp +1713 -0
  802. package/cpp/llama.cpp/src/llama-graph.h +595 -0
  803. package/cpp/llama.cpp/src/llama-hparams.cpp +79 -0
  804. package/cpp/llama.cpp/src/llama-hparams.h +161 -0
  805. package/cpp/llama.cpp/src/llama-impl.cpp +167 -0
  806. package/cpp/llama.cpp/src/llama-impl.h +61 -0
  807. package/cpp/llama.cpp/src/llama-io.cpp +15 -0
  808. package/cpp/llama.cpp/src/llama-io.h +35 -0
  809. package/cpp/llama.cpp/src/llama-kv-cache.cpp +2486 -0
  810. package/cpp/llama.cpp/src/llama-kv-cache.h +405 -0
  811. package/cpp/llama.cpp/src/llama-memory.cpp +1 -0
  812. package/cpp/llama.cpp/src/llama-memory.h +31 -0
  813. package/cpp/llama.cpp/src/llama-mmap.cpp +600 -0
  814. package/cpp/llama.cpp/src/llama-mmap.h +68 -0
  815. package/cpp/llama.cpp/src/llama-model-loader.cpp +1133 -0
  816. package/cpp/llama.cpp/src/llama-model-loader.h +169 -0
  817. package/cpp/llama.cpp/src/llama-model.cpp +13453 -0
  818. package/cpp/llama.cpp/src/llama-model.h +420 -0
  819. package/cpp/llama.cpp/src/llama-quant.cpp +964 -0
  820. package/cpp/llama.cpp/src/llama-quant.h +1 -0
  821. package/cpp/llama.cpp/src/llama-sampling.cpp +2575 -0
  822. package/cpp/llama.cpp/src/llama-sampling.h +32 -0
  823. package/cpp/llama.cpp/src/llama-vocab.cpp +3313 -0
  824. package/cpp/llama.cpp/src/llama-vocab.h +125 -0
  825. package/cpp/llama.cpp/src/llama.cpp +340 -0
  826. package/cpp/llama.cpp/src/unicode-data.cpp +7034 -0
  827. package/cpp/llama.cpp/src/unicode-data.h +20 -0
  828. package/cpp/llama.cpp/src/unicode.cpp +849 -0
  829. package/cpp/llama.cpp/src/unicode.h +66 -0
  830. package/cpp/rn-completion.cpp +431 -0
  831. package/cpp/rn-llama.hpp +60 -0
  832. package/cpp/rn-utils.hpp +331 -0
  833. package/ios/OnLoad.mm +22 -0
  834. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec-generated.mm +64 -0
  835. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec.h +251 -0
  836. package/ios/generated/RNLlamaCppSpecJSI-generated.cpp +42 -0
  837. package/ios/generated/RNLlamaCppSpecJSI.h +336 -0
  838. package/ios/include/chat.h +135 -0
  839. package/ios/include/common/base64.hpp +392 -0
  840. package/ios/include/common/json.hpp +24766 -0
  841. package/ios/include/common/minja/chat-template.hpp +537 -0
  842. package/ios/include/common/minja/minja.hpp +2941 -0
  843. package/ios/include/common.h +668 -0
  844. package/ios/include/json-schema-to-grammar.h +21 -0
  845. package/ios/include/llama-cpp.h +30 -0
  846. package/ios/include/llama.h +1440 -0
  847. package/ios/include/log.h +103 -0
  848. package/ios/include/ngram-cache.h +101 -0
  849. package/ios/include/sampling.h +107 -0
  850. package/ios/include/speculative.h +28 -0
  851. package/ios/libs/llama.xcframework/Info.plist +135 -0
  852. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  853. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  854. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  855. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  856. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  857. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  858. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  859. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  860. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +2192 -0
  861. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/gguf.h +202 -0
  862. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +1440 -0
  863. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Info.plist +36 -0
  864. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Modules/module.modulemap +17 -0
  865. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  866. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  867. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  868. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  869. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  870. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  871. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  872. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  873. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  874. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  875. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  876. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  877. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  878. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Info.plist +36 -0
  879. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  880. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3442 -0
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +76 -0
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +354 -0
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +25 -0
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +143 -0
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +66 -0
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +2192 -0
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +202 -0
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +1440 -0
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +17 -0
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +32 -0
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +76 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +354 -0
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +25 -0
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +143 -0
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +66 -0
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +2192 -0
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +202 -0
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +1440 -0
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +17 -0
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +32 -0
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +76 -0
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +354 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +25 -0
  909. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +143 -0
  910. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +66 -0
  911. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +2192 -0
  912. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +202 -0
  913. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +1440 -0
  914. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +17 -0
  915. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +32 -0
  916. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  917. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  918. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  919. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  920. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  925. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +2192 -0
  927. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +202 -0
  928. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +1440 -0
  929. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +35 -0
  930. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +17 -0
  931. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  940. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  941. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  942. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  943. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  944. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +35 -0
  945. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  946. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  947. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  948. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  949. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4528 -0
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  955. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +2192 -0
  956. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +202 -0
  957. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +1440 -0
  958. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +32 -0
  959. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +17 -0
  960. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4549 -0
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3470 -0
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  971. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  972. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  973. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +32 -0
  974. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  975. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  976. package/lib/module/NativeRNLlamaCpp.js +35 -0
  977. package/lib/module/NativeRNLlamaCpp.js.map +1 -0
  978. package/lib/module/index.js +20 -0
  979. package/lib/module/index.js.map +1 -0
  980. package/lib/module/package.json +1 -0
  981. package/lib/typescript/package.json +1 -0
  982. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +222 -0
  983. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -0
  984. package/lib/typescript/src/index.d.ts +5 -0
  985. package/lib/typescript/src/index.d.ts.map +1 -0
  986. package/package.json +161 -0
  987. package/react-native.config.js +15 -0
  988. package/src/NativeRNLlamaCpp.ts +282 -0
  989. package/src/index.tsx +54 -0
@@ -0,0 +1,1608 @@
1
+ ifndef LLAMA_MAKEFILE
2
+ $(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md)
3
+ endif
4
+
5
+ # Define the default target now so that it is always the first target
6
+ BUILD_TARGETS = \
7
+ libllava.a \
8
+ llama-batched \
9
+ llama-batched-bench \
10
+ llama-bench \
11
+ llama-cli \
12
+ llama-convert-llama2c-to-ggml \
13
+ llama-embedding \
14
+ llama-eval-callback \
15
+ llama-export-lora \
16
+ llama-gbnf-validator \
17
+ llama-gguf \
18
+ llama-gguf-hash \
19
+ llama-gguf-split \
20
+ llama-gritlm \
21
+ llama-imatrix \
22
+ llama-infill \
23
+ llama-llava-cli \
24
+ llama-minicpmv-cli\
25
+ llama-qwen2vl-cli\
26
+ llama-lookahead \
27
+ llama-lookup \
28
+ llama-lookup-create \
29
+ llama-lookup-merge \
30
+ llama-lookup-stats \
31
+ llama-parallel \
32
+ llama-passkey \
33
+ llama-perplexity \
34
+ llama-q8dot \
35
+ llama-quantize \
36
+ llama-quantize-stats \
37
+ llama-retrieval \
38
+ llama-save-load-state \
39
+ llama-server \
40
+ llama-simple \
41
+ llama-simple-chat \
42
+ llama-run \
43
+ llama-speculative \
44
+ llama-tokenize \
45
+ llama-vdot \
46
+ llama-cvector-generator \
47
+ llama-gen-docs \
48
+ tests/test-c.o
49
+
50
+ # Binaries only useful for tests
51
+ TEST_TARGETS = \
52
+ tests/test-arg-parser \
53
+ tests/test-autorelease \
54
+ tests/test-backend-ops \
55
+ tests/test-chat \
56
+ tests/test-chat-template \
57
+ tests/test-double-float \
58
+ tests/test-grammar-integration \
59
+ tests/test-grammar-parser \
60
+ tests/test-json-schema-to-grammar \
61
+ tests/test-llama-grammar \
62
+ tests/test-log \
63
+ tests/test-model-load-cancel \
64
+ tests/test-quantize-fns \
65
+ tests/test-quantize-perf \
66
+ tests/test-rope \
67
+ tests/test-sampling \
68
+ tests/test-tokenizer-0 \
69
+ tests/test-tokenizer-1-bpe \
70
+ tests/test-tokenizer-1-spm
71
+ # tests/test-opt \
72
+
73
+ # Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
74
+ LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
75
+ simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
76
+ retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm
77
+
78
+ # Legacy build targets that were renamed in #7809; we still build binaries under these old names that output a deprecation warning if people try to use them.
79
+ # We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
80
+ LEGACY_TARGETS_BUILD = main quantize perplexity embedding server
81
+
82
+ # Deprecation aliases
83
+ ifdef LLAMA_CUBLAS
84
+ $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
85
+ endif
86
+
87
+ ifdef LLAMA_CUDA
88
+ GGML_CUDA := 1
89
+ DEPRECATE_WARNING := 1
90
+ endif
91
+
92
+ ifdef LLAMA_KOMPUTE
93
+ GGML_KOMPUTE := 1
94
+ DEPRECATE_WARNING := 1
95
+ endif
96
+
97
+ ifdef LLAMA_METAL
98
+ GGML_METAL := 1
99
+ DEPRECATE_WARNING := 1
100
+ endif
101
+
102
+ ifdef LLAMA_RPC
103
+ GGML_RPC := 1
104
+ DEPRECATE_WARNING := 1
105
+ endif
106
+
107
+ ifdef LLAMA_SYCL
108
+ GGML_SYCL := 1
109
+ DEPRECATE_WARNING := 1
110
+ endif
111
+
112
+ ifdef LLAMA_SYCL_F16
113
+ GGML_SYCL_F16 := 1
114
+ DEPRECATE_WARNING := 1
115
+ endif
116
+
117
+ ifdef LLAMA_OPENBLAS
118
+ GGML_OPENBLAS := 1
119
+ DEPRECATE_WARNING := 1
120
+ endif
121
+
122
+ ifdef LLAMA_OPENBLAS64
123
+ GGML_OPENBLAS64 := 1
124
+ DEPRECATE_WARNING := 1
125
+ endif
126
+
127
+ ifdef LLAMA_BLIS
128
+ GGML_BLIS := 1
129
+ DEPRECATE_WARNING := 1
130
+ endif
131
+
132
+ ifdef LLAMA_NO_LLAMAFILE
133
+ GGML_NO_LLAMAFILE := 1
134
+ DEPRECATE_WARNING := 1
135
+ endif
136
+
137
+ ifdef LLAMA_NO_ACCELERATE
138
+ GGML_NO_ACCELERATE := 1
139
+ DEPRECATE_WARNING := 1
140
+ endif
141
+
142
+ ifdef LLAMA_NO_OPENMP
143
+ GGML_NO_OPENMP := 1
144
+ DEPRECATE_WARNING := 1
145
+ endif
146
+
147
+ ifdef LLAMA_NO_METAL
148
+ GGML_NO_METAL := 1
149
+ DEPRECATE_WARNING := 1
150
+ endif
151
+
152
+ ifdef LLAMA_DISABLE_LOGS
153
+ REMOVE_WARNING := 1
154
+ endif
155
+
156
+ ifdef LLAMA_SERVER_VERBOSE
157
+ REMOVE_WARNING := 1
158
+ endif
159
+
160
+ ifndef UNAME_S
161
+ UNAME_S := $(shell uname -s)
162
+ endif
163
+
164
+ ifndef UNAME_P
165
+ UNAME_P := $(shell uname -p)
166
+ endif
167
+
168
+ ifndef UNAME_M
169
+ UNAME_M := $(shell uname -m)
170
+ endif
171
+
172
+ # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
173
+ # of non-gcc compilers don't have to provide g++ alias or wrapper.
174
+ DEFCC := cc
175
+ DEFCXX := c++
176
+ ifeq ($(origin CC),default)
177
+ CC := $(DEFCC)
178
+ endif
179
+ ifeq ($(origin CXX),default)
180
+ CXX := $(DEFCXX)
181
+ endif
182
+
183
+ # Mac OS + Arm can report x86_64
184
+ # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
185
+ ifeq ($(UNAME_S),Darwin)
186
+ ifndef GGML_NO_METAL
187
+ GGML_METAL := 1
188
+ endif
189
+
190
+ GGML_NO_OPENMP := 1
191
+
192
+ ifneq ($(UNAME_P),arm)
193
+ SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
194
+ ifeq ($(SYSCTL_M),1)
195
+ # UNAME_P := arm
196
+ # UNAME_M := arm64
197
+ warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
198
+ endif
199
+ endif
200
+ endif
201
+
202
+ ifdef GGML_METAL
203
+ GGML_METAL_EMBED_LIBRARY := 1
204
+ endif
205
+
206
+ ifdef GGML_RPC
207
+ BUILD_TARGETS += rpc-server
208
+ endif
209
+
210
+ ifdef GGML_VULKAN
211
+ BUILD_TARGETS += vulkan-shaders-gen
212
+ endif
213
+
214
+ default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD)
215
+ # Run every binary in TEST_TARGETS and count failures. tests/test-tokenizer-0 is run once per vocab file, chained with && so a failure on any vocab file is counted; tests/test-tokenizer-1-spm and tests/test-tokenizer-1-bpe are skipped.
216
+ test: $(TEST_TARGETS)
217
+ @failures=0; \
218
+ for test_target in $(TEST_TARGETS); do \
219
+ if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
220
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf && \
221
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf && \
222
+ ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf && \
223
+ ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf && \
224
+ ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf && \
225
+ ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf && \
226
+ ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf && \
227
+ ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
228
+ elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
229
+ continue; \
230
+ elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
231
+ continue; \
232
+ else \
233
+ echo "Running test $$test_target..."; \
234
+ ./$$test_target; \
235
+ fi; \
236
+ if [ $$? -ne 0 ]; then \
237
+ printf 'Test %s FAILED!\n\n' $$test_target; \
238
+ failures=$$(( failures + 1 )); \
239
+ else \
240
+ printf 'Test %s passed.\n\n' $$test_target; \
241
+ fi; \
242
+ done; \
243
+ if [ $$failures -gt 0 ]; then \
244
+ printf '\n%s tests failed.\n' $$failures; \
245
+ exit 1; \
246
+ fi
247
+ @echo 'All tests passed.'
248
+
249
+ all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD)
250
+
251
+ ifdef RISCV_CROSS_COMPILE
252
+ CC := riscv64-unknown-linux-gnu-gcc
253
+ CXX := riscv64-unknown-linux-gnu-g++
254
+ endif
255
+
256
+ #
257
+ # Compile flags
258
+ #
259
+
260
+ # keep standard at C11 and C++17
261
+ MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU
262
+ MK_CFLAGS = -std=c11 -fPIC
263
+ MK_CXXFLAGS = -std=c++17 -fPIC
264
+ MK_NVCCFLAGS = -std=c++17
265
+
266
+ ifdef LLAMA_NO_CCACHE
267
+ GGML_NO_CCACHE := 1
268
+ DEPRECATE_WARNING := 1
269
+ endif
270
+
271
+ ifndef GGML_NO_CCACHE
272
+ CCACHE := $(shell which ccache)
273
+ ifdef CCACHE
274
+ export CCACHE_SLOPPINESS = time_macros
275
+ $(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.)
276
+ CC := $(CCACHE) $(CC)
277
+ CXX := $(CCACHE) $(CXX)
278
+ else
279
+ $(info I ccache not found. Consider installing it for faster compilation.)
280
+ endif # CCACHE
281
+ endif # GGML_NO_CCACHE
282
+
283
+ # clock_gettime came in POSIX.1b (1993)
284
+ # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
285
+ # posix_memalign came in POSIX.1-2001 / SUSv3
286
+ # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
287
+ MK_CPPFLAGS += -D_XOPEN_SOURCE=600
288
+
289
+ # Somehow in OpenBSD whenever POSIX conformance is specified
290
+ # some string functions rely on locale_t availability,
291
+ # which was introduced in POSIX.1-2008, forcing us to go higher
292
+ ifeq ($(UNAME_S),OpenBSD)
293
+ MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
294
+ endif
295
+
296
+ # Data types, macros and functions related to controlling CPU affinity and
297
+ # some memory allocation are available on Linux through GNU extensions in libc
298
+ ifeq ($(UNAME_S),Linux)
299
+ MK_CPPFLAGS += -D_GNU_SOURCE
300
+ MK_LDFLAGS += -ldl
301
+ endif
302
+
303
+ # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
304
+ # and on macOS its availability depends on enabling Darwin extensions
305
+ # similarly on DragonFly, enabling BSD extensions is necessary
306
+ ifeq ($(UNAME_S),Darwin)
307
+ MK_CPPFLAGS += -D_DARWIN_C_SOURCE
308
+ endif
309
+ ifeq ($(UNAME_S),DragonFly)
310
+ MK_CPPFLAGS += -D__BSD_VISIBLE
311
+ endif
312
+
313
+ # alloca is a non-standard interface that is not visible on BSDs when
314
+ # POSIX conformance is specified, but not all of them provide a clean way
315
+ # to enable it in such cases
316
+ ifeq ($(UNAME_S),FreeBSD)
317
+ MK_CPPFLAGS += -D__BSD_VISIBLE
318
+ endif
319
+ ifeq ($(UNAME_S),NetBSD)
320
+ MK_CPPFLAGS += -D_NETBSD_SOURCE
321
+ endif
322
+ ifeq ($(UNAME_S),OpenBSD)
323
+ MK_CPPFLAGS += -D_BSD_SOURCE
324
+ endif
325
+
326
+ ifdef GGML_SCHED_MAX_COPIES
327
+ MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
328
+ endif
329
+
330
+ ifdef LLAMA_DEBUG
331
+ MK_CFLAGS += -O0 -g
332
+ MK_CXXFLAGS += -O0 -g
333
+ MK_LDFLAGS += -g
334
+ MK_NVCCFLAGS += -O0 -g
335
+
336
+ ifeq ($(UNAME_S),Linux)
337
+ MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
338
+ endif
339
+ else
340
+ MK_CPPFLAGS += -DNDEBUG
341
+ MK_CFLAGS += -O3 -g
342
+ MK_CXXFLAGS += -O3 -g
343
+ MK_NVCCFLAGS += -O3 -g
344
+ endif
345
+
346
+ ifdef LLAMA_SANITIZE_THREAD
347
+ MK_CFLAGS += -fsanitize=thread -g
348
+ MK_CXXFLAGS += -fsanitize=thread -g
349
+ MK_LDFLAGS += -fsanitize=thread -g
350
+ endif
351
+
352
+ ifdef LLAMA_SANITIZE_ADDRESS
353
+ MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
354
+ MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
355
+ MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
356
+ endif
357
+
358
+ ifdef LLAMA_SANITIZE_UNDEFINED
359
+ MK_CFLAGS += -fsanitize=undefined -g
360
+ MK_CXXFLAGS += -fsanitize=undefined -g
361
+ MK_LDFLAGS += -fsanitize=undefined -g
362
+ endif
363
+
364
+ ifdef LLAMA_SERVER_SSL
365
+ MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
366
+ MK_LDFLAGS += -lssl -lcrypto
367
+ endif
368
+
369
+ ifndef GGML_NO_CPU_AARCH64
370
+ MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
371
+ endif
372
+
373
+ # warnings
374
+ WARN_FLAGS = \
375
+ -Wall \
376
+ -Wextra \
377
+ -Wpedantic \
378
+ -Wcast-qual \
379
+ -Wno-unused-function
380
+
381
+ MK_CFLAGS += \
382
+ $(WARN_FLAGS) \
383
+ -Wshadow \
384
+ -Wstrict-prototypes \
385
+ -Wpointer-arith \
386
+ -Wmissing-prototypes \
387
+ -Werror=implicit-int \
388
+ -Werror=implicit-function-declaration
389
+
390
+ MK_CXXFLAGS += \
391
+ $(WARN_FLAGS) \
392
+ -Wmissing-declarations \
393
+ -Wmissing-noreturn
394
+
395
+ ifeq ($(LLAMA_FATAL_WARNINGS),1)
396
+ MK_CFLAGS += -Werror
397
+ MK_CXXFLAGS += -Werror
398
+ endif
399
+
400
+ # this version of Apple ld64 is buggy
401
+ ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
402
+ MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
403
+ endif
404
+
405
+ # OS specific
406
+ # TODO: support Windows
407
+ ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
408
+ MK_CFLAGS += -pthread
409
+ MK_CXXFLAGS += -pthread
410
+ endif
411
+
412
+ # detect Windows
413
+ ifneq ($(findstring _NT,$(UNAME_S)),)
414
+ _WIN32 := 1
415
+ endif
416
+
417
+ # library name prefix
418
+ ifneq ($(_WIN32),1)
419
+ LIB_PRE := lib
420
+ endif
421
+
422
+ # Dynamic Shared Object extension
423
+ ifneq ($(_WIN32),1)
424
+ DSO_EXT := .so
425
+ else
426
+ DSO_EXT := .dll
427
+ endif
428
+
429
+ # Windows Sockets 2 (Winsock) for network-capable apps
430
+ ifeq ($(_WIN32),1)
431
+ LWINSOCK2 := -lws2_32
432
+ endif
433
+
434
+ ifdef LLAMA_GPROF
435
+ MK_CFLAGS += -pg
436
+ MK_CXXFLAGS += -pg
437
+ endif
438
+
439
+ # Architecture specific
440
+ # TODO: probably these flags need to be tweaked on some architectures
441
+ # feel free to update the Makefile for your architecture and send a pull request or issue
442
+
443
+ ifndef RISCV_CROSS_COMPILE
444
+
445
+ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
446
+ # Use all CPU extensions that are available:
447
+ MK_CFLAGS += -march=native -mtune=native
448
+ HOST_CXXFLAGS += -march=native -mtune=native
449
+
450
+ # Usage AMX build test
451
+ #MK_CFLAGS += -march=graniterapids -mtune=graniterapids
452
+ #HOST_CXXFLAGS += -march=graniterapids -mtune=graniterapids
453
+
454
+ # Usage AVX-only
455
+ #MK_CFLAGS += -mfma -mf16c -mavx
456
+ #MK_CXXFLAGS += -mfma -mf16c -mavx
457
+
458
+ # Usage SSSE3-only (NOT SSE3!)
459
+ #MK_CFLAGS += -mssse3
460
+ #MK_CXXFLAGS += -mssse3
461
+ endif
462
+
463
+ ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
464
+ # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
465
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
466
+ # https://github.com/ggml-org/llama.cpp/issues/2922
467
+ MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
468
+ MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
469
+
470
+ # Target Windows 8 for PrefetchVirtualMemory
471
+ MK_CPPFLAGS += -D_WIN32_WINNT=0x602
472
+ endif
473
+
474
+ ifneq ($(filter aarch64%,$(UNAME_M)),)
475
+ # Apple M1, M2, etc.
476
+ # Raspberry Pi 3, 4, Zero 2 (64-bit)
477
+ # Nvidia Jetson (when jetson_release reports a TX2 module, CC and CXX are switched to the aarch64-unknown-linux-gnu toolchain below)
478
+ MK_CFLAGS += -mcpu=native
479
+ MK_CXXFLAGS += -mcpu=native
480
+ JETSON_RELEASE_INFO := $(shell jetson_release)
481
+ ifdef JETSON_RELEASE_INFO
482
+ ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
483
+ JETSON_EOL_MODULE_DETECT = 1
484
+ CC = aarch64-unknown-linux-gnu-gcc
485
+ CXX = aarch64-unknown-linux-gnu-g++
486
+ endif
487
+ endif
488
+ endif
489
+
490
+ ifneq ($(filter armv6%,$(UNAME_M)),)
491
+ # Raspberry Pi 1, Zero
492
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
493
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
494
+ endif
495
+
496
+ ifneq ($(filter armv7%,$(UNAME_M)),)
497
+ # Raspberry Pi 2
498
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
499
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
500
+ endif
501
+
502
+ ifneq ($(filter armv8%,$(UNAME_M)),)
503
+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
504
+ MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
505
+ MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
506
+ endif
507
+
508
+ ifneq ($(filter ppc64%,$(UNAME_M)),)
509
+ POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
510
+ ifneq (,$(findstring POWER9,$(POWER9_M)))
511
+ MK_CFLAGS += -mcpu=power9
512
+ MK_CXXFLAGS += -mcpu=power9
513
+ endif
514
+ endif
515
+
516
+ ifneq ($(filter ppc64le%,$(UNAME_M)),)
517
+ MK_CFLAGS += -mcpu=powerpc64le
518
+ MK_CXXFLAGS += -mcpu=powerpc64le
519
+ CUDA_POWER_ARCH = 1
520
+ endif
521
+
522
+ ifneq ($(filter loongarch64%,$(UNAME_M)),)
523
+ MK_CFLAGS += -mlasx
524
+ MK_CXXFLAGS += -mlasx
525
+ endif
526
+
527
+ ifneq ($(filter riscv64%,$(UNAME_M)),)
528
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
529
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
530
+ endif
531
+
532
+ else # RISC-V CROSS COMPILATION
533
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
534
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
535
+ endif
536
+
537
+ ifndef GGML_NO_ACCELERATE
538
+ # Mac OS - include Accelerate framework.
539
+ # `-framework Accelerate` works both with Apple Silicon and Mac Intel
540
+ ifeq ($(UNAME_S),Darwin)
541
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
542
+ MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
543
+ MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
544
+ MK_LDFLAGS += -framework Accelerate
545
+ OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
546
+ endif
547
+ endif # GGML_NO_ACCELERATE
548
+
549
+ ifndef GGML_NO_OPENMP
550
+ MK_CPPFLAGS += -DGGML_USE_OPENMP
551
+ MK_CFLAGS += -fopenmp
552
+ MK_CXXFLAGS += -fopenmp
553
+ endif # GGML_NO_OPENMP
554
+
555
+ ifdef GGML_OPENBLAS
556
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
557
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
558
+ MK_LDFLAGS += $(shell pkg-config --libs openblas)
559
+ OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
560
+ endif # GGML_OPENBLAS
561
+
562
+ ifdef GGML_OPENBLAS64
563
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
564
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
565
+ MK_LDFLAGS += $(shell pkg-config --libs openblas64)
566
+ OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
567
+ endif # GGML_OPENBLAS64
568
+
569
+ ifdef GGML_BLIS
570
+ MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
571
+ MK_LDFLAGS += -lblis -L/usr/local/lib
572
+ OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
573
+ endif # GGML_BLIS
574
+
575
+ ifdef GGML_NVPL
576
+ MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
577
+ MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
578
+ OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
579
+ endif # GGML_NVPL
580
+
581
+ ifndef GGML_NO_LLAMAFILE
582
+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
583
+ OBJ_GGML_EXT += ggml/src/ggml-cpu/llamafile/sgemm.o
584
+ endif
585
+
586
+ ifndef GGML_NO_AMX
587
+ MK_CPPFLAGS += -DGGML_USE_AMX
588
+ OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o
589
+ endif
590
+
591
+ # only necessary for the CPU backend files
592
+ MK_CPPFLAGS += -Iggml/src/ggml-cpu
593
+
594
+ ifdef GGML_RPC
595
+ MK_CPPFLAGS += -DGGML_USE_RPC
596
+ OBJ_GGML_EXT += ggml/src/ggml-rpc.o
597
+ endif # GGML_RPC
598
+
599
+ # Object files for the CUDA template-instance sources. Each list is built by globbing
+ # the .cu files in ggml/src/ggml-cuda/template-instances and mapping %.cu to %.o.
+ # The fattn-mma* and mmq* instances are always included.
+ # The CUDA, HIP and MUSA sections each append this list to OBJ_GGML_EXT.
+ OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu))
600
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
601
+
602
+ # fattn-vec instances: every file when GGML_CUDA_FA_ALL_QUANTS is set, otherwise only
+ # the q4_0-q4_0, q8_0-q8_0 and f16-f16 variants.
+ ifdef GGML_CUDA_FA_ALL_QUANTS
603
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
604
+ else
605
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
606
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
607
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
608
+ endif # GGML_CUDA_FA_ALL_QUANTS
609
+
610
+ ifdef GGML_CUDA
611
+ ifneq ('', '$(wildcard /opt/cuda)')
612
+ CUDA_PATH ?= /opt/cuda
613
+ else
614
+ CUDA_PATH ?= /usr/local/cuda
615
+ endif
616
+
617
+ MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
618
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
619
+ MK_NVCCFLAGS += -use_fast_math
620
+
621
+ OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
622
+ OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
623
+ OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)
624
+
625
+ ifdef LLAMA_FATAL_WARNINGS
626
+ MK_NVCCFLAGS += -Werror all-warnings
627
+ endif # LLAMA_FATAL_WARNINGS
628
+
629
+ ifndef JETSON_EOL_MODULE_DETECT
630
+ MK_NVCCFLAGS += --forward-unknown-to-host-compiler
631
+ endif # JETSON_EOL_MODULE_DETECT
632
+
633
+ ifdef LLAMA_DEBUG
634
+ MK_NVCCFLAGS += -lineinfo
635
+ endif # LLAMA_DEBUG
636
+
637
+ ifdef GGML_CUDA_DEBUG
638
+ MK_NVCCFLAGS += --device-debug
639
+ endif # GGML_CUDA_DEBUG
640
+
641
+ ifdef GGML_CUDA_NVCC
642
+ NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
643
+ else
644
+ NVCC = $(CCACHE) nvcc
645
+ endif # GGML_CUDA_NVCC
646
+
647
+ ifdef CUDA_DOCKER_ARCH
648
+ MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
649
+ else ifndef CUDA_POWER_ARCH
650
+ MK_NVCCFLAGS += -arch=native
651
+ endif # CUDA_DOCKER_ARCH
652
+
653
+ ifdef GGML_CUDA_FORCE_MMQ
654
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
655
+ endif # GGML_CUDA_FORCE_MMQ
656
+
657
+ ifdef GGML_CUDA_FORCE_CUBLAS
658
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS
659
+ endif # GGML_CUDA_FORCE_CUBLAS
660
+
661
+ ifdef GGML_CUDA_F16
662
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
663
+ endif # GGML_CUDA_F16
664
+
665
+ ifdef GGML_CUDA_DMMV_F16
666
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
667
+ endif # GGML_CUDA_DMMV_F16
668
+
669
+ ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
670
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
671
+ else
672
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
673
+ endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
674
+
675
+ ifdef GGML_CUDA_NO_PEER_COPY
676
+ MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
677
+ endif # GGML_CUDA_NO_PEER_COPY
678
+
679
+ ifdef GGML_CUDA_CCBIN
680
+ MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
681
+ endif # GGML_CUDA_CCBIN
682
+
683
+ ifdef GGML_CUDA_NO_FA
684
+ MK_NVCCFLAGS += -DGGML_CUDA_NO_FA
685
+ endif # GGML_CUDA_NO_FA
686
+
687
+ ifdef GGML_CUDA_FA_ALL_QUANTS
688
+ MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
689
+ endif # GGML_CUDA_FA_ALL_QUANTS
690
+
691
+ ifdef JETSON_EOL_MODULE_DETECT
692
+ define NVCC_COMPILE
693
+ $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
694
+ endef # NVCC_COMPILE
695
+ else
696
+ define NVCC_COMPILE
697
+ $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
698
+ endef # NVCC_COMPILE
699
+ endif # JETSON_EOL_MODULE_DETECT
700
+
701
+ ggml/src/ggml-cuda/%.o: \
702
+ ggml/src/ggml-cuda/%.cu \
703
+ ggml/include/ggml.h \
704
+ ggml/src/ggml-common.h \
705
+ ggml/src/ggml-cuda/common.cuh
706
+ $(NVCC_COMPILE)
707
+
708
+ ggml/src/ggml-cuda/ggml-cuda.o: \
709
+ ggml/src/ggml-cuda/ggml-cuda.cu \
710
+ ggml/include/ggml-cuda.h \
711
+ ggml/include/ggml.h \
712
+ ggml/include/ggml-backend.h \
713
+ ggml/src/ggml-backend-impl.h \
714
+ ggml/src/ggml-common.h \
715
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
716
+ $(NVCC_COMPILE)
717
+ endif # GGML_CUDA
718
+
719
+ ifdef GGML_VULKAN
720
+ MK_CPPFLAGS += -DGGML_USE_VULKAN
721
+ MK_LDFLAGS += $(shell pkg-config --libs vulkan)
722
+ OBJ_GGML_EXT += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o
723
+
724
+ ifdef GGML_VULKAN_CHECK_RESULTS
725
+ MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
726
+ endif
727
+
728
+ ifdef GGML_VULKAN_DEBUG
729
+ MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
730
+ endif
731
+
732
+ ifdef GGML_VULKAN_MEMORY_DEBUG
733
+ MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
734
+ endif
735
+
736
+ ifdef GGML_VULKAN_PERF
737
+ MK_CPPFLAGS += -DGGML_VULKAN_PERF
738
+ endif
739
+
740
+ ifdef GGML_VULKAN_VALIDATE
741
+ MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
742
+ endif
743
+
744
+ ifdef GGML_VULKAN_RUN_TESTS
745
+ MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
746
+ endif
747
+
748
+ # Vulkan shader generation settings:
+ #   GLSLC_CMD               - GLSL compiler passed to the generator via --glslc
+ #   _ggml_vk_genshaders_cmd - absolute path of the vulkan-shaders-gen helper built by this Makefile
+ #   _ggml_vk_header/_source - generated files (--target-hpp / --target-cpp)
+ #   _ggml_vk_input_dir      - directory holding the .comp shader sources
+ GLSLC_CMD = glslc
748
+ _ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
749
+ _ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
750
+ _ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
751
+ _ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders
752
+ # Use $(wildcard ...) to list the shader sources: `$(echo ...)` is not a Make function
+ # and expands to nothing, which left the generated shader files without any
+ # dependency on the .comp sources.
+ _ggml_vk_shader_deps := $(wildcard $(_ggml_vk_input_dir)/*.comp)
754
+
755
+ ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
756
+ $(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@
757
+
758
+ $(_ggml_vk_header): $(_ggml_vk_source)
759
+
760
+ $(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
761
+ $(_ggml_vk_genshaders_cmd) \
762
+ --glslc $(GLSLC_CMD) \
763
+ --input-dir $(_ggml_vk_input_dir) \
764
+ --target-hpp $(_ggml_vk_header) \
765
+ --target-cpp $(_ggml_vk_source)
766
+
767
+ vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
768
+ $(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
769
+
770
+ endif # GGML_VULKAN
771
+
772
+ ifdef GGML_HIP
773
+ ifeq ($(wildcard /opt/rocm),)
774
+ ROCM_PATH ?= /usr
775
+ AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
776
+ else
777
+ ROCM_PATH ?= /opt/rocm
778
+ AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
779
+ endif
780
+
781
+ MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA
782
+
783
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
784
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
785
+ MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
786
+
787
+ HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
788
+
789
+ HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
790
+
791
+ ifdef GGML_CUDA_FORCE_MMQ
792
+ HIPFLAGS += -DGGML_CUDA_FORCE_MMQ
793
+ endif # GGML_CUDA_FORCE_MMQ
794
+
795
+ ifdef GGML_CUDA_FORCE_CUBLAS
796
+ HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS
797
+ endif # GGML_CUDA_FORCE_CUBLAS
798
+
799
+ ifdef GGML_CUDA_NO_PEER_COPY
800
+ HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
801
+ endif # GGML_CUDA_NO_PEER_COPY
802
+
803
+ ifdef GGML_CUDA_NO_FA
804
+ HIPFLAGS += -DGGML_CUDA_NO_FA
805
+ endif # GGML_CUDA_NO_FA
806
+
807
+ OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
808
+ OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
809
+ OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)
810
+
811
+ ggml/src/ggml-cuda/ggml-cuda.o: \
812
+ ggml/src/ggml-cuda/ggml-cuda.cu \
813
+ ggml/include/ggml-cuda.h \
814
+ ggml/include/ggml.h \
815
+ ggml/include/ggml-backend.h \
816
+ ggml/src/ggml-backend-impl.h \
817
+ ggml/src/ggml-common.h \
818
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
819
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
820
+
821
+ ggml/src/ggml-cuda/%.o: \
822
+ ggml/src/ggml-cuda/%.cu \
823
+ ggml/include/ggml.h \
824
+ ggml/src/ggml-common.h \
825
+ ggml/src/ggml-cuda/common.cuh
826
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
827
+ endif # GGML_HIP
828
+
829
+ ifdef GGML_MUSA
830
+ ifeq ($(wildcard /opt/musa),)
831
+ MUSA_PATH ?= /usr/local/musa
832
+ else
833
+ MUSA_PATH ?= /opt/musa
834
+ endif
835
+ MUSA_ARCHITECTURES ?= 21;22;31
836
+
837
+ MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
838
+ MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
839
+ MK_LDFLAGS += -lmusa -lmusart -lmublas
840
+
841
+ ifndef GGML_NO_OPENMP
842
+ # For Ubuntu Focal
843
+ MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
844
+ MK_LDFLAGS += -L/usr/lib/llvm-10/lib
845
+ # For Ubuntu Jammy
846
+ MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
847
+ MK_LDFLAGS += -L/usr/lib/llvm-14/lib
848
+ endif # GGML_NO_OPENMP
849
+
850
+ CC := $(MUSA_PATH)/bin/clang
851
+ CXX := $(MUSA_PATH)/bin/clang++
852
+ MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc
853
+
854
+ MUSAFLAGS = -fsigned-char -x musa -mtgpu
855
+ MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch))
856
+
857
+ ifdef GGML_CUDA_FORCE_MMQ
858
+ MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
859
+ endif # GGML_CUDA_FORCE_MMQ
860
+
861
+ ifdef GGML_CUDA_FORCE_CUBLAS
862
+ MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
863
+ endif # GGML_CUDA_FORCE_CUBLAS
864
+
865
+ ifdef GGML_CUDA_F16
866
+ MUSAFLAGS += -DGGML_CUDA_F16
867
+ endif # GGML_CUDA_F16
868
+
869
+ ifdef GGML_CUDA_DMMV_F16
870
+ MUSAFLAGS += -DGGML_CUDA_F16
871
+ endif # GGML_CUDA_DMMV_F16
872
+
873
+ ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
874
+ MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
875
+ else
876
+ MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
877
+ endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
878
+
879
+ ifdef GGML_CUDA_NO_PEER_COPY
880
+ MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
881
+ endif # GGML_CUDA_NO_PEER_COPY
882
+
883
+ ifdef GGML_CUDA_NO_FA
884
+ MUSAFLAGS += -DGGML_CUDA_NO_FA
885
+ endif # GGML_CUDA_NO_FA
886
+
887
+ ifdef GGML_CUDA_FA_ALL_QUANTS
888
+ MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
889
+ endif # GGML_CUDA_FA_ALL_QUANTS
890
+
891
+ OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
892
+ OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
893
+ OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)
894
+
895
+ ggml/src/ggml-cuda/ggml-cuda.o: \
896
+ ggml/src/ggml-cuda/ggml-cuda.cu \
897
+ ggml/include/ggml-cuda.h \
898
+ ggml/include/ggml.h \
899
+ ggml/include/ggml-backend.h \
900
+ ggml/src/ggml-backend-impl.h \
901
+ ggml/src/ggml-common.h \
902
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
903
+ $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
904
+
905
+ ggml/src/ggml-cuda/%.o: \
906
+ ggml/src/ggml-cuda/%.cu \
907
+ ggml/include/ggml.h \
908
+ ggml/src/ggml-common.h \
909
+ ggml/src/ggml-cuda/common.cuh
910
+ $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
911
+ endif # GGML_MUSA
912
+
913
+ ifdef GGML_METAL
914
+ MK_CPPFLAGS += -DGGML_USE_METAL
915
+ MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
916
+ OBJ_GGML_EXT += ggml/src/ggml-metal/ggml-metal.o
917
+
918
+ ifdef GGML_METAL_USE_BF16
919
+ MK_CPPFLAGS += -DGGML_METAL_USE_BF16
920
+ endif # GGML_METAL_USE_BF16
921
+ ifdef GGML_METAL_NDEBUG
922
+ MK_CPPFLAGS += -DGGML_METAL_NDEBUG
923
+ endif
924
+ ifdef GGML_METAL_EMBED_LIBRARY
925
+ MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
926
+ OBJ_GGML_EXT += ggml/src/ggml-metal-embed.o
927
+ endif
928
+ endif # GGML_METAL
929
+
930
+ ifdef GGML_METAL
931
+ ggml/src/ggml-metal/ggml-metal.o: \
932
+ ggml/src/ggml-metal/ggml-metal.m \
933
+ ggml/src/ggml-metal/ggml-metal-impl.h \
934
+ ggml/include/ggml-metal.h \
935
+ ggml/include/ggml.h
936
+ $(CC) $(CFLAGS) -c $< -o $@
937
+
938
+ ifdef GGML_METAL_EMBED_LIBRARY
939
+ ggml/src/ggml-metal-embed.o: \
940
+ ggml/src/ggml-metal/ggml-metal.metal \
941
+ ggml/src/ggml-metal/ggml-metal-impl.h \
942
+ ggml/src/ggml-common.h
943
+ @echo "Embedding Metal library"
944
+ @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
945
+ @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
946
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
947
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
948
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
949
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
950
+ @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
951
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
952
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
953
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
954
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
955
+ @rmdir ${TEMP_ASSEMBLY}
956
+ endif
957
+ endif # GGML_METAL
958
+
959
+ DIR_GGML = ggml
960
+ DIR_LLAMA = src
961
+ DIR_COMMON = common
962
+
963
+ OBJ_GGML = \
964
+ $(DIR_GGML)/src/ggml.o \
965
+ $(DIR_GGML)/src/ggml-alloc.o \
966
+ $(DIR_GGML)/src/ggml-backend.o \
967
+ $(DIR_GGML)/src/ggml-backend-reg.o \
968
+ $(DIR_GGML)/src/ggml-opt.o \
969
+ $(DIR_GGML)/src/ggml-quants.o \
970
+ $(DIR_GGML)/src/ggml-threading.o \
971
+ $(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
972
+ $(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
973
+ $(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
974
+ $(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
975
+ $(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
976
+ $(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \
977
+ $(OBJ_GGML_EXT)
978
+
979
+ OBJ_LLAMA = \
980
+ $(DIR_LLAMA)/llama.o \
981
+ $(DIR_LLAMA)/llama-vocab.o \
982
+ $(DIR_LLAMA)/llama-grammar.o \
983
+ $(DIR_LLAMA)/llama-sampling.o \
984
+ $(DIR_LLAMA)/unicode.o \
985
+ $(DIR_LLAMA)/unicode-data.o
986
+
987
+ OBJ_COMMON = \
988
+ $(DIR_COMMON)/common.o \
989
+ $(DIR_COMMON)/arg.o \
990
+ $(DIR_COMMON)/log.o \
991
+ $(DIR_COMMON)/console.o \
992
+ $(DIR_COMMON)/ngram-cache.o \
993
+ $(DIR_COMMON)/sampling.o \
994
+ $(DIR_COMMON)/speculative.o \
995
+ $(DIR_COMMON)/chat.o \
996
+ $(DIR_COMMON)/build-info.o \
997
+ $(DIR_COMMON)/json-schema-to-grammar.o
998
+
999
+ OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
1000
+
1001
+ LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT)
1002
+ LIB_GGML_S = $(LIB_PRE)ggml.a
1003
+
1004
+ LIB_LLAMA = $(LIB_PRE)llama$(DSO_EXT)
1005
+ LIB_LLAMA_S = $(LIB_PRE)llama.a
1006
+
1007
+ LIB_COMMON = $(LIB_PRE)common$(DSO_EXT)
1008
+ LIB_COMMON_S = $(LIB_PRE)common.a
1009
+
1010
+ LIB_ALL = $(LIB_GGML) $(LIB_LLAMA) $(LIB_COMMON)
1011
+ LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S)
1012
+
1013
+ GF_CC := $(CC)
1014
+ include scripts/get-flags.mk
1015
+
1016
+ # combine build flags with cmdline overrides
+ # `override` is required so these assignments still take effect when the variable was
+ # given on the make command line. The Makefile's own MK_* flags are placed first and
+ # the user-supplied value last.
+ # Order matters: CPPFLAGS is combined first because CFLAGS and CXXFLAGS embed it.
+ # GF_CFLAGS / GF_CXXFLAGS presumably come from scripts/get-flags.mk (included just above) — confirm.
1016
+ override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
1017
+ override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
1018
+ # BASE_CXXFLAGS is kept separately; the CUDA host-compiler section below reuses it for CUDA_CXXFLAGS.
+ BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
1019
+ override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
1020
+ override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
1021
+ override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
1023
+
1024
+ # identify CUDA host compiler
1025
+ ifdef GGML_CUDA
1026
+ GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
1027
+ include scripts/get-flags.mk
1028
+ CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
1029
+ endif
1030
+
1031
+ ifdef LLAMA_CURL
1032
+ override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
1033
+ override LDFLAGS := $(LDFLAGS) -lcurl
1034
+ endif
1035
+
1036
+ #
1037
+ # Print build information
1038
+ #
1039
+
1040
+ $(info I llama.cpp build info: )
1041
+ $(info I UNAME_S: $(UNAME_S))
1042
+ $(info I UNAME_P: $(UNAME_P))
1043
+ $(info I UNAME_M: $(UNAME_M))
1044
+ $(info I CFLAGS: $(CFLAGS))
1045
+ $(info I CXXFLAGS: $(CXXFLAGS))
1046
+ $(info I NVCCFLAGS: $(NVCCFLAGS))
1047
+ $(info I LDFLAGS: $(LDFLAGS))
1048
+ $(info I CC: $(shell $(CC) --version | head -n 1))
1049
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
1050
+ # CUDA only: print the nvcc version and refuse to continue on CUDA < 11.7 unless a
+ # target architecture was chosen (CUDA_DOCKER_ARCH) or CUDA_POWER_ARCH is set.
+ ifdef GGML_CUDA
1050
+ $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
1051
+ # Extract "<major>.<minor digit>" following the word "release" in the nvcc version output.
+ CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
1052
+ # awk prints 1 when the extracted version compares below 11.7.
+ ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
1053
+
1054
+ ifndef CUDA_DOCKER_ARCH
1055
+ ifndef CUDA_POWER_ARCH
1056
+ $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
1057
+ endif # CUDA_POWER_ARCH
1058
+ endif # CUDA_DOCKER_ARCH
1059
+
1060
+ endif # CUDA_VERSION < 11.7 (awk comparison above)
1061
+ endif # GGML_CUDA
1063
+ $(info )
1064
+
1065
+ ifdef DEPRECATE_WARNING
1066
+ $(info !!! DEPRECATION WARNING !!!)
1067
+ $(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
1068
+ $(info - LLAMA_CUDA)
1069
+ $(info - LLAMA_METAL)
1070
+ $(info - LLAMA_METAL_EMBED_LIBRARY)
1071
+ $(info - LLAMA_OPENMP)
1072
+ $(info - LLAMA_RPC)
1073
+ $(info - LLAMA_SYCL)
1074
+ $(info - LLAMA_SYCL_F16)
1075
+ $(info - LLAMA_OPENBLAS)
1076
+ $(info - LLAMA_OPENBLAS64)
1077
+ $(info - LLAMA_BLIS)
1078
+ $(info - LLAMA_NO_LLAMAFILE)
1079
+ $(info - LLAMA_NO_ACCELERATE)
1080
+ $(info - LLAMA_NO_OPENMP)
1081
+ $(info - LLAMA_NO_METAL)
1082
+ $(info - LLAMA_NO_CCACHE)
1083
+ $(info )
1084
+ endif
1085
+
1086
+ ifdef REMOVE_WARNING
1087
+ $(info !!! REMOVAL WARNING !!!)
1088
+ $(info The following LLAMA_ options have been removed and are no longer supported)
1089
+ $(info - LLAMA_DISABLE_LOGS (https://github.com/ggml-org/llama.cpp/pull/9418))
1090
+ $(info - LLAMA_SERVER_VERBOSE (https://github.com/ggml-org/llama.cpp/pull/9418))
1091
+ $(info )
1092
+ endif
1093
+
1094
+ #
1095
+ # Build libraries
1096
+ #
1097
+
1098
+ # Libraries
+ # Shared (.so) and static (.a) library file names for ggml, llama and common.
+ # NOTE(review): these assignments replace the values given to the same six variables
+ # earlier in this file, which were composed from $(LIB_PRE) and $(DSO_EXT). If those
+ # ever differ from "lib" / ".so", the hard-coded names here win — confirm that is intended.
1098
+ LIB_GGML = libggml.so
1099
+ LIB_GGML_S = libggml.a
1100
+
1101
+ LIB_LLAMA = libllama.so
1102
+ LIB_LLAMA_S = libllama.a
1103
+
1104
+ LIB_COMMON = libcommon.so
1105
+ LIB_COMMON_S = libcommon.a
1107
+
1108
+ # Targets
1109
+ BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)
1110
+
1111
+ # Dependency files
1112
+ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
1113
+
1114
+ # Default target
1115
+ all: $(BUILD_TARGETS)
1116
+
1117
+ # force c++ build for source file that have same name as c file
1118
+ # Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
1119
+ $(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp
1120
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
1121
+
1122
+ # Rules for building object files
1123
+ $(DIR_GGML)/%.o: $(DIR_GGML)/%.c
1124
+ $(CC) $(CFLAGS) -MMD -c $< -o $@
1125
+
1126
+ $(DIR_GGML)/%.o: $(DIR_GGML)/%.cpp
1127
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
1128
+
1129
+ $(DIR_LLAMA)/%.o: $(DIR_LLAMA)/%.cpp
1130
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
1131
+
1132
+ $(DIR_COMMON)/%.o: $(DIR_COMMON)/%.cpp
1133
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
1134
+
1135
+ # Rules for building libraries
+ # Each library is produced twice: as a shared object linked with $(CXX), and as a
+ # static archive. The archiver is invoked through $(AR) (GNU make's built-in default
+ # is `ar`) so it can be overridden together with CC/CXX when cross-compiling, and the
+ # archive name is taken from $@ instead of being repeated in the recipe.
+ # The shared llama/common libraries list the libraries they depend on as
+ # prerequisites, so those are built first and passed to the link through $^.
1136
+ $(LIB_GGML): $(OBJ_GGML)
1137
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1138
+
1139
+ $(LIB_GGML_S): $(OBJ_GGML)
1140
+ $(AR) rcs $@ $^
1141
+
1142
+ $(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
1143
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1144
+
1145
+ $(LIB_LLAMA_S): $(OBJ_LLAMA)
1146
+ $(AR) rcs $@ $^
1147
+
1148
+ $(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML)
1149
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1150
+
1151
+ $(LIB_COMMON_S): $(OBJ_COMMON)
1152
+ $(AR) rcs $@ $^
1153
+
1154
+ # Include dependency files
1155
+ -include $(DEP_FILES)
1156
+
1157
+ # Clean generated server assets
1158
+ clean-server-assets:
1159
+ find tools/server -type f -name "*.js.hpp" -delete
1160
+ find tools/server -type f -name "*.mjs.hpp" -delete
1161
+ find tools/server -type f -name "*.css.hpp" -delete
1162
+ find tools/server -type f -name "*.html.hpp" -delete
1163
+
1164
+ # Clean rule
+ # Runs clean-server-assets first, then removes the build/test targets, stray
+ # archives, shared objects and .dot files in the top-level directory, and every
+ # object (.o) and dependency (.d) file under the source trees.
+ # `tools` is part of the searched trees because the llama-* program rules compile
+ # tools/*/*.cpp into object files placed next to their sources.
1164
+ clean: clean-server-assets
1165
+ rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS)
1166
+ rm -rvf *.a *.dll *.so *.dot
1167
+ find ggml src common tests examples tools pocs -type f -name "*.o" -delete
1168
+ find ggml src common tests examples tools pocs -type f -name "*.d" -delete
1170
+
1171
+ #
1172
+ # Examples
1173
+ #
1174
+
1175
+ # $< is the first prerequisite, i.e. the source file.
1176
+ # Explicitly compile this to an object file so that it can be cached with ccache.
1177
+ # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
1178
+
1179
+ # Helper function that replaces .c, .cpp, and .cu file endings with .o:
1180
+ GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
1181
+
1182
+ llama-cli: tools/main/main.cpp \
1183
+ $(OBJ_ALL)
1184
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1185
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1186
+ @echo
1187
+ @echo '==== Run ./llama-cli -h for help. ===='
1188
+ @echo
1189
+
1190
+ llama-run: tools/run/run.cpp \
1191
+ $(OBJ_ALL)
1192
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1193
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1194
+
1195
+ llama-simple: examples/simple/simple.cpp \
1196
+ $(OBJ_ALL)
1197
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1198
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1199
+
1200
+ llama-simple-chat: examples/simple-chat/simple-chat.cpp \
1201
+ $(OBJ_ALL)
1202
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1203
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1204
+
1205
+ llama-tokenize: tools/tokenize/tokenize.cpp \
1206
+ $(OBJ_ALL)
1207
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1208
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1209
+
1210
+ llama-batched: examples/batched/batched.cpp \
1211
+ $(OBJ_ALL)
1212
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1213
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1214
+
1215
+ llama-batched-bench: tools/batched-bench/batched-bench.cpp \
1216
+ $(OBJ_ALL)
1217
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1218
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1219
+
1220
+ llama-quantize: tools/quantize/quantize.cpp \
1221
+ $(OBJ_ALL)
1222
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1223
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1224
+
1225
+ llama-quantize-stats: tools/quantize-stats/quantize-stats.cpp \
1226
+ $(OBJ_ALL)
1227
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1228
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1229
+
1230
+ llama-perplexity: tools/perplexity/perplexity.cpp \
1231
+ $(OBJ_ALL)
1232
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1233
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1234
+
1235
+ llama-imatrix: tools/imatrix/imatrix.cpp \
1236
+ $(OBJ_ALL)
1237
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1238
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1239
+
1240
+ llama-embedding: examples/embedding/embedding.cpp \
1241
+ $(OBJ_ALL)
1242
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1243
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1244
+
1245
+ llama-gritlm: examples/gritlm/gritlm.cpp \
1246
+ $(OBJ_ALL)
1247
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1248
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1249
+
1250
+ llama-save-load-state: examples/save-load-state/save-load-state.cpp \
1251
+ $(OBJ_ALL)
1252
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1253
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1254
+
1255
+ llama-gguf: examples/gguf/gguf.cpp \
1256
+ $(OBJ_GGML)
1257
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1258
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1259
+
1260
+ examples/gguf-hash/deps/sha1/sha1.o: \
1261
+ examples/gguf-hash/deps/sha1/sha1.c
1262
+ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1263
+
1264
+ examples/gguf-hash/deps/xxhash/xxhash.o: \
1265
+ examples/gguf-hash/deps/xxhash/xxhash.c
1266
+ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1267
+
1268
+ examples/gguf-hash/deps/sha256/sha256.o: \
1269
+ examples/gguf-hash/deps/sha256/sha256.c
1270
+ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1271
+
1272
+ llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\
1273
+ $(OBJ_ALL)
1274
+ $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
1275
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1276
+
1277
+ llama-gguf-split: tools/gguf-split/gguf-split.cpp \
1278
+ $(OBJ_ALL)
1279
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1280
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1281
+
1282
+ llama-eval-callback: examples/eval-callback/eval-callback.cpp \
1283
+ $(OBJ_ALL)
1284
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1285
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1286
+
1287
+ llama-cvector-generator: tools/cvector-generator/cvector-generator.cpp \
1288
+ $(OBJ_ALL)
1289
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1290
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1291
+
1292
+ llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
1293
+ $(OBJ_ALL)
1294
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1295
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1296
+
1297
+ llama-bench: tools/llama-bench/llama-bench.cpp \
1298
+ $(OBJ_ALL)
1299
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1300
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1301
+
1302
+ llama-export-lora: tools/export-lora/export-lora.cpp \
1303
+ $(OBJ_ALL)
1304
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1305
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1306
+
1307
+ llama-retrieval: examples/retrieval/retrieval.cpp \
1308
+ $(OBJ_ALL)
1309
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1310
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1311
+
1312
+ llama-speculative: examples/speculative/speculative.cpp \
1313
+ $(OBJ_ALL)
1314
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1315
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1316
+
1317
+ llama-parallel: examples/parallel/parallel.cpp \
1318
+ $(OBJ_ALL)
1319
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1320
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1321
+
1322
+ llama-lookahead: examples/lookahead/lookahead.cpp \
1323
+ $(OBJ_ALL)
1324
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1325
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1326
+
1327
+ llama-lookup: examples/lookup/lookup.cpp \
1328
+ $(OBJ_ALL)
1329
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1330
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1331
+
1332
+ llama-lookup-create: examples/lookup/lookup-create.cpp \
1333
+ $(OBJ_ALL)
1334
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1335
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1336
+
1337
+ llama-lookup-merge: examples/lookup/lookup-merge.cpp \
1338
+ $(OBJ_ALL)
1339
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1340
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1341
+
1342
+ llama-lookup-stats: examples/lookup/lookup-stats.cpp \
1343
+ $(OBJ_ALL)
1344
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1345
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1346
+
1347
+ llama-passkey: examples/passkey/passkey.cpp \
1348
+ $(OBJ_ALL)
1349
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1350
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1351
+
1352
+ llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \
1353
+ $(OBJ_ALL)
1354
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1355
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1356
+
1357
# rpc-server is only defined when GGML_RPC is set. Unlike the example rules it has no
# separate compile step: every prerequisite ($^, i.e. the .cpp plus $(OBJ_GGML)) is handed
# to one $(CXX) invocation that compiles and links.
+ ifdef GGML_RPC
1358
+ rpc-server: tools/rpc/rpc-server.cpp \
1359
+ $(OBJ_GGML)
1360
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
1361
+ endif # GGML_RPC
1362
+
1363
# llama-server: compile tools/server/server.cpp ($<) to its GET_OBJ_FILE object, then link.
# The link command filters %.h, %.hpp and the source out of $^, so common/chat.cpp is passed
# to it as a source file alongside $(OBJ_ALL); -Itools/server appears on that link command
# only, and $(LWINSOCK2) is appended after $(LDFLAGS).
# tools/server/index.html.hpp and loading.html.hpp match the tools/server/%.hpp pattern rule
# that follows this rule.
+ llama-server: \
1364
+ tools/server/server.cpp \
1365
+ tools/server/utils.hpp \
1366
+ tools/server/httplib.h \
1367
+ tools/server/index.html.hpp \
1368
+ tools/server/loading.html.hpp \
1369
+ common/chat.cpp \
1370
+ common/chat.h \
1371
+ common/chat-template.hpp \
1372
+ common/json.hpp \
1373
+ common/minja.hpp \
1374
+ $(OBJ_ALL)
1375
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1376
+ $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Itools/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
1377
+
1378
# Pattern rule: embeds tools/server/public/<name> as tools/server/<name>.hpp. The generated
# header holds an `unsigned char` array of the file's bytes (od hex dump rewritten by sed
# into `0x.., ` items) and an `unsigned int <symbol>_len` byte count. <symbol> is the file
# name with '-' and '.' replaced by '_'.
# FORCE is listed as a prerequisite and is declared .PHONY later in this Makefile, so the
# header is regenerated whenever it is needed rather than only when the input changes.
# `$$` passes a literal `$` to the shell (for ${NAME}); $(shell ...) is expanded by make
# when it expands the recipe.
+ # Portable equivalent of `cd tools/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
1379
+ tools/server/%.hpp: tools/server/public/% FORCE Makefile
1380
+ @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
1381
+ echo "unsigned char $${NAME}[] = {" && \
1382
+ cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
1383
+ echo "};" && \
1384
+ echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
1385
+ ) > $@
1386
+
1387
# llama-gen-docs: compile examples/gen-docs/gen-docs.cpp ($<) to its GET_OBJ_FILE object,
# then link it with the non-header prerequisites from $(OBJ_ALL).
+ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
1388
+ $(OBJ_ALL)
1389
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1390
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1391
+
1392
# libllava.a: compiles only the first prerequisite (tools/mtmd/llava.cpp) with -fPIC and
# -Wno-cast-qual, writing the result to $@. The other prerequisites (clip.cpp, the headers,
# $(OBJ_ALL)) are not passed to the command; they only trigger a rebuild when they change.
# NOTE(review): because the command uses `-c ... -o $@`, the file named libllava.a is a single
# compiled object rather than an `ar` archive, and clip.cpp is not part of it -- confirm
# this is intended before relying on it as a static library.
+ libllava.a: tools/mtmd/llava.cpp \
1393
+ tools/mtmd/llava.h \
1394
+ tools/mtmd/clip.cpp \
1395
+ tools/mtmd/clip.h \
1396
+ common/stb_image.h \
1397
+ common/base64.hpp \
1398
+ $(OBJ_ALL)
1399
+ $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
1400
+
1401
# llama-llava-cli: single-step build. One $(CXX) invocation compiles llava-cli.cpp ($<)
# together with the other non-header prerequisites (llava.cpp, clip.cpp, $(OBJ_ALL)) and
# links the result; -Wno-cast-qual is passed to that command.
+ llama-llava-cli: tools/mtmd/llava-cli.cpp \
1402
+ tools/mtmd/llava.cpp \
1403
+ tools/mtmd/llava.h \
1404
+ tools/mtmd/clip.cpp \
1405
+ tools/mtmd/clip.h \
1406
+ $(OBJ_ALL)
1407
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1408
+
1409
# llama-minicpmv-cli: single-step build. One $(CXX) invocation compiles minicpmv-cli.cpp ($<)
# together with the other non-header prerequisites (llava.cpp, clip.cpp, $(OBJ_ALL)) and
# links the result; -Wno-cast-qual is passed to that command.
+ llama-minicpmv-cli: tools/mtmd/minicpmv-cli.cpp \
1410
+ tools/mtmd/llava.cpp \
1411
+ tools/mtmd/llava.h \
1412
+ tools/mtmd/clip.cpp \
1413
+ tools/mtmd/clip.h \
1414
+ $(OBJ_ALL)
1415
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1416
+
1417
# llama-qwen2vl-cli: single-step build. One $(CXX) invocation compiles qwen2vl-cli.cpp ($<)
# together with the other non-header prerequisites (llava.cpp, clip.cpp, $(OBJ_ALL)) and
# links the result; -Wno-cast-qual is passed to that command.
+ llama-qwen2vl-cli: tools/mtmd/qwen2vl-cli.cpp \
1418
+ tools/mtmd/llava.cpp \
1419
+ tools/mtmd/llava.h \
1420
+ tools/mtmd/clip.cpp \
1421
+ tools/mtmd/clip.h \
1422
+ $(OBJ_ALL)
1423
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1424
+
1425
# swift (defined only when UNAME_S is Darwin): delegates to the Makefile inside
# examples/batched.swift and runs its `build` target.
# The sub-make is invoked through $(MAKE) instead of a literal `make` so that -j/-n and the
# jobserver are passed down, and `&&` replaces `;` so a failed `cd` aborts the recipe
# instead of running `build` against the top-level directory.
+ ifeq ($(UNAME_S),Darwin)
1426
+ swift: examples/batched.swift
1427
+ (cd examples/batched.swift && $(MAKE) build)
1428
+ endif
1429
+
1430
# common/build-info.cpp: regenerate build info via scripts/build-info.sh (given "$(CC)")
# into $@.tmp. When the temp file is byte-identical to the current $@ it is simply removed;
# in every other case (different content, or $@ missing) it is moved over $@. An unchanged
# file therefore keeps its old timestamp.
+ common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
1431
+ @sh scripts/build-info.sh "$(CC)" > $@.tmp
1432
+ @if cmp -s $@.tmp $@; then \
1433
+ rm $@.tmp; \
1434
+ else \
1435
+ mv $@.tmp $@; \
1436
+ fi
1437
+
1438
# common/build-info.o: compile the generated common/build-info.cpp. With the current
# prerequisite list the %.h filter removes nothing, so $^ is just the .cpp file.
+ common/build-info.o: common/build-info.cpp
1439
+ $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
1440
+
1441
+ #
1442
+ # Tests
1443
+ #
1444
+
1445
# Aggregate target with no recipe: building `tests` builds every target in $(TEST_TARGETS).
+ tests: $(TEST_TARGETS)
1446
+
1447
# tests/test-arg-parser: compile the test source ($<) to its GET_OBJ_FILE object, then link
# it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-arg-parser: tests/test-arg-parser.cpp \
1448
+ $(OBJ_ALL)
1449
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1450
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1451
+
1452
# tests/test-llama-grammar: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-llama-grammar: tests/test-llama-grammar.cpp \
1453
+ $(OBJ_ALL)
1454
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1455
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1456
+
1457
# tests/test-log: compile the test source ($<) to its GET_OBJ_FILE object, then link it
# with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-log: tests/test-log.cpp \
1458
+ $(OBJ_ALL)
1459
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1460
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1461
+
1462
# tests/test-grammar-parser: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-grammar-parser: tests/test-grammar-parser.cpp \
1463
+ $(OBJ_ALL)
1464
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1465
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1466
+
1467
# tests/test-grammar-integration: compile the test source ($<) to its GET_OBJ_FILE object,
# then link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-grammar-integration: tests/test-grammar-integration.cpp \
1468
+ $(OBJ_ALL)
1469
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1470
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1471
+
1472
# tests/test-double-float: the only prerequisite is the test source, so the filter-out in
# the link command expands to nothing and the binary is linked from the test's own object
# plus $(LDFLAGS).
+ tests/test-double-float: tests/test-double-float.cpp
1473
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1474
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1475
+
1476
# tests/test-json-schema-to-grammar: same compile-then-link shape as the other tests, with
# -Itools/server added to the compile step only.
+ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
1477
+ $(OBJ_ALL)
1478
+ $(CXX) $(CXXFLAGS) -Itools/server -c $< -o $(call GET_OBJ_FILE, $<)
1479
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1480
+
1481
# tests/test-chat: same compile-then-link shape as the other tests, with -Itools/server
# added to the compile step only.
+ tests/test-chat: tests/test-chat.cpp \
1482
+ $(OBJ_ALL)
1483
+ $(CXX) $(CXXFLAGS) -Itools/server -c $< -o $(call GET_OBJ_FILE, $<)
1484
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1485
+
1486
# tests/test-opt: compile the test source ($<) to its GET_OBJ_FILE object, then link it
# with the non-header prerequisites from $(OBJ_GGML) (not $(OBJ_ALL)).
+ tests/test-opt: tests/test-opt.cpp \
1487
+ $(OBJ_GGML)
1488
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1489
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1490
+
1491
# tests/test-quantize-fns: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_GGML).
+ tests/test-quantize-fns: tests/test-quantize-fns.cpp \
1492
+ $(OBJ_GGML)
1493
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1494
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1495
+
1496
# tests/test-quantize-perf: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_GGML).
+ tests/test-quantize-perf: tests/test-quantize-perf.cpp \
1497
+ $(OBJ_GGML)
1498
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1499
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1500
+
1501
# tests/test-sampling: compile the test source ($<) to its GET_OBJ_FILE object, then link
# it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-sampling: tests/test-sampling.cpp \
1502
+ $(OBJ_ALL)
1503
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1504
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1505
+
1506
# tests/test-tokenizer-0: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \
1507
+ $(OBJ_ALL)
1508
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1509
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1510
+
1511
# tests/test-tokenizer-1-bpe: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \
1512
+ $(OBJ_ALL)
1513
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1514
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1515
+
1516
# tests/test-tokenizer-1-spm: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \
1517
+ $(OBJ_ALL)
1518
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1519
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1520
+
1521
# tests/test-rope: compile the test source ($<) to its GET_OBJ_FILE object, then link it
# with ggml/src/ggml.o and $(OBJ_GGML). ggml/src/ggml.o is named explicitly as well; $^
# drops duplicate prerequisites, so it is passed once even if $(OBJ_GGML) also lists it.
+ tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o \
1522
+ $(OBJ_GGML)
1523
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1524
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1525
+
1526
# tests/test-c.o: compile tests/test-c.c with the C compiler ($(CC) / $(CFLAGS)) to an
# object file; there is no link step. include/llama.h is a prerequisite so that header
# changes trigger a recompile, and it is filtered out of the compile command.
+ tests/test-c.o: tests/test-c.c include/llama.h
1527
+ $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
1528
+
1529
# tests/test-backend-ops: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_GGML).
+ tests/test-backend-ops: tests/test-backend-ops.cpp \
1530
+ $(OBJ_GGML)
1531
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1532
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1533
+
1534
# tests/test-model-load-cancel: compile the test source ($<) to its GET_OBJ_FILE object,
# then link. tests/get-model.cpp is not removed by the filter (only %.h and $< are), so it
# is passed to the link command as a source file together with $(OBJ_ALL).
+ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp \
1535
+ $(OBJ_ALL)
1536
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1537
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1538
+
1539
# tests/test-autorelease: compile the test source ($<) to its GET_OBJ_FILE object, then
# link. tests/get-model.cpp is not removed by the filter (only %.h and $< are), so it is
# passed to the link command as a source file together with $(OBJ_ALL).
+ tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp \
1540
+ $(OBJ_ALL)
1541
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1542
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1543
+
1544
# tests/test-chat-template: compile the test source ($<) to its GET_OBJ_FILE object, then
# link it with the non-header prerequisites from $(OBJ_ALL).
+ tests/test-chat-template: tests/test-chat-template.cpp \
1545
+ $(OBJ_ALL)
1546
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1547
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1548
+
1549
+ #
1550
+ # PoCs
1551
+ #
1552
+
1553
# llama-vdot: compile pocs/vdot/vdot.cpp ($<) to its GET_OBJ_FILE object, then link it with
# ggml/src/ggml.o and $(OBJ_GGML). Unlike the example and test rules, the link command
# filters only the source out of $^ (no %.h filter).
+ llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o \
1554
+ $(OBJ_GGML)
1555
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1556
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1557
+
1558
# llama-q8dot: compile pocs/vdot/q8dot.cpp ($<) to its GET_OBJ_FILE object, then link it
# with ggml/src/ggml.o and $(OBJ_GGML). Unlike the example and test rules, the link command
# filters only the source out of $^ (no %.h filter).
+ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
1559
+ $(OBJ_GGML)
1560
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1561
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1562
+
1563
+ #
1564
+ # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed.
1565
+ #
1566
# FORCE is declared phony here as well: the tools/server/%.hpp rule lists it as a
# prerequisite, which makes that rule's target always out of date.
+ # Mark legacy binary targets as .PHONY so that they are always checked.
1567
+ .PHONY: FORCE main quantize perplexity embedding server
1568
+
1569
# This one object is the sole prerequisite of each legacy-named target below
# (main, server, quantize, perplexity, embedding).
+ # Define the object file target
1570
+ examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
1571
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1572
+
1573
+ # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
1574
+ # Eventually we will want to remove these targets from building all the time.
1575
# main: legacy binary name. Links the deprecation-warning object into a binary called
# `main` and prints a notice pointing to llama-cli. The target is declared .PHONY, so it is
# relinked each time it is requested.
+ main: examples/deprecation-warning/deprecation-warning.o
1576
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1577
+ @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
1578
+
1579
# server: legacy binary name. Links the deprecation-warning object into a binary called
# `server` and prints a notice pointing to llama-server. The target is declared .PHONY, so
# it is relinked each time it is requested.
+ server: examples/deprecation-warning/deprecation-warning.o
1580
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1581
+ @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
1582
+
1583
# quantize: legacy binary name. The recipe sits inside a conditional that make evaluates
# while reading the Makefile: the commands are part of the rule only if a file named
# `quantize` already exists in the current directory at that moment. In that case the file
# is replaced by the deprecation-warning binary and a warning is printed; otherwise the
# rule has no commands.
+ quantize: examples/deprecation-warning/deprecation-warning.o
1584
+ ifneq (,$(wildcard quantize))
1585
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1586
+ @echo "#########"
1587
+ @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
1588
+ @echo " Remove the 'quantize' binary to remove this warning."
1589
+ @echo "#########"
1590
+ endif
1591
+
1592
# perplexity: legacy binary name. The recipe sits inside a conditional that make evaluates
# while reading the Makefile: the commands are part of the rule only if a file named
# `perplexity` already exists in the current directory at that moment. In that case the
# file is replaced by the deprecation-warning binary and a warning is printed; otherwise
# the rule has no commands.
+ perplexity: examples/deprecation-warning/deprecation-warning.o
1593
+ ifneq (,$(wildcard perplexity))
1594
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1595
+ @echo "#########"
1596
+ @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
1597
+ @echo " Remove the 'perplexity' binary to remove this warning."
1598
+ @echo "#########"
1599
+ endif
1600
+
1601
# embedding: legacy binary name. The recipe sits inside a conditional that make evaluates
# while reading the Makefile: the commands are part of the rule only if a file named
# `embedding` already exists in the current directory at that moment. In that case the file
# is replaced by the deprecation-warning binary and a warning is printed; otherwise the
# rule has no commands.
+ embedding: examples/deprecation-warning/deprecation-warning.o
1602
+ ifneq (,$(wildcard embedding))
1603
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1604
+ @echo "#########"
1605
+ @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
1606
+ @echo " Remove the 'embedding' binary to remove this warning."
1607
+ @echo "#########"
1608
+ endif