@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
@@ -0,0 +1,1320 @@
1
+ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
2
+ project("llama.cpp" C CXX)
3
+ include(CheckIncludeFileCXX)
4
+
5
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
6
+
7
+ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
8
+ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
9
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
10
+ endif()
11
+
12
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
13
+
14
+ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
15
+ set(LLAMA_STANDALONE ON)
16
+
17
+ # configure project version
18
+ # TODO
19
+ else()
20
+ set(LLAMA_STANDALONE OFF)
21
+ endif()
22
+
23
+ if (EMSCRIPTEN)
24
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
25
+
26
+ option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
27
+ else()
28
+ if (MINGW)
29
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
30
+ else()
31
+ set(BUILD_SHARED_LIBS_DEFAULT ON)
32
+ endif()
33
+ endif()
34
+
35
+
36
+ #
37
+ # Option list
38
+ #
39
+
40
+ if (APPLE)
41
+ set(LLAMA_METAL_DEFAULT ON)
42
+ else()
43
+ set(LLAMA_METAL_DEFAULT OFF)
44
+ endif()
45
+
46
+ if (ANDROID OR CMAKE_SYSTEM_NAME STREQUAL "Android") # MATCHES is a case-sensitive regex; the system name is spelled "Android", so "ANDROID" never matched
47
+ set(LLAMA_LLAMAFILE_DEFAULT OFF) # default for the LLAMA_LLAMAFILE option on Android targets
48
+ else()
49
+ set(LLAMA_LLAMAFILE_DEFAULT ON) # default for the LLAMA_LLAMAFILE option everywhere else
50
+ endif()
51
+
52
+ # general
53
+ option(BUILD_SHARED_LIBS "build shared libraries" OFF)
54
+ option(LLAMA_STATIC "llama: static link libraries" OFF)
55
+ option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
56
+ option(LLAMA_LTO "llama: enable link time optimization" OFF)
57
+ option(LLAMA_CCACHE "llama: use ccache if available" ON)
58
+
59
+ # debug
60
+ option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
61
+ option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
62
+ option(LLAMA_GPROF "llama: enable gprof" OFF)
63
+
64
+ # build
65
+ option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
66
+
67
+ # sanitizers
68
+ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
69
+ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
70
+ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
71
+
72
+ # instruction set specific
73
+ if (LLAMA_NATIVE)
74
+ set(INS_ENB OFF)
75
+ else()
76
+ set(INS_ENB ON)
77
+ endif()
78
+
79
+ option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
80
+ option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
81
+ option(LLAMA_AVX512 "llama: enable AVX512" OFF)
82
+ option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
83
+ option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
84
+ option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
85
+ # in MSVC F16C is implied with AVX2/AVX512
86
+ if (NOT MSVC)
87
+ option(LLAMA_F16C "llama: enable F16C" ${INS_ENB})
88
+ endif()
89
+
90
+ if (WIN32)
91
+ set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
92
+ endif()
93
+
94
+ # 3rd party libs
95
+ option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
96
+ option(LLAMA_BLAS "llama: use BLAS" OFF)
97
+ option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
98
+ set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
99
+ option(LLAMA_CUDA "llama: use CUDA" OFF)
100
+ option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
101
+ option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
102
+ option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
103
+ set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
104
+ set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
105
+ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
106
+ set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
107
+ set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
108
+ "llama: max. batch size for using peer access")
109
+ option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF)
110
+ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
111
+ option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
112
+ option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
113
+ option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
114
+ option(LLAMA_VULKAN "llama: use Vulkan" OFF)
115
+ option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
116
+ option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
117
+ option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF)
118
+ option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF)
119
+ option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
120
+ option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
121
+ option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
122
+ option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF)
123
+ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
124
+ "llama: metal minimum macOS version")
125
+ set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
126
+ option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
127
+ option(LLAMA_MPI "llama: use MPI" OFF)
128
+ option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
129
+ option(LLAMA_SYCL "llama: use SYCL" OFF)
130
+ option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
131
+ set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
132
+ option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
133
+ set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
134
+
135
+ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
136
+ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
137
+ option(LLAMA_BUILD_SERVER "llama: build server example" ON)
138
+
139
+ # add perf arguments
140
+ option(LLAMA_PERF "llama: enable perf" OFF)
141
+
142
+ # Required for relocatable CMake package
143
+ include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
144
+
145
+ #
146
+ # Compile flags
147
+ #
148
+
149
+ if (LLAMA_SYCL)
150
+ set(CMAKE_CXX_STANDARD 17)
151
+ else()
152
+ set(CMAKE_CXX_STANDARD 11)
153
+ endif()
154
+
155
+ set(CMAKE_CXX_STANDARD_REQUIRED true)
156
+ set(CMAKE_C_STANDARD 11)
157
+ set(CMAKE_C_STANDARD_REQUIRED true)
158
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
159
+
160
+ find_package(Threads REQUIRED)
161
+ include(CheckCXXCompilerFlag)
162
+
163
+ add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
164
+
165
+ # enable libstdc++ assertions for debug builds
166
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
167
+ add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
168
+ endif()
169
+
170
+ if (NOT MSVC)
171
+ if (LLAMA_SANITIZE_THREAD)
172
+ add_compile_options(-fsanitize=thread)
173
+ link_libraries (-fsanitize=thread)
174
+ endif()
175
+
176
+ if (LLAMA_SANITIZE_ADDRESS)
177
+ add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
178
+ link_libraries (-fsanitize=address)
179
+ endif()
180
+
181
+ if (LLAMA_SANITIZE_UNDEFINED)
182
+ add_compile_options(-fsanitize=undefined)
183
+ link_libraries (-fsanitize=undefined)
184
+ endif()
185
+ endif()
186
+
187
+ if (APPLE AND LLAMA_ACCELERATE)
188
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
189
+ if (ACCELERATE_FRAMEWORK)
190
+ message(STATUS "Accelerate framework found")
191
+
192
+ add_compile_definitions(GGML_USE_ACCELERATE)
193
+ add_compile_definitions(ACCELERATE_NEW_LAPACK)
194
+ add_compile_definitions(ACCELERATE_LAPACK_ILP64)
195
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
196
+ else()
197
+ message(WARNING "Accelerate framework not found")
198
+ endif()
199
+ endif()
200
+
201
+ if (LLAMA_METAL)
202
+ find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
203
+ find_library(METAL_FRAMEWORK Metal REQUIRED)
204
+ find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
205
+
206
+ message(STATUS "Metal framework found")
207
+ set(GGML_HEADERS_METAL ggml-metal.h)
208
+ set(GGML_SOURCES_METAL ggml-metal.m)
209
+
210
+ add_compile_definitions(GGML_USE_METAL)
211
+ if (LLAMA_METAL_NDEBUG)
212
+ add_compile_definitions(GGML_METAL_NDEBUG)
213
+ endif()
214
+
215
+ # copy ggml-common.h and ggml-metal.metal to bin directory
216
+ configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
217
+ configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
218
+
219
+ if (LLAMA_METAL_EMBED_LIBRARY)
220
+ enable_language(ASM)
221
+ add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
222
+
223
+ set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
224
+ set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
225
+
226
+ file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
227
+
228
+ # merge ggml-common.h and ggml-metal.metal into a single file
229
+ set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
230
+ set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
231
+
232
+ add_custom_command(
233
+ OUTPUT ${METALLIB_EMBED_ASM}
234
+ COMMAND echo "Embedding Metal library"
235
+ COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
236
+ COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
237
+ COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
238
+ COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
239
+ COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
240
+ COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
241
+ COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
242
+ DEPENDS ggml-metal.metal ggml-common.h
243
+ COMMENT "Generate assembly for embedded Metal library"
244
+ )
245
+
246
+ set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
247
+ else()
248
+ if (LLAMA_METAL_SHADER_DEBUG)
249
+ # custom command to do the following:
250
+ # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
251
+ # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
252
+ #
253
+ # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
254
+ # disabling fast math is needed in order to pass tests/test-backend-ops
255
+ # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
256
+ # note: unfortunately, we have to call it default.metallib instead of ggml.metallib
257
+ # ref: https://github.com/ggerganov/whisper.cpp/issues/1720
258
+ set(XC_FLAGS -fno-fast-math -fno-inline -g)
259
+ else()
260
+ set(XC_FLAGS -O3)
261
+ endif()
262
+
263
+ # Append macOS metal versioning flags
264
+ if (LLAMA_METAL_MACOSX_VERSION_MIN)
265
+ message(STATUS "Adding -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN} flag to metal compilation")
266
+ list(APPEND XC_FLAGS -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN})
267
+ endif()
268
+ if (LLAMA_METAL_STD)
269
+ message(STATUS "Adding -std=${LLAMA_METAL_STD} flag to metal compilation")
270
+ list(APPEND XC_FLAGS -std=${LLAMA_METAL_STD})
271
+ endif()
272
+
273
+ add_custom_command(
274
+ OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
275
+ COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
276
+ COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
277
+ COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
278
+ COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
279
+ COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
280
+ DEPENDS ggml-metal.metal ggml-common.h
281
+ COMMENT "Compiling Metal kernels"
282
+ )
283
+
284
+ add_custom_target(
285
+ ggml-metal ALL
286
+ DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
287
+ )
288
+ endif() # LLAMA_METAL_EMBED_LIBRARY
289
+
290
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
291
+ ${FOUNDATION_LIBRARY}
292
+ ${METAL_FRAMEWORK}
293
+ ${METALKIT_FRAMEWORK}
294
+ )
295
+ endif()
296
+
297
+ if (LLAMA_BLAS) # optional BLAS backend: locate the library with FindBLAS, then resolve its include directory
298
+ if (LLAMA_STATIC)
299
+ set(BLA_STATIC ON)
300
+ endif()
301
+ # NOTE: BLA_SIZEOF_INTEGER is deliberately left unset, so FindBLAS uses its default integer size.
302
+ # The former guard compared the make-style text `$(CMAKE_VERSION)`, which CMake never expands,
303
+ # so the branch that requested 8-byte BLAS integers was dead code on every CMake version.
304
+
305
+ set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
306
+ find_package(BLAS)
307
+
308
+ if (BLAS_FOUND)
309
+ message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
310
+
311
+ if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
312
+ # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
313
+ # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
314
+ find_package(PkgConfig REQUIRED)
315
+ if (LLAMA_BLAS_VENDOR MATCHES "Generic") # variable name form: if() dereferences it once, safe even when the value is empty
316
+ pkg_check_modules(DepBLAS REQUIRED blas)
317
+ elseif (LLAMA_BLAS_VENDOR MATCHES "OpenBLAS")
318
+ # As of openblas v0.3.22, the 64-bit is named openblas64.pc
319
+ pkg_check_modules(DepBLAS openblas64)
320
+ if (NOT DepBLAS_FOUND)
321
+ pkg_check_modules(DepBLAS REQUIRED openblas)
322
+ endif()
323
+ elseif (LLAMA_BLAS_VENDOR MATCHES "FLAME")
324
+ pkg_check_modules(DepBLAS REQUIRED blis)
325
+ elseif (LLAMA_BLAS_VENDOR MATCHES "ATLAS")
326
+ pkg_check_modules(DepBLAS REQUIRED blas-atlas)
327
+ elseif (LLAMA_BLAS_VENDOR MATCHES "FlexiBLAS")
328
+ pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
329
+ elseif (LLAMA_BLAS_VENDOR MATCHES "Intel")
330
+ # all Intel* libraries share the same include path
331
+ pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
332
+ elseif (LLAMA_BLAS_VENDOR MATCHES "NVHPC")
333
+ # this doesn't provide pkg-config
334
+ # suggest to assign BLAS_INCLUDE_DIRS on your own
335
+ if ("${NVHPC_VERSION}" STREQUAL "")
336
+ message(WARNING "Better to set NVHPC_VERSION")
337
+ else()
338
+ set(DepBLAS_FOUND ON)
339
+ set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
340
+ endif()
341
+ endif()
342
+ if (DepBLAS_FOUND)
343
+ set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
344
+ else()
345
+ message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
346
+ " detected by pkgconfig, trying to find cblas.h from possible paths...")
347
+ find_path(BLAS_INCLUDE_DIRS
348
+ NAMES cblas.h
349
+ HINTS
350
+ /usr/include
351
+ /usr/local/include
352
+ /usr/include/openblas
353
+ /opt/homebrew/opt/openblas/include
354
+ /usr/local/opt/openblas/include
355
+ /usr/include/x86_64-linux-gnu/openblas/include
356
+ )
357
+ endif()
358
+ endif()
359
+
360
+ message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
361
+
362
+ add_compile_options(${BLAS_LINKER_FLAGS})
363
+
364
+ add_compile_definitions(GGML_USE_OPENBLAS)
365
+
366
+ if ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND (LLAMA_BLAS_VENDOR MATCHES "Generic" OR LLAMA_BLAS_VENDOR MATCHES "Intel")) # quoted: the include dirs may be a multi-entry list
367
+ add_compile_definitions(GGML_BLAS_USE_MKL)
368
+ endif()
369
+
370
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
371
+ set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
372
+ else()
373
+ message(WARNING "BLAS not found, please refer to "
374
+ "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
375
+ " to set correct LLAMA_BLAS_VENDOR")
376
+ endif()
377
+ endif()
378
+
379
+ if (LLAMA_LLAMAFILE)
380
+ add_compile_definitions(GGML_USE_LLAMAFILE)
381
+
382
+ set(GGML_HEADERS_LLAMAFILE sgemm.h)
383
+ set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
384
+ endif()
385
+
386
+ if (LLAMA_QKK_64)
387
+ add_compile_definitions(GGML_QKK_64)
388
+ endif()
389
+
390
+ if (LLAMA_CUBLAS)
391
+ message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead")
392
+ set(LLAMA_CUDA ON)
393
+ endif()
394
+
395
+ if (LLAMA_CUDA)
396
+ cmake_minimum_required(VERSION 3.17)
397
+
398
+ find_package(CUDAToolkit)
399
+ if (CUDAToolkit_FOUND)
400
+ message(STATUS "CUDA found")
401
+
402
+ enable_language(CUDA)
403
+
404
+ set(GGML_HEADERS_CUDA ggml-cuda.h)
405
+
406
+ file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
407
+ list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
408
+
409
+ add_compile_definitions(GGML_USE_CUDA)
410
+ if (LLAMA_CUDA_FORCE_DMMV)
411
+ add_compile_definitions(GGML_CUDA_FORCE_DMMV)
412
+ endif()
413
+ if (LLAMA_CUDA_FORCE_MMQ)
414
+ add_compile_definitions(GGML_CUDA_FORCE_MMQ)
415
+ endif()
416
+ add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
417
+ add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
418
+ if (DEFINED LLAMA_CUDA_DMMV_Y)
419
+ add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
420
+ endif()
421
+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
422
+ add_compile_definitions(GGML_CUDA_F16)
423
+ endif()
424
+ add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
425
+ add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})
426
+ if (LLAMA_CUDA_NO_PEER_COPY)
427
+ add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
428
+ endif()
429
+
430
+ if (LLAMA_STATIC)
431
+ if (WIN32)
432
+ # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
433
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
434
+ else ()
435
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
436
+ endif()
437
+ else()
438
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
439
+ endif()
440
+
441
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver)
442
+
443
+ if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
444
+ # 52 == lowest CUDA 12 standard
445
+ # 60 == f16 CUDA intrinsics
446
+ # 61 == integer CUDA intrinsics
447
+ # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
448
+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
449
+ set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
450
+ else()
451
+ set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
452
+ #set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work
453
+ endif()
454
+ endif()
455
+ message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
456
+
457
+ else()
458
+ message(WARNING "CUDA not found")
459
+ endif()
460
+ endif()
461
+
462
+ if (LLAMA_MPI) # optional MPI backend: adds ggml-mpi sources plus the MPI definitions, libraries and include dirs
463
+ # no nested cmake_minimum_required(VERSION 3.10) here: the file already requires a newer CMake, and re-declaring an older minimum would unset every policy introduced after it
464
+ find_package(MPI)
465
+ if (MPI_C_FOUND)
466
+ message(STATUS "MPI found")
467
+
468
+ set(GGML_HEADERS_MPI ggml-mpi.h)
469
+ set(GGML_SOURCES_MPI ggml-mpi.c)
470
+
471
+ add_compile_definitions(GGML_USE_MPI)
472
+ add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
473
+
474
+ if (NOT MSVC)
475
+ add_compile_options(-Wno-cast-qual)
476
+ endif()
477
+
478
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
479
+ set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
480
+
481
+ # Even if you're only using the C header, C++ programs may bring in MPI
482
+ # C++ functions, so more linkage is needed
483
+ if (MPI_CXX_FOUND)
484
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
485
+ endif()
486
+ else()
487
+ message(WARNING "MPI not found")
488
+ endif()
489
+ endif()
490
+
491
# Optional OpenCL backend via CLBlast. A missing package only produces a warning.
if (LLAMA_CLBLAST)
    find_package(CLBlast)
    if (NOT CLBlast_FOUND)
        message(WARNING "CLBlast not found")
    else()
        message(STATUS "CLBlast found")

        # Backend sources picked up later by the ggml target.
        set(GGML_HEADERS_OPENCL ggml-opencl.h)
        set(GGML_SOURCES_OPENCL ggml-opencl.cpp)

        add_compile_definitions(GGML_USE_CLBLAST)

        list(APPEND LLAMA_EXTRA_LIBS clblast)
    endif()
endif()
506
+
507
# Optional Vulkan backend. A missing Vulkan package only produces a warning.
if (LLAMA_VULKAN)
    find_package(Vulkan)
    if (NOT Vulkan_FOUND)
        message(WARNING "Vulkan not found")
    else()
        message(STATUS "Vulkan found")

        set(GGML_HEADERS_VULKAN ggml-vulkan.h)
        set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)

        add_compile_definitions(GGML_USE_VULKAN)

        # Each LLAMA_VULKAN_<X> option maps one-to-one onto a GGML_VULKAN_<X> define.
        foreach(vk_toggle CHECK_RESULTS DEBUG VALIDATE RUN_TESTS)
            if (LLAMA_VULKAN_${vk_toggle})
                add_compile_definitions(GGML_VULKAN_${vk_toggle})
            endif()
        endforeach()

        list(APPEND LLAMA_EXTRA_LIBS Vulkan::Vulkan)
    endif()
endif()
538
+
539
# HIP/ROCm backend: compiles the CUDA sources as C++ through HIP and links hipBLAS/rocBLAS.
if (LLAMA_HIPBLAS)
    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

    # Compare through the variable name instead of an unquoted ${...} expansion:
    # an empty expansion would be a syntax error and a non-empty one could be
    # dereferenced a second time by if().
    if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang")
        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
    endif()

    if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
    endif()

    find_package(hip     REQUIRED)
    find_package(hipblas REQUIRED)
    find_package(rocblas REQUIRED)

    message(STATUS "HIP and hipBLAS found")

    set(GGML_HEADERS_ROCM ggml-cuda.h)

    file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
    list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")

    add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)

    if (LLAMA_HIP_UMA)
        add_compile_definitions(GGML_HIP_UMA)
    endif()

    # LLAMA_CUDA_<X> switches that are forwarded unchanged as GGML_CUDA_<X>.
    foreach(hip_toggle FORCE_DMMV FORCE_MMQ NO_PEER_COPY)
        if (LLAMA_CUDA_${hip_toggle})
            add_compile_definitions(GGML_CUDA_${hip_toggle})
        endif()
    endforeach()

    add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
    add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
    add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})

    # The .cu files are built with the C++ compiler in this configuration.
    set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)

    if (LLAMA_STATIC)
        message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
    endif()

    # NOTE: the PUBLIC keyword is deliberately part of the list; it takes effect when
    # the list is expanded inside target_link_libraries() further down in this file.
    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
endif()
591
+
592
# SYCL backend (Intel oneAPI). Validates the requested target, requires a sourced
# oneAPI environment, then sets compile flags, backend sources and link libraries.
if (LLAMA_SYCL)
    if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
        message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
    endif()

    if ( NOT DEFINED ENV{ONEAPI_ROOT})
        message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
    endif()
    #todo: AOT

    find_package(IntelSYCL REQUIRED)

    message(STATUS "SYCL found")

    add_compile_definitions(GGML_USE_SYCL)

    if (LLAMA_SYCL_F16)
        add_compile_definitions(GGML_SYCL_F16)
    endif()

    add_compile_options(-I./) #include DPCT
    # The original passed "-I/${SYCL_INCLUDE_DIR}", which prefixes the directory with
    # a stray "/" (breaking drive-letter paths) and degrades to "-I/" when the
    # variable is empty. SYCL_INCLUDE_DIR is presumably provided by the IntelSYCL
    # package found above - TODO confirm.
    if (SYCL_INCLUDE_DIR)
        add_compile_options("-I${SYCL_INCLUDE_DIR}")
    endif()

    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
    # NOTE(review): MKLROOT is read as a CMake variable here, not from the
    # environment - confirm it is set by the time this line runs.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
    if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
    endif()

    set(GGML_HEADERS_SYCL ggml-sycl.h)
    set(GGML_SOURCES_SYCL ggml-sycl.cpp)

    if (WIN32)
        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
    else()
        if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
        elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
        endif()
    endif()
endif()
635
+
636
# Kompute backend: compiles the GLSL compute shaders to SPIR-V, wraps each one in a
# generated C++ header and builds the bundled kompute subdirectory when it is present.
if (LLAMA_KOMPUTE)
    add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
    find_package(Vulkan COMPONENTS glslc REQUIRED)

    # find_program() HINTS must be directories. The original passed the target name
    # Vulkan::glslc, which never matches anything. When FindVulkan reports the glslc
    # executable, its directory is used as the hint; otherwise the search falls back
    # to the default locations, exactly as before.
    if (Vulkan_GLSLC_EXECUTABLE)
        get_filename_component(llama_glslc_hint_dir "${Vulkan_GLSLC_EXECUTABLE}" DIRECTORY)
        find_program(glslc_executable NAMES glslc HINTS "${llama_glslc_hint_dir}")
    else()
        find_program(glslc_executable NAMES glslc)
    endif()
    if (NOT glslc_executable)
        message(FATAL_ERROR "glslc not found")
    endif()

    # compile_shader(SOURCES <shader.comp>...)
    #
    # For every listed shader (path relative to the current source directory):
    #   1. compile it with glslc into <name>.comp.spv
    #   2. dump that file with xxd into shader<name>.h, wrapped in an include guard
    #      and the kp::shader_data namespaces.
    # Output paths are relative, i.e. resolved against the current binary directory.
    function(compile_shader)
        set(options)
        set(oneValueArgs)
        set(multiValueArgs SOURCES)
        cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

        # Visual Studio generators place binaries in a per-configuration subdirectory.
        if (CMAKE_GENERATOR MATCHES "Visual Studio")
            set(xxd_executable "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd")
        else()
            set(xxd_executable "${CMAKE_BINARY_DIR}/bin/xxd")
        endif()

        foreach(source ${compile_shader_SOURCES})
            get_filename_component(filename ${source} NAME)
            set(spv_file ${filename}.spv)
            add_custom_command(
                OUTPUT ${spv_file}
                DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
                COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
                COMMENT "Compiling ${source} to ${spv_file}"
            )

            # op_foo.comp.spv -> shaderop_foo.h, include guard SHADEROP_FOO_H
            get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
            set(FILE_NAME "shader${RAW_FILE_NAME}")
            string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
            string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
            string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
            set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
            message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")

            # The commands rely on shell redirection and on the non-VERBATIM escaping
            # of the quoted '#' lines, so they are kept exactly as they were.
            add_custom_command(
                OUTPUT ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
                COMMAND ${xxd_executable} -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
                COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                DEPENDS ${spv_file} xxd
                COMMENT "Converting to hpp: ${FILE_NAME} ${xxd_executable}"
            )
        endforeach()
    endfunction()

    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
        message(STATUS "Kompute found")
        set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
        add_subdirectory(kompute)

        # Single list of shader operations. kompute-shaders/op_<name>.comp is the
        # input and shaderop_<name>.h the generated header for each entry.
        set(llama_kompute_ops
            scale
            scale_8
            add
            addrow
            mul
            silu
            relu
            gelu
            softmax
            norm
            rmsnorm
            diagmask
            mul_mat_mat_f32
            mul_mat_f16
            mul_mat_q8_0
            mul_mat_q4_0
            mul_mat_q4_1
            mul_mat_q6_k
            getrows_f16
            getrows_q4_0
            getrows_q4_1
            getrows_q6_k
            rope_f16
            rope_f32
            cpy_f16_f16
            cpy_f16_f32
            cpy_f32_f16
            cpy_f32_f32
        )
        set(llama_kompute_shader_sources "")
        set(llama_kompute_shader_headers "")
        foreach(kompute_op IN LISTS llama_kompute_ops)
            list(APPEND llama_kompute_shader_sources kompute-shaders/op_${kompute_op}.comp)
            list(APPEND llama_kompute_shader_headers shaderop_${kompute_op}.h)
        endforeach()

        # Compile our shaders
        compile_shader(SOURCES ${llama_kompute_shader_sources})

        # Create a custom target for our generated shaders
        add_custom_target(generated_shaders DEPENDS ${llama_kompute_shader_headers})

        # Create a custom command that depends on the generated_shaders
        add_custom_command(
            OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
            COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
            DEPENDS generated_shaders
            COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
        )

        # Add the stamp to the main sources to ensure dependency tracking
        set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
        set(GGML_HEADERS_KOMPUTE ggml-kompute.h   ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)

        add_compile_definitions(GGML_USE_KOMPUTE)

        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
        set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
    else()
        message(WARNING "Kompute not found")
    endif()
endif()
791
+
792
# High-bandwidth-memory support through libmemkind.
if (LLAMA_CPU_HBM)
    find_library(memkind memkind REQUIRED)

    add_compile_definitions(GGML_USE_CPU_HBM)

    # The ggml target is created further down in this file, so calling
    # target_link_libraries(ggml ...) here fails at configure time. Queue the
    # library in LLAMA_EXTRA_LIBS instead; that list is linked PUBLIC into ggml
    # once the target exists.
    list(APPEND LLAMA_EXTRA_LIBS memkind)
endif()

# Enables the GGML_PERF define for ggml.
if (LLAMA_PERF)
    add_compile_definitions(GGML_PERF)
endif()
803
+
804
# get_flags(<CCID> <CCVER>)
#
# Selects extra warning flags for the compiler identified by CCID (e.g. GNU, Clang,
# AppleClang) at version CCVER and returns them to the caller's scope:
#   GF_C_FLAGS   - additional C flags
#   GF_CXX_FLAGS - additional C++ flags
# Both end up unset in the caller when nothing applies.
function(get_flags CCID CCVER)
    set(c_extra   "")
    set(cxx_extra "")

    if (CCID STREQUAL "GNU")
        list(APPEND c_extra   -Wdouble-promotion)
        list(APPEND cxx_extra -Wno-array-bounds)

        # Version-gated GCC flags.
        if (NOT CCVER VERSION_LESS 7.1.0)
            list(APPEND cxx_extra -Wno-format-truncation)
        endif()
        if (NOT CCVER VERSION_LESS 8.1.0)
            list(APPEND cxx_extra -Wextra-semi)
        endif()
    elseif (CCID MATCHES "Clang")
        list(APPEND c_extra   -Wunreachable-code-break -Wunreachable-code-return)
        list(APPEND cxx_extra -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)

        # -Wdouble-promotion is only added from these Clang / AppleClang versions on.
        if ((CCID STREQUAL "Clang" AND NOT CCVER VERSION_LESS 3.8.0) OR
            (CCID STREQUAL "AppleClang" AND NOT CCVER VERSION_LESS 7.3.0))
            list(APPEND c_extra -Wdouble-promotion)
        endif()
    endif()

    set(GF_C_FLAGS   ${c_extra}   PARENT_SCOPE)
    set(GF_CXX_FLAGS ${cxx_extra} PARENT_SCOPE)
endfunction()
833
+
834
# Warnings-as-errors: GNU/Clang collect -Werror in C_FLAGS / CXX_FLAGS, MSVC gets /WX directly.
if (LLAMA_FATAL_WARNINGS)
    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        list(APPEND C_FLAGS   -Werror)
        list(APPEND CXX_FLAGS -Werror)
    elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        add_compile_options(/WX)
    endif()
endif()

# Extended warning set. C_FLAGS / CXX_FLAGS (including any -Werror from above) are
# applied per language together with the compiler-specific flags from get_flags().
if (LLAMA_ALL_WARNINGS)
    if (NOT MSVC)
        list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
        list(APPEND C_FLAGS       -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
                                  -Werror=implicit-int -Werror=implicit-function-declaration)
        list(APPEND CXX_FLAGS     -Wmissing-declarations -Wmissing-noreturn)

        list(APPEND C_FLAGS   ${WARNING_FLAGS})
        list(APPEND CXX_FLAGS ${WARNING_FLAGS})

        get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})

        add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
                            "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
    else()
        # todo : msvc
        set(C_FLAGS   "")
        set(CXX_FLAGS "")
    endif()
elseif (LLAMA_FATAL_WARNINGS AND NOT MSVC)
    # Without LLAMA_ALL_WARNINGS nothing above hands C_FLAGS / CXX_FLAGS to the
    # compiler, so the collected -Werror would be dropped. Apply it here.
    add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS}>"
                        "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS}>")
endif()
863
+
864
# Host-compiler flags that are later forwarded to nvcc through -Xcompiler.
set(CUDA_CXX_FLAGS "")

if (LLAMA_CUDA)
    set(CUDA_FLAGS -use_fast_math)

    if (LLAMA_FATAL_WARNINGS)
        list(APPEND CUDA_FLAGS -Werror all-warnings)
    endif()

    if (LLAMA_ALL_WARNINGS AND NOT MSVC)
        # Ask nvcc which host compiler it drives so that get_flags() can pick
        # matching warning flags.
        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
        # Truthiness test: the previous STREQUAL "" comparison also passed when the
        # variable was undefined, which appended a dangling "-ccbin".
        if (CMAKE_CUDA_HOST_COMPILER)
            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
        endif()

        execute_process(
            COMMAND ${NVCC_CMD} -Xcompiler --version
            OUTPUT_VARIABLE CUDA_CCFULLVER
            ERROR_QUIET
        )

        if (NOT CUDA_CCFULLVER MATCHES clang)
            set(CUDA_CCID "GNU")
            execute_process(
                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
                OUTPUT_VARIABLE CUDA_CCVER
                OUTPUT_STRIP_TRAILING_WHITESPACE # drop the trailing newline from the version
                ERROR_QUIET
            )
        else()
            if (CUDA_CCFULLVER MATCHES Apple)
                set(CUDA_CCID "AppleClang")
            else()
                set(CUDA_CCID "Clang")
            endif()
            # Quoted so the banner is passed as a single input even if it contains ';'.
            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER "${CUDA_CCFULLVER}")
        endif()

        message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")

        # Quoted so an empty version still counts as an argument; get_flags()
        # expects exactly two.
        get_flags("${CUDA_CCID}" "${CUDA_CCVER}")
        list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS})  # This is passed to -Xcompiler later
    endif()

    if (NOT MSVC)
        list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
    endif()
endif()
911
+
912
# Windows: silence the CRT "secure" warnings for every target.
if (WIN32)
    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
endif()

# Windows shared builds: export all symbols.
if (WIN32 AND BUILD_SHARED_LIBS)
    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
919
+
920
# Link-time optimisation: enabled only when the toolchain check succeeds.
if (LLAMA_LTO)
    include(CheckIPOSupported)
    check_ipo_supported(RESULT result OUTPUT output)
    if (NOT result)
        message(WARNING "IPO is not supported: ${output}")
    else()
        set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
    endif()
endif()
929
+
930
# Compiler cache: when ccache is found, register it as the compile launcher.
if (LLAMA_CCACHE)
    find_program(LLAMA_CCACHE_FOUND ccache)
    if (NOT LLAMA_CCACHE_FOUND)
        message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF")
    else()
        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
        set(ENV{CCACHE_SLOPPINESS} time_macros)
        message(STATUS "ccache found, compilation results will be cached. Disable with LLAMA_CCACHE=OFF.")
    endif()
endif()
940
+
941
# this version of Apple ld64 is buggy
# The linker banner is printed on stderr, hence ERROR_VARIABLE.
execute_process(
    COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
    ERROR_VARIABLE output
    OUTPUT_QUIET
)

# "\\." is required for a literal dot: inside a quoted CMake argument "\." collapses
# to ".", which as a regex matches any character.
if (output MATCHES "dyld-1015\\.7")
    add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
endif()
951
+
952
+ # Architecture specific
953
+ # TODO: probably these flags need to be tweaked on some architectures
954
+ # feel free to update the Makefile for your architecture and send a pull request or issue
955
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

# Lower-cased generator platform; stays empty unless building with MSVC.
set(CMAKE_GENERATOR_PLATFORM_LWR "")
if (MSVC)
    string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
    message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
endif()
962
+
963
# Static linking flags for non-MSVC toolchains.
if (LLAMA_STATIC AND NOT MSVC)
    add_link_options(-static)
    if (MINGW)
        add_link_options(-static-libgcc -static-libstdc++)
    endif()
endif()

# gprof instrumentation for non-MSVC toolchains.
if (LLAMA_GPROF AND NOT MSVC)
    add_compile_options(-pg)
endif()
974
+
975
# Architecture-specific compiler flags, collected in ARCH_FLAGS and applied below.
# The target is taken from CMAKE_OSX_ARCHITECTURES, then the (lower-cased) generator
# platform, and only when both are empty from CMAKE_SYSTEM_PROCESSOR.
#
# CMAKE_SYSTEM_PROCESSOR / CMAKE_SYSTEM_NAME are tested through their variable names:
# an unquoted ${...} expansion inside if()/elseif() is a syntax error when the
# variable is empty.
set(ARCH_FLAGS "")

if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
    (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
     CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
    message(STATUS "ARM detected")
    if (MSVC)
        add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
        add_compile_definitions(__ARM_NEON)
        add_compile_definitions(__ARM_FEATURE_FMA)

        # Probe optional instruction-set support with /arch:armv8.2 enabled, then
        # restore the previous CMAKE_REQUIRED_FLAGS.
        set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
        string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
        if (GGML_COMPILER_SUPPORT_DOTPROD)
            add_compile_definitions(__ARM_FEATURE_DOTPROD)
        endif ()
        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
        if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
            add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
        endif ()
        set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
    else()
        check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
        if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
            list(APPEND ARCH_FLAGS -mfp16-format=ieee)
        endif()
        if (CMAKE_SYSTEM_PROCESSOR MATCHES "armv6")
            # Raspberry Pi 1, Zero
            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
        endif()
        if (CMAKE_SYSTEM_PROCESSOR MATCHES "armv7")
            if (CMAKE_SYSTEM_NAME STREQUAL "Android")
                # Android armeabi-v7a
                list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
            else()
                # Raspberry Pi 2
                list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
            endif()
        endif()
        if (CMAKE_SYSTEM_PROCESSOR MATCHES "armv8")
            # Android arm64-v8a
            # Raspberry Pi 3, 4, Zero 2 (32-bit)
            list(APPEND ARCH_FLAGS -mno-unaligned-access)
        endif()
    endif()
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
    message(STATUS "x86 detected")
    if (MSVC)
        # instruction set detection for MSVC only
        if (LLAMA_NATIVE)
            include(cmake/FindSIMD.cmake)
        endif ()
        if (LLAMA_AVX512)
            list(APPEND ARCH_FLAGS /arch:AVX512)
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
            # Do it manually.
            if (LLAMA_AVX512_VBMI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
            endif()
            if (LLAMA_AVX512_VNNI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
            endif()
        elseif (LLAMA_AVX2)
            list(APPEND ARCH_FLAGS /arch:AVX2)
        elseif (LLAMA_AVX)
            list(APPEND ARCH_FLAGS /arch:AVX)
        endif()
    else()
        if (LLAMA_NATIVE)
            list(APPEND ARCH_FLAGS -march=native)
        endif()
        if (LLAMA_F16C)
            list(APPEND ARCH_FLAGS -mf16c)
        endif()
        if (LLAMA_FMA)
            list(APPEND ARCH_FLAGS -mfma)
        endif()
        if (LLAMA_AVX)
            list(APPEND ARCH_FLAGS -mavx)
        endif()
        if (LLAMA_AVX2)
            list(APPEND ARCH_FLAGS -mavx2)
        endif()
        if (LLAMA_AVX512)
            list(APPEND ARCH_FLAGS -mavx512f)
            list(APPEND ARCH_FLAGS -mavx512bw)
        endif()
        if (LLAMA_AVX512_VBMI)
            list(APPEND ARCH_FLAGS -mavx512vbmi)
        endif()
        if (LLAMA_AVX512_VNNI)
            list(APPEND ARCH_FLAGS -mavx512vnni)
        endif()
    endif()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64")
    message(STATUS "PowerPC detected")
    if (CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
        list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
    else()
        list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
        #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
    endif()
else()
    message(STATUS "Unknown architecture")
endif()
1087
+
1088
# Hand the architecture flags to the C++ and C compilers.
add_compile_options(
    "$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>"
    "$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>"
)

# CUDA: the host compiler receives the same flags, forwarded through -Xcompiler.
if (LLAMA_CUDA)
    set(CUDA_CXX_FLAGS ${CUDA_CXX_FLAGS} ${ARCH_FLAGS})
    list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument
    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
        set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
    endif()
    add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()
1099
+
1100
if (MINGW)
    # Target Windows 8 for PrefetchVirtualMemory
    add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
endif()

#
# POSIX conformance
#

# clock_gettime came in POSIX.1b (1993)
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
# posix_memalign came in POSIX.1-2001 / SUSv3
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
#
# Somehow in OpenBSD whenever POSIX conformance is specified
# some string functions rely on locale_t availability,
# which was introduced in POSIX.1-2008, forcing us to go higher
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
    set(llama_xopen_source_level 700)
else()
    set(llama_xopen_source_level 600)
endif()
add_compile_definitions(_XOPEN_SOURCE=${llama_xopen_source_level})

# Data types, macros and functions related to controlling CPU affinity and
# some memory allocation are available on Linux through GNU extensions in libc
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    add_compile_definitions(_GNU_SOURCE)
endif()

# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
# and on macOS its availability depends on enabling Darwin extensions
# similarly on DragonFly, enabling BSD extensions is necessary
if (CMAKE_SYSTEM_NAME MATCHES "Darwin|iOS|tvOS|DragonFly")
    add_compile_definitions(_DARWIN_C_SOURCE)
endif()

# alloca is a non-standard interface that is not visible on BSDs when
# POSIX conformance is specified, but not all of them provide a clean way
# to enable it in such cases
if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
    add_compile_definitions(__BSD_VISIBLE)
elseif (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
    add_compile_definitions(_NETBSD_SOURCE)
elseif (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
    add_compile_definitions(_BSD_SOURCE)
endif()
1153
+
1154
+ #
1155
+ # libraries
1156
+ #
1157
+
1158
+ # ggml
1159
+
1160
# Core ggml files followed by the sources/headers of every backend enabled above.
set(ggml_all_sources
    ggml.c
    ggml.h
    ggml-alloc.c
    ggml-alloc.h
    ggml-backend.c
    ggml-backend.h
    ggml-quants.c
    ggml-quants.h
)
foreach(ggml_backend CUDA OPENCL METAL MPI EXTRA SYCL KOMPUTE VULKAN ROCM LLAMAFILE)
    list(APPEND ggml_all_sources ${GGML_SOURCES_${ggml_backend}} ${GGML_HEADERS_${ggml_backend}})
endforeach()

# ggml is compiled once as an object library; the static (and optional shared)
# libraries below are assembled from its object files.
add_library(ggml OBJECT ${ggml_all_sources})

target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
target_compile_features   (ggml PUBLIC c_std_11) # don't bump

target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})

add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)

if (BUILD_SHARED_LIBS)
    set_property(TARGET ggml PROPERTY POSITION_INDEPENDENT_CODE ON)
    add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
    target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
    install(TARGETS ggml_shared LIBRARY)
endif()
1194
+
1195
+ # llama
1196
+
1197
# The llama library itself; static or shared according to BUILD_SHARED_LIBS.
set(llama_lib_sources
    llama.cpp
    llama.h
    unicode.h
    unicode.cpp
    unicode-data.cpp
)
add_library(llama ${llama_lib_sources})

target_include_directories(llama PUBLIC .)
target_compile_features   (llama PUBLIC cxx_std_11) # don't bump

target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS})

if (BUILD_SHARED_LIBS)
    set_property(TARGET llama PROPERTY POSITION_INDEPENDENT_CODE ON)
    target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
endif()

# Shared Metal builds register the shader source as a target resource.
if (BUILD_SHARED_LIBS AND LLAMA_METAL)
    set_property(TARGET llama PROPERTY RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
endif()
1220
+
1221
+
1222
+ #
1223
+ # install
1224
+ #
1225
+
1226
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# Install locations (overridable through the cache) and the values read by the
# package-config template.
set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(LLAMA_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files")
set(LLAMA_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary files")
set(LLAMA_BUILD_NUMBER        ${BUILD_NUMBER})
set(LLAMA_BUILD_COMMIT        ${BUILD_COMMIT})
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)

# Generate LlamaConfig.cmake / LlamaConfigVersion.cmake and install both.
configure_package_config_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama
    PATH_VARS
        LLAMA_INCLUDE_INSTALL_DIR
        LLAMA_LIB_INSTALL_DIR
        LLAMA_BIN_INSTALL_DIR
)

write_basic_package_version_file(
    ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
    VERSION ${LLAMA_INSTALL_VERSION}
    COMPATIBILITY SameMajorVersion
)

install(
    FILES
        ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama
)

# Public headers of ggml and llama.
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
    "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
    "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
install(TARGETS ggml PUBLIC_HEADER)

set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/llama.h)
install(TARGETS llama LIBRARY PUBLIC_HEADER)

# Conversion scripts are installed as executables next to the binaries.
install(
    FILES
        convert.py
        convert-lora-to-ggml.py
    PERMISSIONS
        OWNER_READ
        OWNER_WRITE
        OWNER_EXECUTE
        GROUP_READ
        GROUP_EXECUTE
        WORLD_READ
        WORLD_EXECUTE
    DESTINATION ${CMAKE_INSTALL_BINDIR}
)

# Metal shader source, plus the compiled library when it is not embedded.
if (LLAMA_METAL)
    install(
        FILES ggml-metal.metal
        PERMISSIONS
            OWNER_READ
            OWNER_WRITE
            GROUP_READ
            WORLD_READ
        DESTINATION ${CMAKE_INSTALL_BINDIR}
    )
    if (NOT LLAMA_METAL_EMBED_LIBRARY)
        install(
            FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
            DESTINATION ${CMAKE_INSTALL_BINDIR}
        )
    endif()
endif()
1305
+
1306
+ #
1307
+ # programs, examples and tests
1308
+ #
1309
+
1310
# The common helper library is always built.
add_subdirectory(common)

# Tests are skipped when the build is driven by cmake-js (CMAKE_JS_VERSION set).
if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
    include(CTest)
    add_subdirectory(tests)
endif()

if (LLAMA_BUILD_EXAMPLES)
    foreach(llama_extra_dir examples pocs)
        add_subdirectory(${llama_extra_dir})
    endforeach()
endif()