@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +1 -1
  21. package/src/LlamaContext.cpp +81 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
@@ -1,261 +1,336 @@
1
- add_library(ggml-cpu
2
- ggml-cpu.c
3
- ggml-cpu.cpp
4
- ggml-cpu-aarch64.c
5
- ggml-cpu-aarch64.h
6
- ggml-cpu-quants.c
7
- ggml-cpu-quants.h
8
- )
1
+ function(ggml_add_cpu_backend_variant_impl tag_name)
2
+ if (tag_name)
3
+ set(GGML_CPU_NAME ggml-cpu-${tag_name})
4
+ else()
5
+ set(GGML_CPU_NAME ggml-cpu)
6
+ endif()
9
7
 
10
- target_link_libraries(ggml-cpu PRIVATE ggml-base)
11
- target_include_directories(ggml-cpu PRIVATE . ..)
8
+ ggml_add_backend_library(${GGML_CPU_NAME})
12
9
 
13
- if (APPLE AND GGML_ACCELERATE)
14
- find_library(ACCELERATE_FRAMEWORK Accelerate)
15
- if (ACCELERATE_FRAMEWORK)
16
- message(STATUS "Accelerate framework found")
10
+ list (APPEND GGML_CPU_SOURCES
11
+ ggml-cpu/ggml-cpu.c
12
+ ggml-cpu/ggml-cpu.cpp
13
+ ggml-cpu/ggml-cpu-aarch64.cpp
14
+ ggml-cpu/ggml-cpu-aarch64.h
15
+ ggml-cpu/ggml-cpu-hbm.cpp
16
+ ggml-cpu/ggml-cpu-hbm.h
17
+ ggml-cpu/ggml-cpu-quants.c
18
+ ggml-cpu/ggml-cpu-quants.h
19
+ ggml-cpu/ggml-cpu-traits.cpp
20
+ ggml-cpu/ggml-cpu-traits.h
21
+ ggml-cpu/amx/amx.cpp
22
+ ggml-cpu/amx/amx.h
23
+ ggml-cpu/amx/mmq.cpp
24
+ ggml-cpu/amx/mmq.h
25
+ ggml-cpu/ggml-cpu-impl.h
26
+ )
17
27
 
18
- add_compile_definitions(GGML_USE_ACCELERATE)
19
- add_compile_definitions(ACCELERATE_NEW_LAPACK)
20
- add_compile_definitions(ACCELERATE_LAPACK_ILP64)
28
+ target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
29
+ target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
21
30
 
22
- target_link_libraries(ggml-cpu PRIVATE ${ACCELERATE_FRAMEWORK})
23
- else()
24
- message(WARNING "Accelerate framework not found")
25
- endif()
26
- endif()
31
+ if (APPLE AND GGML_ACCELERATE)
32
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
33
+ if (ACCELERATE_FRAMEWORK)
34
+ message(STATUS "Accelerate framework found")
27
35
 
28
- if (GGML_OPENMP)
29
- find_package(OpenMP)
30
- if (OpenMP_FOUND)
31
- message(STATUS "OpenMP found")
36
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
37
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
38
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
32
39
 
33
- add_compile_definitions(GGML_USE_OPENMP)
40
+ target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
41
+ else()
42
+ message(WARNING "Accelerate framework not found")
43
+ endif()
44
+ endif()
34
45
 
35
- target_link_libraries(ggml-cpu PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
46
+ if (GGML_OPENMP)
47
+ find_package(OpenMP)
48
+ if (OpenMP_FOUND)
49
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
36
50
 
37
- # FIXME: should be replaced with a compiler id check
38
- #if (GGML_MUSA)
39
- # list(APPEND GGML_CPU_EXTRA_INCLUDES "/usr/lib/llvm-14/lib/clang/14.0.0/include")
40
- # list(APPEND GGML_CPU_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-14/lib/libomp.so")
41
- #endif()
42
- else()
43
- message(WARNING "OpenMP not found")
51
+ target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
52
+ else()
53
+ message(WARNING "OpenMP not found")
54
+ endif()
44
55
  endif()
45
- endif()
46
56
 
47
- if (GGML_LLAMAFILE)
48
- message(STATUS "Using llamafile")
57
+ if (GGML_LLAMAFILE)
58
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
49
59
 
50
- add_compile_definitions(GGML_USE_LLAMAFILE)
60
+ list(APPEND GGML_CPU_SOURCES
61
+ ggml-cpu/llamafile/sgemm.cpp
62
+ ggml-cpu/llamafile/sgemm.h)
63
+ endif()
51
64
 
52
- target_sources(ggml-cpu PRIVATE
53
- llamafile/sgemm.cpp
54
- llamafile/sgemm.h)
55
- endif()
65
+ if (GGML_CPU_HBM)
66
+ find_library(memkind memkind REQUIRED)
56
67
 
57
- if (GGML_CPU_HBM)
58
- find_library(memkind memkind REQUIRED)
68
+ message(STATUS "Using memkind for CPU HBM")
59
69
 
60
- message(STATUS "Using memkind for CPU HBM")
70
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
61
71
 
62
- add_compile_definitions(GGML_USE_CPU_HBM)
72
+ target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
73
+ endif()
63
74
 
64
- target_link_libraries(ggml-cpu PUBLIC memkind)
65
- endif()
75
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
76
+ CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
77
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
78
+ CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
66
79
 
67
- if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
68
- CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
69
- (NOT CMAKE_OSX_ARCHITECTURES AND
70
- NOT CMAKE_GENERATOR_PLATFORM_LWR AND
71
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
80
+ message(STATUS "ARM detected")
72
81
 
73
- message(STATUS "ARM detected")
82
+ if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
83
+ message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
84
+ else()
85
+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
86
+ if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
87
+ list(APPEND ARCH_FLAGS -mfp16-format=ieee)
88
+ endif()
74
89
 
75
- if (MSVC)
76
- add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
77
- add_compile_definitions(__ARM_NEON)
78
- add_compile_definitions(__ARM_FEATURE_FMA)
90
+ if (GGML_NATIVE)
91
+ # -mcpu=native does not always enable all the features in some compilers,
92
+ # so we check for them manually and enable them if available
79
93
 
80
- set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
81
- string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
94
+ execute_process(
95
+ COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
96
+ INPUT_FILE "/dev/null"
97
+ OUTPUT_QUIET
98
+ ERROR_VARIABLE ARM_MCPU
99
+ RESULT_VARIABLE ARM_MCPU_RESULT
100
+ )
101
+ if (NOT ARM_MCPU_RESULT)
102
+ string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
103
+ endif()
104
+ if ("${ARM_MCPU_FLAG}" STREQUAL "")
105
+ set(ARM_MCPU_FLAG -mcpu=native)
106
+ message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
107
+ endif()
82
108
 
83
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
84
- if (GGML_COMPILER_SUPPORT_DOTPROD)
85
- add_compile_definitions(__ARM_FEATURE_DOTPROD)
86
- endif ()
109
+ set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
110
+ include(CheckCXXSourceRuns)
87
111
 
88
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
112
+ set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+dotprod")
113
+ check_cxx_source_runs(
114
+ "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
115
+ GGML_COMPILER_SUPPORT_DOTPROD)
116
+ if (GGML_COMPILER_SUPPORT_DOTPROD)
117
+ set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+dotprod")
118
+ endif()
89
119
 
90
- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
91
- add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
92
- endif ()
120
+ set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+i8mm")
121
+ check_cxx_source_runs(
122
+ "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
123
+ GGML_COMPILER_SUPPORT_I8MM)
124
+ if (GGML_COMPILER_SUPPORT_I8MM)
125
+ set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+i8mm")
126
+ endif()
93
127
 
94
- check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
95
- if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
96
- add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
97
- endif ()
128
+ set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
129
+ list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
98
130
 
99
- set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
100
- else()
101
- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
102
- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
103
- list(APPEND ARCH_FLAGS -mfp16-format=ieee)
104
- endif()
105
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
106
- # Raspberry Pi 1, Zero
107
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
108
- endif()
109
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
110
- if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
111
- # Android armeabi-v7a
112
- list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
113
131
  else()
114
- # Raspberry Pi 2
115
- list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
132
+ if (GGML_CPU_ARM_ARCH)
133
+ list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
134
+ endif()
135
+ endif()
136
+
137
+ # show enabled features
138
+ execute_process(
139
+ COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
140
+ INPUT_FILE "/dev/null"
141
+ OUTPUT_VARIABLE ARM_FEATURE
142
+ RESULT_VARIABLE ARM_FEATURE_RESULT
143
+ )
144
+ if (ARM_FEATURE_RESULT)
145
+ message(FATAL_ERROR "Failed to get ARM features")
146
+ else()
147
+ foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
148
+ string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
149
+ if (NOT ${feature_pos} EQUAL -1)
150
+ message(STATUS "ARM feature ${feature} enabled")
151
+ endif()
152
+ endforeach()
116
153
  endif()
117
154
  endif()
118
- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
119
- # Android arm64-v8a
120
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
121
- list(APPEND ARCH_FLAGS -mno-unaligned-access)
122
- endif()
123
- if (GGML_SVE)
124
- list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
125
- endif()
126
- endif()
127
- elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
128
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
129
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
130
- message(STATUS "x86 detected")
131
- if (MSVC)
132
- # instruction set detection for MSVC only
133
- if (GGML_NATIVE)
134
- # TODO: improve, should not reference files from the parent folder
135
- include(cmake/FindSIMD.cmake)
136
- endif ()
137
- if (GGML_AVX512)
138
- list(APPEND ARCH_FLAGS /arch:AVX512)
139
- # MSVC has no compile-time flags enabling specific
140
- # AVX512 extensions, neither it defines the
141
- # macros corresponding to the extensions.
142
- # Do it manually.
143
- if (GGML_AVX512_VBMI)
144
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
145
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
146
- if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
147
- list(APPEND ARCH_FLAGS -mavx512vbmi)
155
+ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
156
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
157
+ CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
158
+
159
+ message(STATUS "x86 detected")
160
+
161
+ if (MSVC)
162
+ # instruction set detection for MSVC only
163
+ if (GGML_NATIVE)
164
+ include(ggml-cpu/cmake/FindSIMD.cmake)
165
+ endif ()
166
+ if (GGML_AVX512)
167
+ list(APPEND ARCH_FLAGS /arch:AVX512)
168
+ # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
169
+ # MSVC has no compile-time flags enabling specific
170
+ # AVX512 extensions, neither it defines the
171
+ # macros corresponding to the extensions.
172
+ # Do it manually.
173
+ list(APPEND ARCH_DEFINITIONS GGML_AVX512)
174
+ if (GGML_AVX512_VBMI)
175
+ list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
176
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
177
+ list(APPEND ARCH_FLAGS -mavx512vbmi)
178
+ endif()
179
+ endif()
180
+ if (GGML_AVX512_VNNI)
181
+ list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
182
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
183
+ list(APPEND ARCH_FLAGS -mavx512vnni)
184
+ endif()
185
+ endif()
186
+ if (GGML_AVX512_BF16)
187
+ list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
188
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
189
+ list(APPEND ARCH_FLAGS -mavx512bf16)
190
+ endif()
191
+ endif()
192
+ if (GGML_AMX_TILE)
193
+ list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
148
194
  endif()
195
+ if (GGML_AMX_INT8)
196
+ list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
197
+ endif()
198
+ if (GGML_AMX_BF16)
199
+ list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
200
+ endif()
201
+ elseif (GGML_AVX2)
202
+ list(APPEND ARCH_FLAGS /arch:AVX2)
203
+ list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
204
+ elseif (GGML_AVX)
205
+ list(APPEND ARCH_FLAGS /arch:AVX)
206
+ list(APPEND ARCH_DEFINITIONS GGML_AVX)
207
+ else ()
208
+ list(APPEND ARCH_FLAGS /arch:SSE4.2)
209
+ list(APPEND ARCH_DEFINITIONS GGML_SSE42)
210
+ endif()
211
+ if (GGML_AVX_VNNI)
212
+ # MSVC generates AVX512 with AVX-VNNI intrinsics even with /arch:AVX2
213
+ #list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
149
214
  endif()
150
- if (GGML_AVX512_VNNI)
151
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
152
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
153
- if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
215
+ else ()
216
+ if (GGML_NATIVE)
217
+ list(APPEND ARCH_FLAGS -march=native)
218
+ else ()
219
+ list(APPEND ARCH_FLAGS -msse4.2)
220
+ list(APPEND ARCH_DEFINITIONS GGML_SSE42)
221
+ if (GGML_F16C)
222
+ list(APPEND ARCH_FLAGS -mf16c)
223
+ list(APPEND ARCH_DEFINITIONS GGML_F16C)
224
+ endif()
225
+ if (GGML_FMA)
226
+ list(APPEND ARCH_FLAGS -mfma)
227
+ list(APPEND ARCH_DEFINITIONS GGML_FMA)
228
+ endif()
229
+ if (GGML_AVX)
230
+ list(APPEND ARCH_FLAGS -mavx)
231
+ list(APPEND ARCH_DEFINITIONS GGML_AVX)
232
+ endif()
233
+ if (GGML_AVX2)
234
+ list(APPEND ARCH_FLAGS -mavx2)
235
+ list(APPEND ARCH_DEFINITIONS GGML_AVX2)
236
+ endif()
237
+ if (GGML_AVX_VNNI)
238
+ list(APPEND ARCH_FLAGS -mavxvnni)
239
+ list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
240
+ endif()
241
+ if (GGML_AVX512)
242
+ list(APPEND ARCH_FLAGS -mavx512f)
243
+ list(APPEND ARCH_FLAGS -mavx512cd)
244
+ list(APPEND ARCH_FLAGS -mavx512vl)
245
+ list(APPEND ARCH_FLAGS -mavx512dq)
246
+ list(APPEND ARCH_FLAGS -mavx512bw)
247
+ list(APPEND ARCH_DEFINITIONS GGML_AVX512)
248
+ endif()
249
+ if (GGML_AVX512_VBMI)
250
+ list(APPEND ARCH_FLAGS -mavx512vbmi)
251
+ list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
252
+ endif()
253
+ if (GGML_AVX512_VNNI)
154
254
  list(APPEND ARCH_FLAGS -mavx512vnni)
255
+ list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
155
256
  endif()
156
- endif()
157
- if (GGML_AVX512_BF16)
158
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
159
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
160
- if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
257
+ if (GGML_AVX512_BF16)
161
258
  list(APPEND ARCH_FLAGS -mavx512bf16)
259
+ list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
260
+ endif()
261
+ if (GGML_AMX_TILE)
262
+ list(APPEND ARCH_FLAGS -mamx-tile)
263
+ list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
264
+ endif()
265
+ if (GGML_AMX_INT8)
266
+ list(APPEND ARCH_FLAGS -mamx-int8)
267
+ list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
268
+ endif()
269
+ if (GGML_AMX_BF16)
270
+ list(APPEND ARCH_FLAGS -mamx-bf16)
271
+ list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
162
272
  endif()
163
273
  endif()
164
- if (GGML_AMX_TILE)
165
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
166
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
167
- endif()
168
- if (GGML_AMX_INT8)
169
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
170
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
171
- endif()
172
- if (GGML_AMX_BF16)
173
- add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
174
- add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
175
- endif()
176
- elseif (GGML_AVX2)
177
- list(APPEND ARCH_FLAGS /arch:AVX2)
178
- elseif (GGML_AVX)
179
- list(APPEND ARCH_FLAGS /arch:AVX)
180
- endif()
181
- else()
182
- if (GGML_NATIVE)
183
- list(APPEND ARCH_FLAGS -march=native)
184
- endif()
185
- if (GGML_F16C)
186
- list(APPEND ARCH_FLAGS -mf16c)
187
- endif()
188
- if (GGML_FMA)
189
- list(APPEND ARCH_FLAGS -mfma)
190
- endif()
191
- if (GGML_AVX)
192
- list(APPEND ARCH_FLAGS -mavx)
193
- endif()
194
- if (GGML_AVX2)
195
- list(APPEND ARCH_FLAGS -mavx2)
196
- endif()
197
- if (GGML_AVX512)
198
- list(APPEND ARCH_FLAGS -mavx512f)
199
- list(APPEND ARCH_FLAGS -mavx512dq)
200
- list(APPEND ARCH_FLAGS -mavx512bw)
201
- endif()
202
- if (GGML_AVX512_VBMI)
203
- list(APPEND ARCH_FLAGS -mavx512vbmi)
204
274
  endif()
205
- if (GGML_AVX512_VNNI)
206
- list(APPEND ARCH_FLAGS -mavx512vnni)
275
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
276
+ message(STATUS "PowerPC detected")
277
+ execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
278
+ string(FIND "${POWER10_M}" "POWER10" substring_index)
279
+ if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
280
+ set(substring_index -1)
207
281
  endif()
208
- if (GGML_AVX512_BF16)
209
- list(APPEND ARCH_FLAGS -mavx512bf16)
282
+
283
+ if (${substring_index} GREATER_EQUAL 0)
284
+ list(APPEND ARCH_FLAGS -mcpu=power10)
285
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
286
+ list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
287
+ else()
288
+ list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
289
+ # TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
210
290
  endif()
211
- if (GGML_AMX_TILE)
212
- list(APPEND ARCH_FLAGS -mamx-tile)
291
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
292
+ message(STATUS "loongarch64 detected")
293
+
294
+ list(APPEND ARCH_FLAGS -march=loongarch64)
295
+ if (GGML_LASX)
296
+ list(APPEND ARCH_FLAGS -mlasx)
213
297
  endif()
214
- if (GGML_AMX_INT8)
215
- list(APPEND ARCH_FLAGS -mamx-int8)
298
+ if (GGML_LSX)
299
+ list(APPEND ARCH_FLAGS -mlsx)
216
300
  endif()
217
- if (GGML_AMX_BF16)
218
- list(APPEND ARCH_FLAGS -mamx-bf16)
301
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
302
+ message(STATUS "RISC-V detected")
303
+ if (GGML_RVV)
304
+ list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
219
305
  endif()
220
- endif()
221
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
222
- message(STATUS "PowerPC detected")
223
- execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
224
- string(FIND "${POWER10_M}" "POWER10" substring_index)
225
- if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
226
- set(substring_index -1)
227
- endif()
228
-
229
- if (${substring_index} GREATER_EQUAL 0)
230
- list(APPEND ARCH_FLAGS -mcpu=power10)
231
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
232
- list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
233
306
  else()
234
- list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
235
- #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
307
+ message(STATUS "Unknown architecture")
236
308
  endif()
237
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
238
- message(STATUS "loongarch64 detected")
239
309
 
240
- list(APPEND ARCH_FLAGS -march=loongarch64)
241
- if (GGML_LASX)
242
- list(APPEND ARCH_FLAGS -mlasx)
243
- endif()
244
- if (GGML_LSX)
245
- list(APPEND ARCH_FLAGS -mlsx)
310
+ if (GGML_CPU_AARCH64)
311
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
246
312
  endif()
247
- else()
248
- message(STATUS "Unknown architecture")
249
- endif()
250
313
 
251
- if (GGML_CPU_AARCH64)
252
- message(STATUS "Using runtime weight conversion of Q4_0 to Q4_0_x_x to enable optimized GEMM/GEMV kernels")
253
- add_compile_definitions(GGML_USE_CPU_AARCH64)
254
- endif()
314
+ message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
315
+ target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
316
+ target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
317
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
255
318
 
256
- target_compile_options(ggml-cpu PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
257
- target_compile_options(ggml-cpu PRIVATE "$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
319
+ if (GGML_BACKEND_DL)
320
+ # The feature detection code is compiled as a separate target so that
321
+ # it can be built without the architecture flags
322
+ # Since multiple variants of the CPU backend may be included in the same
323
+ # build, using set_source_files_properties() to set the arch flags is not possible
324
+ set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
325
+ add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
326
+ target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
327
+ target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
328
+ target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
329
+ set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
330
+ target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
331
+ endif()
258
332
 
259
- if (EMSCRIPTEN)
260
- set_target_properties(ggml-cpu PROPERTIES COMPILE_FLAGS "-msimd128")
261
- endif()
333
+ if (EMSCRIPTEN)
334
+ set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
335
+ endif()
336
+ endfunction()