@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +1 -1
  21. package/src/LlamaContext.cpp +81 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
@@ -32,7 +32,15 @@ else()
32
32
  endif()
33
33
  endif()
34
34
 
35
+ # remove the lib prefix on win32 mingw
36
+ if (WIN32)
37
+ set(CMAKE_STATIC_LIBRARY_PREFIX "")
38
+ set(CMAKE_SHARED_LIBRARY_PREFIX "")
39
+ set(CMAKE_SHARED_MODULE_PREFIX "")
40
+ endif()
41
+
35
42
  option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
43
+ option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
36
44
 
37
45
  #
38
46
  # option list
@@ -66,10 +74,10 @@ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
66
74
  endif()
67
75
 
68
76
  # general
69
- option(GGML_STATIC "ggml: static link libraries" OFF)
70
- option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
71
- option(GGML_LTO "ggml: enable link time optimization" OFF)
72
- option(GGML_CCACHE "ggml: use ccache if available" ON)
77
+ option(GGML_STATIC "ggml: static link libraries" OFF)
78
+ option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
79
+ option(GGML_LTO "ggml: enable link time optimization" OFF)
80
+ option(GGML_CCACHE "ggml: use ccache if available" ON)
73
81
 
74
82
  # debug
75
83
  option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
@@ -91,28 +99,34 @@ else()
91
99
  set(INS_ENB ON)
92
100
  endif()
93
101
 
94
- option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
95
- option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
96
-
97
- option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
98
- option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
99
- option(GGML_AVX512 "ggml: enable AVX512" OFF)
100
- option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
101
- option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
102
- option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
103
- option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
104
- option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
105
- option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
106
- option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
102
+ option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
103
+ option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
104
+ option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
105
+ option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
106
+ option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
107
+ option(GGML_AVX512 "ggml: enable AVX512F" OFF)
108
+ option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
109
+ option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
110
+ option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
107
111
  if (NOT MSVC)
108
- option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
112
+ # in MSVC F16C and FMA is implied with AVX2/AVX512
113
+ option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
114
+ option(GGML_F16C "ggml: enable F16C" ${INS_ENB})
115
+ # MSVC does not seem to support AMX
116
+ option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
117
+ option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
118
+ option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
109
119
  endif()
110
- option(GGML_LASX "ggml: enable lasx" ON)
111
- option(GGML_LSX "ggml: enable lsx" ON)
112
- option(GGML_SVE "ggml: enable SVE" OFF)
120
+ option(GGML_LASX "ggml: enable lasx" ON)
121
+ option(GGML_LSX "ggml: enable lsx" ON)
122
+ option(GGML_RVV "ggml: enable rvv" ON)
123
+
124
+ option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
125
+ set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
126
+
113
127
 
114
128
  if (WIN32)
115
- set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
129
+ set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
116
130
  endif()
117
131
 
118
132
  # ggml core
@@ -159,11 +173,17 @@ set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
159
173
  set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
160
174
  option(GGML_OPENMP "ggml: use OpenMP" ON)
161
175
  option(GGML_RPC "ggml: use RPC" OFF)
162
- option(GGML_AMX "ggml: use AMX" OFF)
163
176
  option(GGML_SYCL "ggml: use SYCL" OFF)
164
177
  option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
165
178
  set (GGML_SYCL_TARGET "INTEL" CACHE STRING
166
179
  "ggml: sycl target device")
180
+ set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
181
+ "ggml: sycl device architecture")
182
+
183
+ option(GGML_OPENCL "ggml: use OpenCL" OFF)
184
+ option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
185
+ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
186
+ option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
167
187
 
168
188
  # extra artifacts
169
189
  option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
@@ -176,11 +196,7 @@ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
176
196
  set(CMAKE_C_STANDARD 11)
177
197
  set(CMAKE_C_STANDARD_REQUIRED true)
178
198
 
179
- if (GGML_SYCL)
180
- set(CMAKE_CXX_STANDARD 17)
181
- else()
182
- set(CMAKE_CXX_STANDARD 11)
183
- endif()
199
+ set(CMAKE_CXX_STANDARD 17)
184
200
  set(CMAKE_CXX_STANDARD_REQUIRED true)
185
201
 
186
202
  set(THREADS_PREFER_PTHREAD_FLAG ON)
@@ -233,12 +249,8 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
233
249
  #if (GGML_METAL)
234
250
  # set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
235
251
  #endif()
236
- install(TARGETS ggml PUBLIC_HEADER)
237
-
238
- if (BUILD_SHARED_LIBS)
239
- install(TARGETS ggml LIBRARY)
240
- install(TARGETS ggml-base LIBRARY)
241
- endif()
252
+ install(TARGETS ggml LIBRARY PUBLIC_HEADER)
253
+ install(TARGETS ggml-base LIBRARY)
242
254
 
243
255
  # FIXME: this should be done in the backend cmake files
244
256
  if (GGML_METAL)
@@ -190,6 +190,14 @@ extern "C" {
190
190
  typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
191
191
  // Get additional buffer types provided by the device (returns a NULL-terminated array)
192
192
  typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
193
+ // Set the abort callback for the backend
194
+ typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
195
+ // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
196
+ struct ggml_backend_feature {
197
+ const char * name;
198
+ const char * value;
199
+ };
200
+ typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
193
201
 
194
202
  //
195
203
  // Backend registry
@@ -214,6 +222,14 @@ extern "C" {
214
222
  // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
215
223
  GGML_API ggml_backend_t ggml_backend_init_best(void);
216
224
 
225
+ // Load a backend from a dynamic library and register it
226
+ GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
227
+ // Unload a backend if loaded dynamically and unregister it
228
+ GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
229
+ // Load all known backends from dynamic libraries
230
+ GGML_API void ggml_backend_load_all(void);
231
+ GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
232
+
217
233
  //
218
234
  // Backend scheduler
219
235
  //
@@ -7,29 +7,6 @@
7
7
  extern "C" {
8
8
  #endif
9
9
 
10
- // Scheduling priorities
11
- enum ggml_sched_priority {
12
- GGML_SCHED_PRIO_NORMAL,
13
- GGML_SCHED_PRIO_MEDIUM,
14
- GGML_SCHED_PRIO_HIGH,
15
- GGML_SCHED_PRIO_REALTIME
16
- };
17
-
18
- // Threadpool params
19
- // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
20
- struct ggml_threadpool_params {
21
- bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
22
- int n_threads; // number of threads
23
- enum ggml_sched_priority prio; // thread priority
24
- uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
25
- bool strict_cpu; // strict cpu placement
26
- bool paused; // start in paused state
27
- };
28
-
29
- struct ggml_threadpool; // forward declaration, see ggml.c
30
-
31
- typedef struct ggml_threadpool * ggml_threadpool_t;
32
-
33
10
  // the compute plan that needs to be prepared for ggml_graph_compute()
34
11
  // since https://github.com/ggerganov/ggml/issues/287
35
12
  struct ggml_cplan {
@@ -75,14 +52,11 @@ extern "C" {
75
52
  GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
76
53
  GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
77
54
 
78
- GGML_BACKEND_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
79
- GGML_BACKEND_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
80
- GGML_BACKEND_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
81
- GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
82
- GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
83
- GGML_BACKEND_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
84
- GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
85
- GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
55
+ GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
56
+ GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
57
+ GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
58
+ GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
59
+ GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
86
60
 
87
61
  // ggml_graph_plan() has to be called before ggml_graph_compute()
88
62
  // when plan.work_size > 0, caller must allocate memory for plan.work_data
@@ -104,10 +78,10 @@ extern "C" {
104
78
  GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
105
79
  GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
106
80
  GGML_BACKEND_API int ggml_cpu_has_avx (void);
81
+ GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
107
82
  GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
108
83
  GGML_BACKEND_API int ggml_cpu_has_f16c (void);
109
84
  GGML_BACKEND_API int ggml_cpu_has_fma (void);
110
- GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
111
85
  GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
112
86
  GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
113
87
  GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
@@ -117,6 +91,7 @@ extern "C" {
117
91
  GGML_BACKEND_API int ggml_cpu_has_neon (void);
118
92
  GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
119
93
  GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
94
+ GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
120
95
  GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
121
96
  GGML_BACKEND_API int ggml_cpu_has_sve (void);
122
97
  GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
@@ -128,24 +103,14 @@ extern "C" {
128
103
 
129
104
  // Internal types and functions exposed for tests and benchmarks
130
105
 
131
- typedef void (*ggml_from_float_to_mat_t)
132
- (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
133
106
  typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
134
107
  const void * GGML_RESTRICT y, size_t by, int nrc);
135
- typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
136
- const void * GGML_RESTRICT y, int nr, int nc);
137
- typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
138
- const void * GGML_RESTRICT y, int nr, int nc);
139
108
 
140
109
  struct ggml_type_traits_cpu {
141
110
  ggml_from_float_t from_float;
142
- ggml_from_float_to_mat_t from_float_to_mat;
143
111
  ggml_vec_dot_t vec_dot;
144
112
  enum ggml_type vec_dot_type;
145
113
  int64_t nrows; // number of rows to process simultaneously
146
- int64_t ncols; // number of columns to process simultaneously
147
- ggml_gemv_t gemv;
148
- ggml_gemm_t gemm;
149
114
  };
150
115
 
151
116
  GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
@@ -165,13 +130,6 @@ extern "C" {
165
130
 
166
131
  GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
167
132
 
168
- #ifdef GGML_USE_CPU_HBM
169
- GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
170
- #endif
171
-
172
- GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
173
- GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
174
-
175
133
  #ifdef __cplusplus
176
134
  }
177
135
  #endif
@@ -0,0 +1,26 @@
1
+ #ifndef GGML_OPENCL_H
2
+ #define GGML_OPENCL_H
3
+
4
+ #include "ggml.h"
5
+ #include "ggml-backend.h"
6
+
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ //
12
+ // backend API
13
+ //
14
+ GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
15
+ GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
16
+
17
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
18
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
19
+
20
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
21
+
22
+ #ifdef __cplusplus
23
+ }
24
+ #endif
25
+
26
+ #endif // GGML_OPENCL_H
@@ -237,7 +237,9 @@
237
237
  #define GGML_EXIT_SUCCESS 0
238
238
  #define GGML_EXIT_ABORTED 1
239
239
 
240
- #define GGML_ROPE_TYPE_NEOX 2
240
+ #define GGML_ROPE_TYPE_NEOX 2
241
+ #define GGML_ROPE_TYPE_MROPE 8
242
+ #define GGML_ROPE_TYPE_VISION 24
241
243
 
242
244
  #define GGUF_MAGIC "GGUF"
243
245
 
@@ -384,12 +386,15 @@ extern "C" {
384
386
  GGML_TYPE_F64 = 28,
385
387
  GGML_TYPE_IQ1_M = 29,
386
388
  GGML_TYPE_BF16 = 30,
387
- GGML_TYPE_Q4_0_4_4 = 31,
388
- GGML_TYPE_Q4_0_4_8 = 32,
389
- GGML_TYPE_Q4_0_8_8 = 33,
389
+ // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
390
+ // GGML_TYPE_Q4_0_4_8 = 32,
391
+ // GGML_TYPE_Q4_0_8_8 = 33,
390
392
  GGML_TYPE_TQ1_0 = 34,
391
393
  GGML_TYPE_TQ2_0 = 35,
392
- GGML_TYPE_COUNT,
394
+ // GGML_TYPE_IQ4_NL_4_4 = 36,
395
+ // GGML_TYPE_IQ4_NL_4_8 = 37,
396
+ // GGML_TYPE_IQ4_NL_8_8 = 38,
397
+ GGML_TYPE_COUNT = 39,
393
398
  };
394
399
 
395
400
  // precision
@@ -430,9 +435,6 @@ extern "C" {
430
435
  GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
431
436
  GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
432
437
  GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
433
- GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
434
- GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
435
- GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
436
438
  };
437
439
 
438
440
  // available tensor operations:
@@ -496,6 +498,7 @@ extern "C" {
496
498
  GGML_OP_POOL_2D_BACK,
497
499
  GGML_OP_UPSCALE, // nearest interpolate
498
500
  GGML_OP_PAD,
501
+ GGML_OP_PAD_REFLECT_1D,
499
502
  GGML_OP_ARANGE,
500
503
  GGML_OP_TIMESTEP_EMBEDDING,
501
504
  GGML_OP_ARGSORT,
@@ -1442,6 +1445,22 @@ extern "C" {
1442
1445
  float beta_fast,
1443
1446
  float beta_slow);
1444
1447
 
1448
+ GGML_API struct ggml_tensor * ggml_rope_multi(
1449
+ struct ggml_context * ctx,
1450
+ struct ggml_tensor * a,
1451
+ struct ggml_tensor * b,
1452
+ struct ggml_tensor * c,
1453
+ int n_dims,
1454
+ int sections[4],
1455
+ int mode,
1456
+ int n_ctx_orig,
1457
+ float freq_base,
1458
+ float freq_scale,
1459
+ float ext_factor,
1460
+ float attn_factor,
1461
+ float beta_fast,
1462
+ float beta_slow);
1463
+
1445
1464
  // in-place, returns view(a)
1446
1465
  GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
1447
1466
  struct ggml_context * ctx,
@@ -1545,17 +1564,6 @@ extern "C" {
1545
1564
  int d1, // dilation dimension 1
1546
1565
  bool is_2D);
1547
1566
 
1548
- GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
1549
- struct ggml_context * ctx,
1550
- struct ggml_tensor * a, // convolution kernel
1551
- struct ggml_tensor * b, // data
1552
- int s0, // stride dimension 0
1553
- int s1, // stride dimension 1
1554
- int p0, // padding dimension 0
1555
- int p1, // padding dimension 1
1556
- int d0, // dilation dimension 0
1557
- int d1); // dilation dimension 1
1558
-
1559
1567
  GGML_API struct ggml_tensor * ggml_conv_1d(
1560
1568
  struct ggml_context * ctx,
1561
1569
  struct ggml_tensor * a, // convolution kernel
@@ -1573,6 +1581,23 @@ extern "C" {
1573
1581
  int s, // stride
1574
1582
  int d); // dilation
1575
1583
 
1584
+ // depthwise
1585
+ // TODO: this is very likely wrong for some cases! - needs more testing
1586
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw(
1587
+ struct ggml_context * ctx,
1588
+ struct ggml_tensor * a, // convolution kernel
1589
+ struct ggml_tensor * b, // data
1590
+ int s0, // stride
1591
+ int p0, // padding
1592
+ int d0); // dilation
1593
+
1594
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
1595
+ struct ggml_context * ctx,
1596
+ struct ggml_tensor * a, // convolution kernel
1597
+ struct ggml_tensor * b, // data
1598
+ int s0, // stride
1599
+ int d0); // dilation
1600
+
1576
1601
  GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
1577
1602
  struct ggml_context * ctx,
1578
1603
  struct ggml_tensor * a, // convolution kernel
@@ -1592,7 +1617,6 @@ extern "C" {
1592
1617
  int d0, // dilation dimension 0
1593
1618
  int d1); // dilation dimension 1
1594
1619
 
1595
-
1596
1620
  // kernel size is a->ne[0] x a->ne[1]
1597
1621
  // stride is equal to kernel size
1598
1622
  // padding is zero
@@ -1619,6 +1643,18 @@ extern "C" {
1619
1643
  struct ggml_tensor * a,
1620
1644
  struct ggml_tensor * b);
1621
1645
 
1646
+ // depthwise
1647
+ GGML_API struct ggml_tensor * ggml_conv_2d_dw(
1648
+ struct ggml_context * ctx,
1649
+ struct ggml_tensor * a, // convolution kernel
1650
+ struct ggml_tensor * b, // data
1651
+ int s0, // stride dimension 0
1652
+ int s1, // stride dimension 1
1653
+ int p0, // padding dimension 0
1654
+ int p1, // padding dimension 1
1655
+ int d0, // dilation dimension 0
1656
+ int d1); // dilation dimension 1
1657
+
1622
1658
  GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
1623
1659
  struct ggml_context * ctx,
1624
1660
  struct ggml_tensor * a,
@@ -1692,6 +1728,13 @@ extern "C" {
1692
1728
  int p2,
1693
1729
  int p3);
1694
1730
 
1731
+ // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
1732
+ GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
1733
+ struct ggml_context * ctx,
1734
+ struct ggml_tensor * a,
1735
+ int p0,
1736
+ int p1);
1737
+
1695
1738
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1696
1739
  // timesteps: [N,]
1697
1740
  // return: [N, dim]
@@ -2194,11 +2237,19 @@ extern "C" {
2194
2237
  GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
2195
2238
  GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
2196
2239
 
2197
- #ifdef __cplusplus
2198
- // restrict not standard in C++
2199
- #define GGML_RESTRICT
2240
+ #ifdef __cplusplus
2241
+ // restrict not standard in C++
2242
+ # if defined(__GNUC__)
2243
+ # define GGML_RESTRICT __restrict__
2244
+ # elif defined(__clang__)
2245
+ # define GGML_RESTRICT __restrict
2246
+ # elif defined(_MSC_VER)
2247
+ # define GGML_RESTRICT __restrict
2248
+ # else
2249
+ # define GGML_RESTRICT
2250
+ # endif
2200
2251
  #else
2201
- #define GGML_RESTRICT restrict
2252
+ # define GGML_RESTRICT restrict
2202
2253
  #endif
2203
2254
  typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2204
2255
  typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -2215,6 +2266,37 @@ extern "C" {
2215
2266
 
2216
2267
  GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
2217
2268
 
2269
+ // ggml threadpool
2270
+ // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
2271
+ // the goal should be to create an API that other backends can use move everything to the ggml base
2272
+
2273
+ // scheduling priorities
2274
+ enum ggml_sched_priority {
2275
+ GGML_SCHED_PRIO_NORMAL,
2276
+ GGML_SCHED_PRIO_MEDIUM,
2277
+ GGML_SCHED_PRIO_HIGH,
2278
+ GGML_SCHED_PRIO_REALTIME
2279
+ };
2280
+
2281
+ // threadpool params
2282
+ // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
2283
+ struct ggml_threadpool_params {
2284
+ bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
2285
+ int n_threads; // number of threads
2286
+ enum ggml_sched_priority prio; // thread priority
2287
+ uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
2288
+ bool strict_cpu; // strict cpu placement
2289
+ bool paused; // start in paused state
2290
+ };
2291
+
2292
+ struct ggml_threadpool; // forward declaration, see ggml.c
2293
+
2294
+ typedef struct ggml_threadpool * ggml_threadpool_t;
2295
+
2296
+ GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
2297
+ GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
2298
+ GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
2299
+
2218
2300
  #ifdef __cplusplus
2219
2301
  }
2220
2302
  #endif
@@ -24,7 +24,7 @@ if (NOT MSVC)
24
24
  endif()
25
25
  endif()
26
26
 
27
- function(get_flags CCID CCVER)
27
+ function(ggml_get_flags CCID CCVER)
28
28
  set(C_FLAGS "")
29
29
  set(CXX_FLAGS "")
30
30
 
@@ -41,6 +41,7 @@ function(get_flags CCID CCVER)
41
41
  elseif (CCID STREQUAL "GNU")
42
42
  set(C_FLAGS -Wdouble-promotion)
43
43
  set(CXX_FLAGS -Wno-array-bounds)
44
+
44
45
  if (CCVER VERSION_GREATER_EQUAL 8.1.0)
45
46
  list(APPEND CXX_FLAGS -Wextra-semi)
46
47
  endif()
@@ -69,7 +70,7 @@ if (GGML_ALL_WARNINGS)
69
70
  list(APPEND C_FLAGS ${WARNING_FLAGS})
70
71
  list(APPEND CXX_FLAGS ${WARNING_FLAGS})
71
72
 
72
- get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
73
+ ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
73
74
 
74
75
  add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
75
76
  "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
@@ -193,15 +194,14 @@ endif()
193
194
 
194
195
  if (WIN32)
195
196
  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
196
-
197
- if (BUILD_SHARED_LIBS)
198
- # TODO: should not use this
199
- set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
200
- endif()
201
197
  endif()
202
198
 
203
199
  # ggml
204
200
 
201
+ if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
202
+ message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
203
+ endif()
204
+
205
205
  add_library(ggml-base
206
206
  ../include/ggml.h
207
207
  ../include/ggml-alloc.h
@@ -215,9 +215,7 @@ add_library(ggml-base
215
215
  ggml-threading.cpp
216
216
  ggml-threading.h
217
217
  ggml-quants.c
218
- ggml-quants.h
219
- ggml-aarch64.c
220
- ggml-aarch64.h)
218
+ ggml-quants.h)
221
219
 
222
220
  target_include_directories(ggml-base PRIVATE .)
223
221
 
@@ -226,44 +224,95 @@ add_library(ggml
226
224
 
227
225
  target_link_libraries(ggml PUBLIC ggml-base)
228
226
 
227
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
228
+ target_link_libraries(ggml PRIVATE dl)
229
+ endif()
230
+
231
+ function(ggml_add_backend_library backend)
232
+ if (GGML_BACKEND_DL)
233
+ add_library(${backend} MODULE ${ARGN})
234
+ # write the shared library to the output directory
235
+ set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
236
+ target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
237
+ else()
238
+ add_library(${backend} ${ARGN})
239
+ target_link_libraries(ggml PUBLIC ${backend})
240
+ install(TARGETS ${backend} LIBRARY)
241
+ endif()
242
+
243
+ target_link_libraries(${backend} PRIVATE ggml-base)
244
+ target_include_directories(${backend} PRIVATE ..)
245
+
246
+ if (${BUILD_SHARED_LIBS})
247
+ target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
248
+ target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
249
+ endif()
250
+ endfunction()
251
+
229
252
  function(ggml_add_backend backend)
230
253
  string(TOUPPER "GGML_${backend}" backend_id)
231
254
  if (${backend_id})
232
255
  string(TOLOWER "ggml-${backend}" backend_target)
233
256
  add_subdirectory(${backend_target})
234
- # check again in case the backend disabled itself
235
- # note that this should NOT be the normal behavior, in case of errors the backend should fail the build
236
- # however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
237
- if (${backend_id})
238
- message(STATUS "Including ${backend} backend")
239
- if (${BUILD_SHARED_LIBS})
240
- target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
241
- target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED)
242
- install(TARGETS ${backend_target} LIBRARY)
243
- endif()
244
- target_link_libraries(ggml PUBLIC ${backend_target})
257
+ message(STATUS "Including ${backend} backend")
258
+ if (NOT GGML_BACKEND_DL)
245
259
  string(TOUPPER "GGML_USE_${backend}" backend_use)
246
260
  target_compile_definitions(ggml PUBLIC ${backend_use})
247
261
  endif()
248
262
  endif()
249
263
  endfunction()
250
264
 
265
+ function(ggml_add_cpu_backend_variant tag_name)
266
+ set(GGML_CPU_TAG_NAME ${tag_name})
267
+ # other: OPENMP LLAMAFILE CPU_HBM
268
+ foreach (feat NATIVE
269
+ AVX AVX2 AVX_VNNI FMA F16C
270
+ AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
271
+ AMX_TILE AMX_INT8 AMX_BF16)
272
+ set(GGML_${feat} OFF)
273
+ endforeach()
274
+
275
+ foreach (feat ${ARGN})
276
+ set(GGML_${feat} ON)
277
+ endforeach()
278
+
279
+ ggml_add_cpu_backend_variant_impl(${tag_name})
280
+ endfunction()
281
+
251
282
  ggml_add_backend(CPU)
252
- ggml_add_backend(AMX)
283
+
284
+ if (GGML_CPU_ALL_VARIANTS)
285
+ if (NOT GGML_BACKEND_DL)
286
+ message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
287
+ endif()
288
+ ggml_add_cpu_backend_variant(sandybridge AVX)
289
+ ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
290
+ ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
291
+ ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
292
+ if (NOT MSVC)
293
+ # MSVC doesn't support AVX-VNNI or AMX
294
+ ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
295
+ ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
296
+ endif()
297
+ else ()
298
+ ggml_add_cpu_backend_variant_impl("")
299
+ endif()
300
+
253
301
  ggml_add_backend(BLAS)
254
302
  ggml_add_backend(CANN)
255
303
  ggml_add_backend(CUDA)
256
304
  ggml_add_backend(HIP)
257
305
  ggml_add_backend(Kompute)
258
306
  ggml_add_backend(METAL)
307
+ ggml_add_backend(MUSA)
259
308
  ggml_add_backend(RPC)
260
309
  ggml_add_backend(SYCL)
261
310
  ggml_add_backend(Vulkan)
262
- ggml_add_backend(MUSA)
311
+ ggml_add_backend(OpenCL)
263
312
 
264
313
  foreach (target ggml-base ggml)
265
314
  target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
266
- target_compile_features (${target} PRIVATE c_std_11) # don't bump
315
+ target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
267
316
  endforeach()
268
317
 
269
318
  target_link_libraries(ggml-base PRIVATE Threads::Threads)