@fugood/llama.node 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
@@ -26,6 +26,9 @@ if (NOT MSVC)
26
26
  endif()
27
27
  endif()
28
28
 
29
+ unset(GGML_EXTRA_LIBS_PRIVATE)
30
+ unset(GGML_EXTRA_LIBS_PUBLIC)
31
+
29
32
  if (APPLE AND GGML_ACCELERATE)
30
33
  find_library(ACCELERATE_FRAMEWORK Accelerate)
31
34
  if (ACCELERATE_FRAMEWORK)
@@ -35,7 +38,7 @@ if (APPLE AND GGML_ACCELERATE)
35
38
  add_compile_definitions(ACCELERATE_NEW_LAPACK)
36
39
  add_compile_definitions(ACCELERATE_LAPACK_ILP64)
37
40
 
38
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
41
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE ${ACCELERATE_FRAMEWORK})
39
42
  else()
40
43
  message(WARNING "Accelerate framework not found")
41
44
  endif()
@@ -87,7 +90,7 @@ if (GGML_METAL)
87
90
  COMMENT "Generate assembly for embedded Metal library"
88
91
  )
89
92
 
90
- set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
93
+ list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM})
91
94
  else()
92
95
  if (GGML_METAL_SHADER_DEBUG)
93
96
  # custom command to do the following:
@@ -132,7 +135,7 @@ if (GGML_METAL)
132
135
  )
133
136
  endif() # GGML_METAL_EMBED_LIBRARY
134
137
 
135
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS}
138
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE
136
139
  ${FOUNDATION_LIBRARY}
137
140
  ${METAL_FRAMEWORK}
138
141
  ${METALKIT_FRAMEWORK}
@@ -157,11 +160,11 @@ if (GGML_OPENMP)
157
160
 
158
161
  add_compile_definitions(GGML_USE_OPENMP)
159
162
 
160
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
163
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
161
164
 
162
165
  if (GGML_MUSA)
163
- set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} "/usr/lib/llvm-10/include/openmp")
164
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} "/usr/lib/llvm-10/lib/libomp.so")
166
+ list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp")
167
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
165
168
  endif()
166
169
  else()
167
170
  message(WARNING "OpenMP not found")
@@ -244,8 +247,8 @@ if (GGML_BLAS)
244
247
  set(GGML_HEADERS_BLAS ../include/ggml-blas.h)
245
248
  set(GGML_SOURCES_BLAS ggml-blas.cpp)
246
249
 
247
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES})
248
- set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
250
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE ${BLAS_LIBRARIES})
251
+ list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS})
249
252
  else()
250
253
  message(WARNING "BLAS not found, please refer to "
251
254
  "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
@@ -326,7 +329,7 @@ if (GGML_CUDA)
326
329
  add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
327
330
  add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
328
331
 
329
- if (GGML_CUDA_USE_GRAPHS)
332
+ if (GGML_CUDA_GRAPHS)
330
333
  add_compile_definitions(GGML_CUDA_USE_GRAPHS)
331
334
  endif()
332
335
 
@@ -361,26 +364,26 @@ if (GGML_CUDA)
361
364
  if (GGML_MUSA)
362
365
  set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
363
366
  foreach(SOURCE ${GGML_SOURCES_CUDA})
364
- set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_22")
367
+ set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
365
368
  endforeach()
366
369
  endif()
367
370
 
368
371
  if (GGML_STATIC)
369
372
  if (WIN32)
370
373
  # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
371
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
374
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
372
375
  else ()
373
376
  if (GGML_MUSA)
374
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart_static MUSA::mublas_static)
377
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart_static MUSA::mublas_static)
375
378
  else()
376
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
379
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
377
380
  endif()
378
381
  endif()
379
382
  else()
380
383
  if (GGML_MUSA)
381
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart MUSA::mublas)
384
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart MUSA::mublas)
382
385
  else()
383
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
386
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
384
387
  endif()
385
388
  endif()
386
389
 
@@ -388,9 +391,9 @@ if (GGML_CUDA)
388
391
  # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
389
392
  else()
390
393
  if (GGML_MUSA)
391
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
394
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
392
395
  else()
393
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
396
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
394
397
  endif()
395
398
  endif()
396
399
  else()
@@ -495,7 +498,7 @@ if (GGML_HIPBLAS)
495
498
 
496
499
  if (CXX_IS_HIPCC)
497
500
  set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
498
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} hip::device)
501
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE hip::device)
499
502
  else()
500
503
  set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
501
504
  endif()
@@ -504,16 +507,17 @@ if (GGML_HIPBLAS)
504
507
  message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
505
508
  endif()
506
509
 
507
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
510
+ list(APPEND GGML_EXTRA_LIBS_PUBLIC hip::host roc::rocblas roc::hipblas)
508
511
  endif()
509
512
 
510
513
  if (GGML_SYCL)
511
- if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
512
- message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
514
+ if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
515
+ message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
513
516
  endif()
514
517
 
515
518
  check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)
516
- if ( DEFINED ENV{ONEAPI_ROOT})
519
+
520
+ if (DEFINED ENV{ONEAPI_ROOT})
517
521
  message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
518
522
  elseif(SUPPORTS_SYCL)
519
523
  message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
@@ -528,6 +532,9 @@ if (GGML_SYCL)
528
532
  list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL)
529
533
 
530
534
  if (GGML_SYCL_F16)
535
+ if (GGML_SYCL_TARGET STREQUAL "AMD")
536
+ message(WARNING "AMD target does not entirely support FP16 in the SYCL backend.")
537
+ endif()
531
538
  add_compile_definitions(GGML_SYCL_F16)
532
539
  endif()
533
540
 
@@ -539,6 +546,12 @@ if (GGML_SYCL)
539
546
 
540
547
  if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
541
548
  add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
549
+ elseif (GGML_SYCL_TARGET STREQUAL "AMD")
550
+ # INFO: Allowed Sub_group_sizes are not consistent through all
551
+ # hip targets. For example, 64 is used for certain models, but the backend
552
+ # does not support it.
553
+ # Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
554
+ add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
542
555
  else()
543
556
  add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
544
557
  endif()
@@ -549,16 +562,35 @@ if (GGML_SYCL)
549
562
  file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
550
563
  list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
551
564
 
565
+ find_package(DNNL)
566
+ message("-- DNNL found:" ${DNNL_FOUND})
567
+
568
+ if (GGML_SYCL_TARGET STREQUAL "INTEL")
569
+ add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
570
+ else()
571
+ add_compile_definitions(GGML_SYCL_DNNL=0)
572
+ endif()
573
+
574
+ if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
575
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE DNNL::dnnl)
576
+ endif()
577
+
552
578
  if (WIN32)
553
579
  find_package(IntelSYCL REQUIRED)
554
580
  find_package(MKL REQUIRED)
555
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
581
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
556
582
  else()
557
583
  if (GGML_SYCL_TARGET STREQUAL "INTEL")
558
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
584
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
559
585
  elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
560
586
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
561
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl pthread m dl onemkl)
587
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
588
+ elseif (GGML_SYCL_TARGET STREQUAL "AMD")
589
+ if (GGML_SYCL_HIP_TARGET STREQUAL "")
590
+ message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_HIP_TARGET has not been set.")
591
+ endif()
592
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${GGML_SYCL_HIP_TARGET}")
593
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
562
594
  endif()
563
595
  endif()
564
596
  endif()
@@ -569,7 +601,7 @@ if (GGML_RPC)
569
601
  list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC)
570
602
 
571
603
  if (WIN32)
572
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32)
604
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE ws2_32)
573
605
  endif()
574
606
 
575
607
  set(GGML_HEADERS_RPC ../include/ggml-rpc.h)
@@ -602,6 +634,14 @@ if (GGML_VULKAN)
602
634
  add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
603
635
  endif()
604
636
 
637
+ if (GGML_VULKAN_SHADER_DEBUG_INFO)
638
+ add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO)
639
+ endif()
640
+
641
+ if (GGML_VULKAN_PERF)
642
+ add_compile_definitions(GGML_VULKAN_PERF)
643
+ endif()
644
+
605
645
  if (GGML_VULKAN_VALIDATE)
606
646
  add_compile_definitions(GGML_VULKAN_VALIDATE)
607
647
  endif()
@@ -639,8 +679,8 @@ if (GGML_VULKAN)
639
679
  set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header})
640
680
  set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source})
641
681
 
642
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} Vulkan::Vulkan)
643
- set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
682
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE Vulkan::Vulkan)
683
+ list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
644
684
  else()
645
685
  message(WARNING "Vulkan not found")
646
686
  endif()
@@ -799,8 +839,8 @@ if (GGML_KOMPUTE)
799
839
 
800
840
  list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE)
801
841
 
802
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} kompute)
803
- set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
842
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE kompute)
843
+ list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
804
844
  else()
805
845
  message(WARNING "Kompute not found")
806
846
  endif()
@@ -849,11 +889,6 @@ if (GGML_CANN)
849
889
  ${CANN_INSTALL_DIR}/acllib/include
850
890
  )
851
891
 
852
- # TODO: find libs
853
- link_directories(
854
- ${CANN_INSTALL_DIR}/lib64
855
- )
856
-
857
892
  add_subdirectory(ggml-cann/kernels)
858
893
  list(APPEND CANN_LIBRARIES
859
894
  ascendcl
@@ -870,8 +905,10 @@ if (GGML_CANN)
870
905
  message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
871
906
  message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
872
907
 
873
- set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
874
- set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
908
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE ${CANN_LIBRARIES} )
909
+ list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS})
910
+ list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64)
911
+
875
912
  list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
876
913
  endif()
877
914
  else()
@@ -1164,6 +1201,7 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
1164
1201
  endif()
1165
1202
  if (GGML_AVX512)
1166
1203
  list(APPEND ARCH_FLAGS -mavx512f)
1204
+ list(APPEND ARCH_FLAGS -mavx512dq)
1167
1205
  list(APPEND ARCH_FLAGS -mavx512bw)
1168
1206
  endif()
1169
1207
  if (GGML_AVX512_VBMI)
@@ -1237,7 +1275,7 @@ endif()
1237
1275
 
1238
1276
  # Data types, macros and functions related to controlling CPU affinity and
1239
1277
  # some memory allocation are available on Linux through GNU extensions in libc
1240
- if (CMAKE_SYSTEM_NAME MATCHES "Linux")
1278
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
1241
1279
  add_compile_definitions(_GNU_SOURCE)
1242
1280
  endif()
1243
1281
 
@@ -1287,7 +1325,7 @@ add_library(ggml
1287
1325
  ../include/ggml-backend.h
1288
1326
  ggml.c
1289
1327
  ggml-alloc.c
1290
- ggml-backend.c
1328
+ ggml-backend.cpp
1291
1329
  ggml-quants.c
1292
1330
  ggml-quants.h
1293
1331
  ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
@@ -1308,21 +1346,25 @@ if (EMSCRIPTEN)
1308
1346
  set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128")
1309
1347
  endif()
1310
1348
 
1311
- target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
1312
- target_include_directories(ggml PUBLIC ../include)
1349
+ target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
1350
+ target_include_directories(ggml PUBLIC ../include)
1313
1351
  target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
1314
- target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
1352
+ target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
1315
1353
  target_compile_features (ggml PRIVATE c_std_11) # don't bump
1316
1354
 
1317
- target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS})
1355
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads)
1318
1356
 
1319
1357
  find_library(MATH_LIBRARY m)
1320
1358
  if (MATH_LIBRARY)
1321
1359
  if (NOT WIN32 OR NOT GGML_SYCL)
1322
- target_link_libraries(ggml PRIVATE ${MATH_LIBRARY})
1360
+ list(APPEND GGML_EXTRA_LIBS_PRIVATE m)
1323
1361
  endif()
1324
1362
  endif()
1325
1363
 
1364
+ list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PRIVATE)
1365
+ list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PUBLIC)
1366
+ target_link_libraries(ggml PRIVATE ${GGML_EXTRA_LIBS_PRIVATE} PUBLIC ${GGML_EXTRA_LIBS_PUBLIC})
1367
+
1326
1368
  if (BUILD_SHARED_LIBS)
1327
1369
  set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
1328
1370
  target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)