@fugood/llama.node 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -10
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +6 -4
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
|
@@ -26,6 +26,9 @@ if (NOT MSVC)
|
|
|
26
26
|
endif()
|
|
27
27
|
endif()
|
|
28
28
|
|
|
29
|
+
unset(GGML_EXTRA_LIBS_PRIVATE)
|
|
30
|
+
unset(GGML_EXTRA_LIBS_PUBLIC)
|
|
31
|
+
|
|
29
32
|
if (APPLE AND GGML_ACCELERATE)
|
|
30
33
|
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
|
31
34
|
if (ACCELERATE_FRAMEWORK)
|
|
@@ -35,7 +38,7 @@ if (APPLE AND GGML_ACCELERATE)
|
|
|
35
38
|
add_compile_definitions(ACCELERATE_NEW_LAPACK)
|
|
36
39
|
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
|
|
37
40
|
|
|
38
|
-
|
|
41
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE ${ACCELERATE_FRAMEWORK})
|
|
39
42
|
else()
|
|
40
43
|
message(WARNING "Accelerate framework not found")
|
|
41
44
|
endif()
|
|
@@ -87,7 +90,7 @@ if (GGML_METAL)
|
|
|
87
90
|
COMMENT "Generate assembly for embedded Metal library"
|
|
88
91
|
)
|
|
89
92
|
|
|
90
|
-
|
|
93
|
+
list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM})
|
|
91
94
|
else()
|
|
92
95
|
if (GGML_METAL_SHADER_DEBUG)
|
|
93
96
|
# custom command to do the following:
|
|
@@ -132,7 +135,7 @@ if (GGML_METAL)
|
|
|
132
135
|
)
|
|
133
136
|
endif() # GGML_METAL_EMBED_LIBRARY
|
|
134
137
|
|
|
135
|
-
|
|
138
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE
|
|
136
139
|
${FOUNDATION_LIBRARY}
|
|
137
140
|
${METAL_FRAMEWORK}
|
|
138
141
|
${METALKIT_FRAMEWORK}
|
|
@@ -157,11 +160,11 @@ if (GGML_OPENMP)
|
|
|
157
160
|
|
|
158
161
|
add_compile_definitions(GGML_USE_OPENMP)
|
|
159
162
|
|
|
160
|
-
|
|
163
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
|
|
161
164
|
|
|
162
165
|
if (GGML_MUSA)
|
|
163
|
-
|
|
164
|
-
|
|
166
|
+
list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp")
|
|
167
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so")
|
|
165
168
|
endif()
|
|
166
169
|
else()
|
|
167
170
|
message(WARNING "OpenMP not found")
|
|
@@ -244,8 +247,8 @@ if (GGML_BLAS)
|
|
|
244
247
|
set(GGML_HEADERS_BLAS ../include/ggml-blas.h)
|
|
245
248
|
set(GGML_SOURCES_BLAS ggml-blas.cpp)
|
|
246
249
|
|
|
247
|
-
|
|
248
|
-
|
|
250
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE ${BLAS_LIBRARIES})
|
|
251
|
+
list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS})
|
|
249
252
|
else()
|
|
250
253
|
message(WARNING "BLAS not found, please refer to "
|
|
251
254
|
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
|
@@ -326,7 +329,7 @@ if (GGML_CUDA)
|
|
|
326
329
|
add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})
|
|
327
330
|
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
|
|
328
331
|
|
|
329
|
-
if (GGML_CUDA_USE_GRAPHS)
|
|
332
|
+
if (GGML_CUDA_GRAPHS)
|
|
330
333
|
add_compile_definitions(GGML_CUDA_USE_GRAPHS)
|
|
331
334
|
endif()
|
|
332
335
|
|
|
@@ -361,26 +364,26 @@ if (GGML_CUDA)
|
|
|
361
364
|
if (GGML_MUSA)
|
|
362
365
|
set_source_files_properties(${GGML_SOURCES_CUDA} PROPERTIES LANGUAGE CXX)
|
|
363
366
|
foreach(SOURCE ${GGML_SOURCES_CUDA})
|
|
364
|
-
set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_22")
|
|
367
|
+
set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
|
|
365
368
|
endforeach()
|
|
366
369
|
endif()
|
|
367
370
|
|
|
368
371
|
if (GGML_STATIC)
|
|
369
372
|
if (WIN32)
|
|
370
373
|
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
|
|
371
|
-
|
|
374
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
|
|
372
375
|
else ()
|
|
373
376
|
if (GGML_MUSA)
|
|
374
|
-
|
|
377
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart_static MUSA::mublas_static)
|
|
375
378
|
else()
|
|
376
|
-
|
|
379
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
|
|
377
380
|
endif()
|
|
378
381
|
endif()
|
|
379
382
|
else()
|
|
380
383
|
if (GGML_MUSA)
|
|
381
|
-
|
|
384
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart MUSA::mublas)
|
|
382
385
|
else()
|
|
383
|
-
|
|
386
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
|
|
384
387
|
endif()
|
|
385
388
|
endif()
|
|
386
389
|
|
|
@@ -388,9 +391,9 @@ if (GGML_CUDA)
|
|
|
388
391
|
# No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
|
|
389
392
|
else()
|
|
390
393
|
if (GGML_MUSA)
|
|
391
|
-
|
|
394
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
|
|
392
395
|
else()
|
|
393
|
-
|
|
396
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
|
|
394
397
|
endif()
|
|
395
398
|
endif()
|
|
396
399
|
else()
|
|
@@ -495,7 +498,7 @@ if (GGML_HIPBLAS)
|
|
|
495
498
|
|
|
496
499
|
if (CXX_IS_HIPCC)
|
|
497
500
|
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
|
|
498
|
-
|
|
501
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE hip::device)
|
|
499
502
|
else()
|
|
500
503
|
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
|
|
501
504
|
endif()
|
|
@@ -504,16 +507,17 @@ if (GGML_HIPBLAS)
|
|
|
504
507
|
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
|
505
508
|
endif()
|
|
506
509
|
|
|
507
|
-
|
|
510
|
+
list(APPEND GGML_EXTRA_LIBS_PUBLIC hip::host roc::rocblas roc::hipblas)
|
|
508
511
|
endif()
|
|
509
512
|
|
|
510
513
|
if (GGML_SYCL)
|
|
511
|
-
if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
|
|
512
|
-
message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
|
|
514
|
+
if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
|
|
515
|
+
message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
|
|
513
516
|
endif()
|
|
514
517
|
|
|
515
518
|
check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)
|
|
516
|
-
|
|
519
|
+
|
|
520
|
+
if (DEFINED ENV{ONEAPI_ROOT})
|
|
517
521
|
message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
|
|
518
522
|
elseif(SUPPORTS_SYCL)
|
|
519
523
|
message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
|
|
@@ -528,6 +532,9 @@ if (GGML_SYCL)
|
|
|
528
532
|
list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL)
|
|
529
533
|
|
|
530
534
|
if (GGML_SYCL_F16)
|
|
535
|
+
if (GGML_SYCL_TARGET STREQUAL "AMD")
|
|
536
|
+
message(WARNING "AMD target does not entirely support FP16 in the SYCL backend.")
|
|
537
|
+
endif()
|
|
531
538
|
add_compile_definitions(GGML_SYCL_F16)
|
|
532
539
|
endif()
|
|
533
540
|
|
|
@@ -539,6 +546,12 @@ if (GGML_SYCL)
|
|
|
539
546
|
|
|
540
547
|
if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
|
541
548
|
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
|
549
|
+
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
|
550
|
+
# INFO: Allowed Sub_group_sizes are not consistent through all
|
|
551
|
+
# hip targets. For example, 64 is used for certain models, but the backend
|
|
552
|
+
# does not support it.
|
|
553
|
+
# Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
|
|
554
|
+
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
|
542
555
|
else()
|
|
543
556
|
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
|
|
544
557
|
endif()
|
|
@@ -549,16 +562,35 @@ if (GGML_SYCL)
|
|
|
549
562
|
file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
|
|
550
563
|
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
|
|
551
564
|
|
|
565
|
+
find_package(DNNL)
|
|
566
|
+
message("-- DNNL found:" ${DNNL_FOUND})
|
|
567
|
+
|
|
568
|
+
if (GGML_SYCL_TARGET STREQUAL "INTEL")
|
|
569
|
+
add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
|
|
570
|
+
else()
|
|
571
|
+
add_compile_definitions(GGML_SYCL_DNNL=0)
|
|
572
|
+
endif()
|
|
573
|
+
|
|
574
|
+
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
|
|
575
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE DNNL::dnnl)
|
|
576
|
+
endif()
|
|
577
|
+
|
|
552
578
|
if (WIN32)
|
|
553
579
|
find_package(IntelSYCL REQUIRED)
|
|
554
580
|
find_package(MKL REQUIRED)
|
|
555
|
-
|
|
581
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
|
556
582
|
else()
|
|
557
583
|
if (GGML_SYCL_TARGET STREQUAL "INTEL")
|
|
558
|
-
|
|
584
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
|
559
585
|
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
|
560
586
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
|
561
|
-
|
|
587
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
|
|
588
|
+
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
|
589
|
+
if (GGML_SYCL_HIP_TARGET STREQUAL "")
|
|
590
|
+
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_HIP_TARGET has not been set.")
|
|
591
|
+
endif()
|
|
592
|
+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${GGML_SYCL_HIP_TARGET}")
|
|
593
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
|
|
562
594
|
endif()
|
|
563
595
|
endif()
|
|
564
596
|
endif()
|
|
@@ -569,7 +601,7 @@ if (GGML_RPC)
|
|
|
569
601
|
list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC)
|
|
570
602
|
|
|
571
603
|
if (WIN32)
|
|
572
|
-
|
|
604
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE ws2_32)
|
|
573
605
|
endif()
|
|
574
606
|
|
|
575
607
|
set(GGML_HEADERS_RPC ../include/ggml-rpc.h)
|
|
@@ -602,6 +634,14 @@ if (GGML_VULKAN)
|
|
|
602
634
|
add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
|
|
603
635
|
endif()
|
|
604
636
|
|
|
637
|
+
if (GGML_VULKAN_SHADER_DEBUG_INFO)
|
|
638
|
+
add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO)
|
|
639
|
+
endif()
|
|
640
|
+
|
|
641
|
+
if (GGML_VULKAN_PERF)
|
|
642
|
+
add_compile_definitions(GGML_VULKAN_PERF)
|
|
643
|
+
endif()
|
|
644
|
+
|
|
605
645
|
if (GGML_VULKAN_VALIDATE)
|
|
606
646
|
add_compile_definitions(GGML_VULKAN_VALIDATE)
|
|
607
647
|
endif()
|
|
@@ -639,8 +679,8 @@ if (GGML_VULKAN)
|
|
|
639
679
|
set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header})
|
|
640
680
|
set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source})
|
|
641
681
|
|
|
642
|
-
|
|
643
|
-
|
|
682
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE Vulkan::Vulkan)
|
|
683
|
+
list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
|
|
644
684
|
else()
|
|
645
685
|
message(WARNING "Vulkan not found")
|
|
646
686
|
endif()
|
|
@@ -799,8 +839,8 @@ if (GGML_KOMPUTE)
|
|
|
799
839
|
|
|
800
840
|
list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE)
|
|
801
841
|
|
|
802
|
-
|
|
803
|
-
|
|
842
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE kompute)
|
|
843
|
+
list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR})
|
|
804
844
|
else()
|
|
805
845
|
message(WARNING "Kompute not found")
|
|
806
846
|
endif()
|
|
@@ -849,11 +889,6 @@ if (GGML_CANN)
|
|
|
849
889
|
${CANN_INSTALL_DIR}/acllib/include
|
|
850
890
|
)
|
|
851
891
|
|
|
852
|
-
# TODO: find libs
|
|
853
|
-
link_directories(
|
|
854
|
-
${CANN_INSTALL_DIR}/lib64
|
|
855
|
-
)
|
|
856
|
-
|
|
857
892
|
add_subdirectory(ggml-cann/kernels)
|
|
858
893
|
list(APPEND CANN_LIBRARIES
|
|
859
894
|
ascendcl
|
|
@@ -870,8 +905,10 @@ if (GGML_CANN)
|
|
|
870
905
|
message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
|
|
871
906
|
message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
|
|
872
907
|
|
|
873
|
-
|
|
874
|
-
|
|
908
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE ${CANN_LIBRARIES} )
|
|
909
|
+
list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS})
|
|
910
|
+
list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64)
|
|
911
|
+
|
|
875
912
|
list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
|
|
876
913
|
endif()
|
|
877
914
|
else()
|
|
@@ -1164,6 +1201,7 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
|
|
|
1164
1201
|
endif()
|
|
1165
1202
|
if (GGML_AVX512)
|
|
1166
1203
|
list(APPEND ARCH_FLAGS -mavx512f)
|
|
1204
|
+
list(APPEND ARCH_FLAGS -mavx512dq)
|
|
1167
1205
|
list(APPEND ARCH_FLAGS -mavx512bw)
|
|
1168
1206
|
endif()
|
|
1169
1207
|
if (GGML_AVX512_VBMI)
|
|
@@ -1237,7 +1275,7 @@ endif()
|
|
|
1237
1275
|
|
|
1238
1276
|
# Data types, macros and functions related to controlling CPU affinity and
|
|
1239
1277
|
# some memory allocation are available on Linux through GNU extensions in libc
|
|
1240
|
-
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
1278
|
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
|
|
1241
1279
|
add_compile_definitions(_GNU_SOURCE)
|
|
1242
1280
|
endif()
|
|
1243
1281
|
|
|
@@ -1287,7 +1325,7 @@ add_library(ggml
|
|
|
1287
1325
|
../include/ggml-backend.h
|
|
1288
1326
|
ggml.c
|
|
1289
1327
|
ggml-alloc.c
|
|
1290
|
-
ggml-backend.c
|
|
1328
|
+
ggml-backend.cpp
|
|
1291
1329
|
ggml-quants.c
|
|
1292
1330
|
ggml-quants.h
|
|
1293
1331
|
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
|
|
@@ -1308,21 +1346,25 @@ if (EMSCRIPTEN)
|
|
|
1308
1346
|
set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128")
|
|
1309
1347
|
endif()
|
|
1310
1348
|
|
|
1311
|
-
target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
|
|
1312
|
-
target_include_directories(ggml PUBLIC ../include)
|
|
1349
|
+
target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
|
|
1350
|
+
target_include_directories(ggml PUBLIC ../include)
|
|
1313
1351
|
target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
|
|
1314
|
-
target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
|
|
1352
|
+
target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
|
|
1315
1353
|
target_compile_features (ggml PRIVATE c_std_11) # don't bump
|
|
1316
1354
|
|
|
1317
|
-
|
|
1355
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads)
|
|
1318
1356
|
|
|
1319
1357
|
find_library(MATH_LIBRARY m)
|
|
1320
1358
|
if (MATH_LIBRARY)
|
|
1321
1359
|
if (NOT WIN32 OR NOT GGML_SYCL)
|
|
1322
|
-
|
|
1360
|
+
list(APPEND GGML_EXTRA_LIBS_PRIVATE m)
|
|
1323
1361
|
endif()
|
|
1324
1362
|
endif()
|
|
1325
1363
|
|
|
1364
|
+
list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PRIVATE)
|
|
1365
|
+
list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PUBLIC)
|
|
1366
|
+
target_link_libraries(ggml PRIVATE ${GGML_EXTRA_LIBS_PRIVATE} PUBLIC ${GGML_EXTRA_LIBS_PUBLIC})
|
|
1367
|
+
|
|
1326
1368
|
if (BUILD_SHARED_LIBS)
|
|
1327
1369
|
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
1328
1370
|
target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)
|