@fugood/llama.node 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
|
@@ -1,261 +1,336 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
ggml-cpu-quants.h
|
|
8
|
-
)
|
|
1
|
+
function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
2
|
+
if (tag_name)
|
|
3
|
+
set(GGML_CPU_NAME ggml-cpu-${tag_name})
|
|
4
|
+
else()
|
|
5
|
+
set(GGML_CPU_NAME ggml-cpu)
|
|
6
|
+
endif()
|
|
9
7
|
|
|
10
|
-
|
|
11
|
-
target_include_directories(ggml-cpu PRIVATE . ..)
|
|
8
|
+
ggml_add_backend_library(${GGML_CPU_NAME})
|
|
12
9
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
list (APPEND GGML_CPU_SOURCES
|
|
11
|
+
ggml-cpu/ggml-cpu.c
|
|
12
|
+
ggml-cpu/ggml-cpu.cpp
|
|
13
|
+
ggml-cpu/ggml-cpu-aarch64.cpp
|
|
14
|
+
ggml-cpu/ggml-cpu-aarch64.h
|
|
15
|
+
ggml-cpu/ggml-cpu-hbm.cpp
|
|
16
|
+
ggml-cpu/ggml-cpu-hbm.h
|
|
17
|
+
ggml-cpu/ggml-cpu-quants.c
|
|
18
|
+
ggml-cpu/ggml-cpu-quants.h
|
|
19
|
+
ggml-cpu/ggml-cpu-traits.cpp
|
|
20
|
+
ggml-cpu/ggml-cpu-traits.h
|
|
21
|
+
ggml-cpu/amx/amx.cpp
|
|
22
|
+
ggml-cpu/amx/amx.h
|
|
23
|
+
ggml-cpu/amx/mmq.cpp
|
|
24
|
+
ggml-cpu/amx/mmq.h
|
|
25
|
+
ggml-cpu/ggml-cpu-impl.h
|
|
26
|
+
)
|
|
17
27
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
|
|
28
|
+
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
|
|
29
|
+
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
|
|
21
30
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
endif()
|
|
31
|
+
if (APPLE AND GGML_ACCELERATE)
|
|
32
|
+
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
|
33
|
+
if (ACCELERATE_FRAMEWORK)
|
|
34
|
+
message(STATUS "Accelerate framework found")
|
|
27
35
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
message(STATUS "OpenMP found")
|
|
36
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
|
|
37
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
|
|
38
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
|
|
32
39
|
|
|
33
|
-
|
|
40
|
+
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
|
|
41
|
+
else()
|
|
42
|
+
message(WARNING "Accelerate framework not found")
|
|
43
|
+
endif()
|
|
44
|
+
endif()
|
|
34
45
|
|
|
35
|
-
|
|
46
|
+
if (GGML_OPENMP)
|
|
47
|
+
find_package(OpenMP)
|
|
48
|
+
if (OpenMP_FOUND)
|
|
49
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
|
|
36
50
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
#endif()
|
|
42
|
-
else()
|
|
43
|
-
message(WARNING "OpenMP not found")
|
|
51
|
+
target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
|
|
52
|
+
else()
|
|
53
|
+
message(WARNING "OpenMP not found")
|
|
54
|
+
endif()
|
|
44
55
|
endif()
|
|
45
|
-
endif()
|
|
46
56
|
|
|
47
|
-
if (GGML_LLAMAFILE)
|
|
48
|
-
|
|
57
|
+
if (GGML_LLAMAFILE)
|
|
58
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
|
|
49
59
|
|
|
50
|
-
|
|
60
|
+
list(APPEND GGML_CPU_SOURCES
|
|
61
|
+
ggml-cpu/llamafile/sgemm.cpp
|
|
62
|
+
ggml-cpu/llamafile/sgemm.h)
|
|
63
|
+
endif()
|
|
51
64
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
llamafile/sgemm.h)
|
|
55
|
-
endif()
|
|
65
|
+
if (GGML_CPU_HBM)
|
|
66
|
+
find_library(memkind memkind REQUIRED)
|
|
56
67
|
|
|
57
|
-
|
|
58
|
-
find_library(memkind memkind REQUIRED)
|
|
68
|
+
message(STATUS "Using memkind for CPU HBM")
|
|
59
69
|
|
|
60
|
-
|
|
70
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
|
|
61
71
|
|
|
62
|
-
|
|
72
|
+
target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
|
|
73
|
+
endif()
|
|
63
74
|
|
|
64
|
-
|
|
65
|
-
|
|
75
|
+
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
|
|
76
|
+
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
|
77
|
+
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
78
|
+
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
|
66
79
|
|
|
67
|
-
|
|
68
|
-
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
|
69
|
-
(NOT CMAKE_OSX_ARCHITECTURES AND
|
|
70
|
-
NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
71
|
-
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
|
80
|
+
message(STATUS "ARM detected")
|
|
72
81
|
|
|
73
|
-
|
|
82
|
+
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
83
|
+
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
|
|
84
|
+
else()
|
|
85
|
+
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
|
86
|
+
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
|
87
|
+
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
|
|
88
|
+
endif()
|
|
74
89
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
add_compile_definitions(__ARM_FEATURE_FMA)
|
|
90
|
+
if (GGML_NATIVE)
|
|
91
|
+
# -mcpu=native does not always enable all the features in some compilers,
|
|
92
|
+
# so we check for them manually and enable them if available
|
|
79
93
|
|
|
80
|
-
|
|
81
|
-
|
|
94
|
+
execute_process(
|
|
95
|
+
COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
|
|
96
|
+
INPUT_FILE "/dev/null"
|
|
97
|
+
OUTPUT_QUIET
|
|
98
|
+
ERROR_VARIABLE ARM_MCPU
|
|
99
|
+
RESULT_VARIABLE ARM_MCPU_RESULT
|
|
100
|
+
)
|
|
101
|
+
if (NOT ARM_MCPU_RESULT)
|
|
102
|
+
string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
|
|
103
|
+
endif()
|
|
104
|
+
if ("${ARM_MCPU_FLAG}" STREQUAL "")
|
|
105
|
+
set(ARM_MCPU_FLAG -mcpu=native)
|
|
106
|
+
message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
|
|
107
|
+
endif()
|
|
82
108
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
add_compile_definitions(__ARM_FEATURE_DOTPROD)
|
|
86
|
-
endif ()
|
|
109
|
+
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
|
110
|
+
include(CheckCXXSourceRuns)
|
|
87
111
|
|
|
88
|
-
|
|
112
|
+
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+dotprod")
|
|
113
|
+
check_cxx_source_runs(
|
|
114
|
+
"#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
|
|
115
|
+
GGML_COMPILER_SUPPORT_DOTPROD)
|
|
116
|
+
if (GGML_COMPILER_SUPPORT_DOTPROD)
|
|
117
|
+
set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+dotprod")
|
|
118
|
+
endif()
|
|
89
119
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
120
|
+
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+i8mm")
|
|
121
|
+
check_cxx_source_runs(
|
|
122
|
+
"#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
|
|
123
|
+
GGML_COMPILER_SUPPORT_I8MM)
|
|
124
|
+
if (GGML_COMPILER_SUPPORT_I8MM)
|
|
125
|
+
set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+i8mm")
|
|
126
|
+
endif()
|
|
93
127
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
|
|
97
|
-
endif ()
|
|
128
|
+
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
|
129
|
+
list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
|
|
98
130
|
|
|
99
|
-
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
|
|
100
|
-
else()
|
|
101
|
-
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
|
102
|
-
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
|
103
|
-
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
|
|
104
|
-
endif()
|
|
105
|
-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
|
|
106
|
-
# Raspberry Pi 1, Zero
|
|
107
|
-
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
|
|
108
|
-
endif()
|
|
109
|
-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
|
|
110
|
-
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
|
|
111
|
-
# Android armeabi-v7a
|
|
112
|
-
list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
|
|
113
131
|
else()
|
|
114
|
-
|
|
115
|
-
|
|
132
|
+
if (GGML_CPU_ARM_ARCH)
|
|
133
|
+
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
|
134
|
+
endif()
|
|
135
|
+
endif()
|
|
136
|
+
|
|
137
|
+
# show enabled features
|
|
138
|
+
execute_process(
|
|
139
|
+
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
|
|
140
|
+
INPUT_FILE "/dev/null"
|
|
141
|
+
OUTPUT_VARIABLE ARM_FEATURE
|
|
142
|
+
RESULT_VARIABLE ARM_FEATURE_RESULT
|
|
143
|
+
)
|
|
144
|
+
if (ARM_FEATURE_RESULT)
|
|
145
|
+
message(FATAL_ERROR "Failed to get ARM features")
|
|
146
|
+
else()
|
|
147
|
+
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
|
|
148
|
+
string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
|
|
149
|
+
if (NOT ${feature_pos} EQUAL -1)
|
|
150
|
+
message(STATUS "ARM feature ${feature} enabled")
|
|
151
|
+
endif()
|
|
152
|
+
endforeach()
|
|
116
153
|
endif()
|
|
117
154
|
endif()
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
155
|
+
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
|
|
156
|
+
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
157
|
+
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
|
158
|
+
|
|
159
|
+
message(STATUS "x86 detected")
|
|
160
|
+
|
|
161
|
+
if (MSVC)
|
|
162
|
+
# instruction set detection for MSVC only
|
|
163
|
+
if (GGML_NATIVE)
|
|
164
|
+
include(ggml-cpu/cmake/FindSIMD.cmake)
|
|
165
|
+
endif ()
|
|
166
|
+
if (GGML_AVX512)
|
|
167
|
+
list(APPEND ARCH_FLAGS /arch:AVX512)
|
|
168
|
+
# /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
|
|
169
|
+
# MSVC has no compile-time flags enabling specific
|
|
170
|
+
# AVX512 extensions, neither it defines the
|
|
171
|
+
# macros corresponding to the extensions.
|
|
172
|
+
# Do it manually.
|
|
173
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX512)
|
|
174
|
+
if (GGML_AVX512_VBMI)
|
|
175
|
+
list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
|
|
176
|
+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
177
|
+
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
|
178
|
+
endif()
|
|
179
|
+
endif()
|
|
180
|
+
if (GGML_AVX512_VNNI)
|
|
181
|
+
list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
|
|
182
|
+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
183
|
+
list(APPEND ARCH_FLAGS -mavx512vnni)
|
|
184
|
+
endif()
|
|
185
|
+
endif()
|
|
186
|
+
if (GGML_AVX512_BF16)
|
|
187
|
+
list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
|
|
188
|
+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
189
|
+
list(APPEND ARCH_FLAGS -mavx512bf16)
|
|
190
|
+
endif()
|
|
191
|
+
endif()
|
|
192
|
+
if (GGML_AMX_TILE)
|
|
193
|
+
list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
|
|
148
194
|
endif()
|
|
195
|
+
if (GGML_AMX_INT8)
|
|
196
|
+
list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
|
|
197
|
+
endif()
|
|
198
|
+
if (GGML_AMX_BF16)
|
|
199
|
+
list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
|
|
200
|
+
endif()
|
|
201
|
+
elseif (GGML_AVX2)
|
|
202
|
+
list(APPEND ARCH_FLAGS /arch:AVX2)
|
|
203
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
|
|
204
|
+
elseif (GGML_AVX)
|
|
205
|
+
list(APPEND ARCH_FLAGS /arch:AVX)
|
|
206
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX)
|
|
207
|
+
else ()
|
|
208
|
+
list(APPEND ARCH_FLAGS /arch:SSE4.2)
|
|
209
|
+
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
|
210
|
+
endif()
|
|
211
|
+
if (GGML_AVX_VNNI)
|
|
212
|
+
# MSVC generates AVX512 with AVX-VNNI intrinsics even with /arch:AVX2
|
|
213
|
+
#list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
|
|
149
214
|
endif()
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
215
|
+
else ()
|
|
216
|
+
if (GGML_NATIVE)
|
|
217
|
+
list(APPEND ARCH_FLAGS -march=native)
|
|
218
|
+
else ()
|
|
219
|
+
list(APPEND ARCH_FLAGS -msse4.2)
|
|
220
|
+
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
|
221
|
+
if (GGML_F16C)
|
|
222
|
+
list(APPEND ARCH_FLAGS -mf16c)
|
|
223
|
+
list(APPEND ARCH_DEFINITIONS GGML_F16C)
|
|
224
|
+
endif()
|
|
225
|
+
if (GGML_FMA)
|
|
226
|
+
list(APPEND ARCH_FLAGS -mfma)
|
|
227
|
+
list(APPEND ARCH_DEFINITIONS GGML_FMA)
|
|
228
|
+
endif()
|
|
229
|
+
if (GGML_AVX)
|
|
230
|
+
list(APPEND ARCH_FLAGS -mavx)
|
|
231
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX)
|
|
232
|
+
endif()
|
|
233
|
+
if (GGML_AVX2)
|
|
234
|
+
list(APPEND ARCH_FLAGS -mavx2)
|
|
235
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX2)
|
|
236
|
+
endif()
|
|
237
|
+
if (GGML_AVX_VNNI)
|
|
238
|
+
list(APPEND ARCH_FLAGS -mavxvnni)
|
|
239
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
|
|
240
|
+
endif()
|
|
241
|
+
if (GGML_AVX512)
|
|
242
|
+
list(APPEND ARCH_FLAGS -mavx512f)
|
|
243
|
+
list(APPEND ARCH_FLAGS -mavx512cd)
|
|
244
|
+
list(APPEND ARCH_FLAGS -mavx512vl)
|
|
245
|
+
list(APPEND ARCH_FLAGS -mavx512dq)
|
|
246
|
+
list(APPEND ARCH_FLAGS -mavx512bw)
|
|
247
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX512)
|
|
248
|
+
endif()
|
|
249
|
+
if (GGML_AVX512_VBMI)
|
|
250
|
+
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
|
251
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
|
|
252
|
+
endif()
|
|
253
|
+
if (GGML_AVX512_VNNI)
|
|
154
254
|
list(APPEND ARCH_FLAGS -mavx512vnni)
|
|
255
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
|
|
155
256
|
endif()
|
|
156
|
-
|
|
157
|
-
if (GGML_AVX512_BF16)
|
|
158
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
|
|
159
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
|
|
160
|
-
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
257
|
+
if (GGML_AVX512_BF16)
|
|
161
258
|
list(APPEND ARCH_FLAGS -mavx512bf16)
|
|
259
|
+
list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
|
|
260
|
+
endif()
|
|
261
|
+
if (GGML_AMX_TILE)
|
|
262
|
+
list(APPEND ARCH_FLAGS -mamx-tile)
|
|
263
|
+
list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
|
|
264
|
+
endif()
|
|
265
|
+
if (GGML_AMX_INT8)
|
|
266
|
+
list(APPEND ARCH_FLAGS -mamx-int8)
|
|
267
|
+
list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
|
|
268
|
+
endif()
|
|
269
|
+
if (GGML_AMX_BF16)
|
|
270
|
+
list(APPEND ARCH_FLAGS -mamx-bf16)
|
|
271
|
+
list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
|
|
162
272
|
endif()
|
|
163
273
|
endif()
|
|
164
|
-
if (GGML_AMX_TILE)
|
|
165
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
|
|
166
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
|
|
167
|
-
endif()
|
|
168
|
-
if (GGML_AMX_INT8)
|
|
169
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
|
|
170
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
|
|
171
|
-
endif()
|
|
172
|
-
if (GGML_AMX_BF16)
|
|
173
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
|
|
174
|
-
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
|
|
175
|
-
endif()
|
|
176
|
-
elseif (GGML_AVX2)
|
|
177
|
-
list(APPEND ARCH_FLAGS /arch:AVX2)
|
|
178
|
-
elseif (GGML_AVX)
|
|
179
|
-
list(APPEND ARCH_FLAGS /arch:AVX)
|
|
180
|
-
endif()
|
|
181
|
-
else()
|
|
182
|
-
if (GGML_NATIVE)
|
|
183
|
-
list(APPEND ARCH_FLAGS -march=native)
|
|
184
|
-
endif()
|
|
185
|
-
if (GGML_F16C)
|
|
186
|
-
list(APPEND ARCH_FLAGS -mf16c)
|
|
187
|
-
endif()
|
|
188
|
-
if (GGML_FMA)
|
|
189
|
-
list(APPEND ARCH_FLAGS -mfma)
|
|
190
|
-
endif()
|
|
191
|
-
if (GGML_AVX)
|
|
192
|
-
list(APPEND ARCH_FLAGS -mavx)
|
|
193
|
-
endif()
|
|
194
|
-
if (GGML_AVX2)
|
|
195
|
-
list(APPEND ARCH_FLAGS -mavx2)
|
|
196
|
-
endif()
|
|
197
|
-
if (GGML_AVX512)
|
|
198
|
-
list(APPEND ARCH_FLAGS -mavx512f)
|
|
199
|
-
list(APPEND ARCH_FLAGS -mavx512dq)
|
|
200
|
-
list(APPEND ARCH_FLAGS -mavx512bw)
|
|
201
|
-
endif()
|
|
202
|
-
if (GGML_AVX512_VBMI)
|
|
203
|
-
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
|
204
274
|
endif()
|
|
205
|
-
|
|
206
|
-
|
|
275
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
|
276
|
+
message(STATUS "PowerPC detected")
|
|
277
|
+
execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
|
|
278
|
+
string(FIND "${POWER10_M}" "POWER10" substring_index)
|
|
279
|
+
if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
|
|
280
|
+
set(substring_index -1)
|
|
207
281
|
endif()
|
|
208
|
-
|
|
209
|
-
|
|
282
|
+
|
|
283
|
+
if (${substring_index} GREATER_EQUAL 0)
|
|
284
|
+
list(APPEND ARCH_FLAGS -mcpu=power10)
|
|
285
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
|
286
|
+
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
|
|
287
|
+
else()
|
|
288
|
+
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
|
|
289
|
+
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
|
|
210
290
|
endif()
|
|
211
|
-
|
|
212
|
-
|
|
291
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
|
292
|
+
message(STATUS "loongarch64 detected")
|
|
293
|
+
|
|
294
|
+
list(APPEND ARCH_FLAGS -march=loongarch64)
|
|
295
|
+
if (GGML_LASX)
|
|
296
|
+
list(APPEND ARCH_FLAGS -mlasx)
|
|
213
297
|
endif()
|
|
214
|
-
if (
|
|
215
|
-
list(APPEND ARCH_FLAGS -
|
|
298
|
+
if (GGML_LSX)
|
|
299
|
+
list(APPEND ARCH_FLAGS -mlsx)
|
|
216
300
|
endif()
|
|
217
|
-
|
|
218
|
-
|
|
301
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
|
|
302
|
+
message(STATUS "RISC-V detected")
|
|
303
|
+
if (GGML_RVV)
|
|
304
|
+
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
|
|
219
305
|
endif()
|
|
220
|
-
endif()
|
|
221
|
-
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
|
222
|
-
message(STATUS "PowerPC detected")
|
|
223
|
-
execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
|
|
224
|
-
string(FIND "${POWER10_M}" "POWER10" substring_index)
|
|
225
|
-
if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
|
|
226
|
-
set(substring_index -1)
|
|
227
|
-
endif()
|
|
228
|
-
|
|
229
|
-
if (${substring_index} GREATER_EQUAL 0)
|
|
230
|
-
list(APPEND ARCH_FLAGS -mcpu=power10)
|
|
231
|
-
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
|
232
|
-
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
|
|
233
306
|
else()
|
|
234
|
-
|
|
235
|
-
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
|
|
307
|
+
message(STATUS "Unknown architecture")
|
|
236
308
|
endif()
|
|
237
|
-
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
|
238
|
-
message(STATUS "loongarch64 detected")
|
|
239
309
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
list(APPEND ARCH_FLAGS -mlasx)
|
|
243
|
-
endif()
|
|
244
|
-
if (GGML_LSX)
|
|
245
|
-
list(APPEND ARCH_FLAGS -mlsx)
|
|
310
|
+
if (GGML_CPU_AARCH64)
|
|
311
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
|
|
246
312
|
endif()
|
|
247
|
-
else()
|
|
248
|
-
message(STATUS "Unknown architecture")
|
|
249
|
-
endif()
|
|
250
313
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
314
|
+
message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
|
|
315
|
+
target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
|
|
316
|
+
target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
|
|
317
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
255
318
|
|
|
256
|
-
|
|
257
|
-
|
|
319
|
+
if (GGML_BACKEND_DL)
|
|
320
|
+
# The feature detection code is compiled as a separate target so that
|
|
321
|
+
# it can be built without the architecture flags
|
|
322
|
+
# Since multiple variants of the CPU backend may be included in the same
|
|
323
|
+
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
324
|
+
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
|
|
325
|
+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
|
|
326
|
+
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
327
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
328
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
329
|
+
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
330
|
+
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
331
|
+
endif()
|
|
258
332
|
|
|
259
|
-
if (EMSCRIPTEN)
|
|
260
|
-
|
|
261
|
-
endif()
|
|
333
|
+
if (EMSCRIPTEN)
|
|
334
|
+
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
|
|
335
|
+
endif()
|
|
336
|
+
endfunction()
|