@fugood/llama.node 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +2 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +8 -9
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +43 -9
- package/src/llama.cpp/.github/workflows/docker.yml +3 -0
- package/src/llama.cpp/CMakeLists.txt +7 -4
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +0 -2
- package/src/llama.cpp/common/arg.cpp +642 -607
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +79 -281
- package/src/llama.cpp/common/common.h +130 -100
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +116 -108
- package/src/llama.cpp/common/sampling.h +20 -20
- package/src/llama.cpp/docs/build.md +37 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +14 -14
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
- package/src/llama.cpp/examples/infill/infill.cpp +40 -86
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/clip.cpp +1 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +37 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
- package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
- package/src/llama.cpp/examples/main/main.cpp +64 -109
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
- package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
- package/src/llama.cpp/examples/server/server.cpp +553 -691
- package/src/llama.cpp/examples/server/utils.hpp +312 -25
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +128 -96
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +53 -393
- package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
- package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
- package/src/llama.cpp/include/llama.h +67 -33
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +745 -105
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +49 -9
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +2636 -2406
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/tests/CMakeLists.txt +1 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
- package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +1 -0
- package/src/llama.cpp/tests/test-sampling.cpp +162 -137
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
- /package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# Declare the CPU backend library; its sources are attached right below.
# The library type (static/shared) is left to the surrounding build settings.
add_library(ggml-cpu)

target_sources(ggml-cpu PRIVATE
    ggml-cpu.c
    ggml-cpu.cpp
    ggml-cpu-aarch64.c
    ggml-cpu-aarch64.h
    ggml-cpu-quants.c
    ggml-cpu-quants.h
)

# ggml-base is defined outside this file; it is linked privately.
target_link_libraries(ggml-cpu PRIVATE ggml-base)

# Headers are looked up in this directory and in its parent.
target_include_directories(ggml-cpu PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${CMAKE_CURRENT_SOURCE_DIR}/.."
)
|
|
12
|
+
|
|
13
|
+
# Optional Apple Accelerate support: probed only on Apple platforms and only
# when GGML_ACCELERATE is enabled.
if (APPLE AND GGML_ACCELERATE)
    find_library(ACCELERATE_FRAMEWORK Accelerate)
    if (ACCELERATE_FRAMEWORK)
        message(STATUS "Accelerate framework found")

        # Definitions are attached to ggml-cpu itself rather than to the whole
        # directory, so they cannot leak into targets added here later.
        target_compile_definitions(ggml-cpu PRIVATE
            GGML_USE_ACCELERATE
            ACCELERATE_NEW_LAPACK
            ACCELERATE_LAPACK_ILP64
        )

        target_link_libraries(ggml-cpu PRIVATE ${ACCELERATE_FRAMEWORK})
    else()
        # Not fatal: configuration continues without the framework.
        message(WARNING "Accelerate framework not found")
    endif()
endif()
|
|
27
|
+
|
|
28
|
+
# Optional OpenMP support, enabled through GGML_OPENMP.
if (GGML_OPENMP)
    find_package(OpenMP)
    if (OpenMP_FOUND)
        message(STATUS "OpenMP found")

        # Target-scoped so the macro only reaches ggml-cpu sources.
        target_compile_definitions(ggml-cpu PRIVATE GGML_USE_OPENMP)

        # Both the C and the C++ OpenMP runtimes are linked because the
        # target contains sources in both languages.
        target_link_libraries(ggml-cpu PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)

        # FIXME: should be replaced with a compiler id check
        #if (GGML_MUSA)
        #    list(APPEND GGML_CPU_EXTRA_INCLUDES     "/usr/lib/llvm-14/lib/clang/14.0.0/include")
        #    list(APPEND GGML_CPU_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-14/lib/libomp.so")
        #endif()
    else()
        # Not fatal: configuration continues without OpenMP.
        message(WARNING "OpenMP not found")
    endif()
endif()
|
|
46
|
+
|
|
47
|
+
# Optional llamafile SGEMM sources, enabled through GGML_LLAMAFILE.
if (GGML_LLAMAFILE)
    message(STATUS "Using llamafile")

    # Target-scoped so the macro only reaches ggml-cpu sources.
    target_compile_definitions(ggml-cpu PRIVATE GGML_USE_LLAMAFILE)

    target_sources(ggml-cpu PRIVATE
        llamafile/sgemm.cpp
        llamafile/sgemm.h
    )
endif()
|
|
56
|
+
|
|
57
|
+
# Optional memkind dependency, enabled through GGML_CPU_HBM.
if (GGML_CPU_HBM)
    # On success the cache variable `memkind` holds the full path of the
    # library; REQUIRED aborts configuration when it cannot be found.
    find_library(memkind memkind REQUIRED)

    message(STATUS "Using memkind for CPU HBM")

    # Target-scoped so the macro only reaches ggml-cpu sources.
    target_compile_definitions(ggml-cpu PRIVATE GGML_USE_CPU_HBM)

    # Link the path that find_library located. Passing the bare name
    # `memkind` would make the linker search for it again and ignore a
    # library found outside the default search directories.
    target_link_libraries(ggml-cpu PUBLIC "${memkind}")
endif()
|
|
66
|
+
|
|
67
|
+
# Architecture detection.
#
# Picks one branch (ARM, x86, PowerPC, loongarch64, or "unknown") based on
# CMAKE_OSX_ARCHITECTURES, CMAKE_GENERATOR_PLATFORM_LWR and
# CMAKE_SYSTEM_PROCESSOR, and accumulates the matching compiler switches in
# ARCH_FLAGS. Feature macros that the compiler does not define by itself are
# attached to ggml-cpu directly.

# include() is idempotent; loading the check modules here keeps this file
# independent of what the including scope has already loaded.
include(CheckCXXSourceCompiles)
include(CheckCXXCompilerFlag)

if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
    CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
    (NOT CMAKE_OSX_ARCHITECTURES AND
     NOT CMAKE_GENERATOR_PLATFORM_LWR AND
     CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))

    message(STATUS "ARM detected")

    if (MSVC)
        # MSVC defines _M_ARM64 instead of __aarch64__, so the macros the
        # sources test for are supplied by hand.
        target_compile_definitions(ggml-cpu PRIVATE
            __aarch64__
            __ARM_NEON
            __ARM_FEATURE_FMA
        )

        # Run the feature probes with /arch:armv8.2 and restore the previous
        # CMAKE_REQUIRED_FLAGS afterwards.
        set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
        string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")

        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
        if (GGML_COMPILER_SUPPORT_DOTPROD)
            target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_DOTPROD)
        endif()

        # Probe the int8 matrix-multiply intrinsic itself (vmmlaq_s32). The
        # previous probe called vmlaq_f32, which is unrelated to this feature
        # and therefore could not tell whether it is available.
        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
        if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
            target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_MATMUL_INT8)
        endif()

        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
        if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
            target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
        endif()

        set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
    else()
        check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
        # Plain truthiness test: an explicit OFF/0 supplied for the result
        # variable is treated as "unsupported" as well.
        if (COMPILER_SUPPORTS_FP16_FORMAT_I3E)
            list(APPEND ARCH_FLAGS -mfp16-format=ieee)
        endif()

        # CMAKE_SYSTEM_PROCESSOR is quoted in the tests below so that an empty
        # value yields a false condition instead of a malformed if().
        if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "armv6")
            # Raspberry Pi 1, Zero
            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
        endif()
        if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "armv7")
            if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
                # Android armeabi-v7a
                list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
            else()
                # Raspberry Pi 2
                list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
            endif()
        endif()
        if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "armv8")
            # Android arm64-v8a
            # Raspberry Pi 3, 4, Zero 2 (32-bit)
            list(APPEND ARCH_FLAGS -mno-unaligned-access)
        endif()
        if (GGML_SVE)
            list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
        endif()
    endif()
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR
        CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND
         NOT CMAKE_GENERATOR_PLATFORM_LWR AND
         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))

    message(STATUS "x86 detected")

    if (MSVC)
        # instruction set detection for MSVC only
        if (GGML_NATIVE)
            # The module is referenced through this file's own directory.
            # NOTE(review): the earlier TODO about referencing the parent
            # folder looks stale now that the path is cmake/ next to this file.
            include("${CMAKE_CURRENT_LIST_DIR}/cmake/FindSIMD.cmake")
        endif()

        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS /arch:AVX512)

            # MSVC has no compile-time flags that enable specific AVX512
            # extensions, nor does it define the macros corresponding to
            # those extensions, so they are defined manually here.
            if (GGML_AVX512_VBMI)
                target_compile_definitions(ggml-cpu PRIVATE
                    $<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>
                    $<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>
                )
                if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                    list(APPEND ARCH_FLAGS -mavx512vbmi)
                endif()
            endif()
            if (GGML_AVX512_VNNI)
                target_compile_definitions(ggml-cpu PRIVATE
                    $<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>
                    $<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>
                )
                if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                    list(APPEND ARCH_FLAGS -mavx512vnni)
                endif()
            endif()
            if (GGML_AVX512_BF16)
                target_compile_definitions(ggml-cpu PRIVATE
                    $<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>
                    $<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>
                )
                if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                    list(APPEND ARCH_FLAGS -mavx512bf16)
                endif()
            endif()
            if (GGML_AMX_TILE)
                target_compile_definitions(ggml-cpu PRIVATE
                    $<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>
                    $<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>
                )
            endif()
            if (GGML_AMX_INT8)
                target_compile_definitions(ggml-cpu PRIVATE
                    $<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>
                    $<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>
                )
            endif()
            if (GGML_AMX_BF16)
                target_compile_definitions(ggml-cpu PRIVATE
                    $<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>
                    $<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>
                )
            endif()
        elseif (GGML_AVX2)
            list(APPEND ARCH_FLAGS /arch:AVX2)
        elseif (GGML_AVX)
            list(APPEND ARCH_FLAGS /arch:AVX)
        endif()
    else()
        # Non-MSVC compilers: one switch per enabled option, appended in a
        # fixed order.
        if (GGML_NATIVE)
            list(APPEND ARCH_FLAGS -march=native)
        endif()
        if (GGML_F16C)
            list(APPEND ARCH_FLAGS -mf16c)
        endif()
        if (GGML_FMA)
            list(APPEND ARCH_FLAGS -mfma)
        endif()
        if (GGML_AVX)
            list(APPEND ARCH_FLAGS -mavx)
        endif()
        if (GGML_AVX2)
            list(APPEND ARCH_FLAGS -mavx2)
        endif()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS -mavx512f)
            list(APPEND ARCH_FLAGS -mavx512dq)
            list(APPEND ARCH_FLAGS -mavx512bw)
        endif()
        if (GGML_AVX512_VBMI)
            list(APPEND ARCH_FLAGS -mavx512vbmi)
        endif()
        if (GGML_AVX512_VNNI)
            list(APPEND ARCH_FLAGS -mavx512vnni)
        endif()
        if (GGML_AVX512_BF16)
            list(APPEND ARCH_FLAGS -mavx512bf16)
        endif()
        if (GGML_AMX_TILE)
            list(APPEND ARCH_FLAGS -mamx-tile)
        endif()
        if (GGML_AMX_INT8)
            list(APPEND ARCH_FLAGS -mamx-int8)
        endif()
        if (GGML_AMX_BF16)
            list(APPEND ARCH_FLAGS -mamx-bf16)
        endif()
    endif()
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64")
    message(STATUS "PowerPC detected")

    # Looks for "POWER10" in /proc/cpuinfo of the machine running CMake.
    execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
    # string(FIND) always stores an integer (-1 when the substring is absent),
    # so no extra "is it defined" guard is needed before the comparison.
    string(FIND "${POWER10_M}" "POWER10" substring_index)

    if (substring_index GREATER_EQUAL 0)
        list(APPEND ARCH_FLAGS -mcpu=power10)
    elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64le")
        list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
    else()
        list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
        #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
    endif()
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "loongarch64")
    message(STATUS "loongarch64 detected")

    list(APPEND ARCH_FLAGS -march=loongarch64)
    if (GGML_LASX)
        list(APPEND ARCH_FLAGS -mlasx)
    endif()
    if (GGML_LSX)
        list(APPEND ARCH_FLAGS -mlsx)
    endif()
else()
    # No architecture-specific flags are added in this case.
    message(STATUS "Unknown architecture")
endif()
|
|
250
|
+
|
|
251
|
+
# Optional runtime weight conversion, enabled through GGML_CPU_AARCH64.
if (GGML_CPU_AARCH64)
    message(STATUS "Using runtime weight conversion of Q4_0 to Q4_0_x_x to enable optimized GEMM/GEMV kernels")

    # Target-scoped so the macro only reaches ggml-cpu sources.
    target_compile_definitions(ggml-cpu PRIVATE GGML_USE_CPU_AARCH64)
endif()
|
|
255
|
+
|
|
256
|
+
# Hand the accumulated ARCH_FLAGS to the compiler for both languages used by
# ggml-cpu. Each generator expression is quoted so the ';'-separated flag list
# stays inside it.
target_compile_options(ggml-cpu PRIVATE
    "$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>"
    "$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>"
)
|
|
258
|
+
|
|
259
|
+
# Emscripten builds compile ggml-cpu with -msimd128.
if (EMSCRIPTEN)
    # target_compile_options appends to the target's options, whereas setting
    # the legacy COMPILE_FLAGS property replaces whatever was stored in it.
    target_compile_options(ggml-cpu PRIVATE -msimd128)
endif()
|