@fugood/llama.node 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/CMakeLists.txt +2 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +1 -1
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +8 -8
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +8 -9
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +4 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +43 -9
  25. package/src/llama.cpp/.github/workflows/docker.yml +3 -0
  26. package/src/llama.cpp/CMakeLists.txt +7 -4
  27. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  28. package/src/llama.cpp/common/CMakeLists.txt +0 -2
  29. package/src/llama.cpp/common/arg.cpp +642 -607
  30. package/src/llama.cpp/common/arg.h +22 -22
  31. package/src/llama.cpp/common/common.cpp +79 -281
  32. package/src/llama.cpp/common/common.h +130 -100
  33. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  34. package/src/llama.cpp/common/log.cpp +50 -50
  35. package/src/llama.cpp/common/log.h +18 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  37. package/src/llama.cpp/common/ngram-cache.h +19 -19
  38. package/src/llama.cpp/common/sampling.cpp +116 -108
  39. package/src/llama.cpp/common/sampling.h +20 -20
  40. package/src/llama.cpp/docs/build.md +37 -17
  41. package/src/llama.cpp/examples/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +14 -14
  43. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  46. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  47. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  48. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  49. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  50. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  51. package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
  52. package/src/llama.cpp/examples/infill/infill.cpp +40 -86
  53. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
  54. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  55. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  56. package/src/llama.cpp/examples/llava/clip.cpp +1 -0
  57. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  58. package/src/llama.cpp/examples/llava/llava.cpp +37 -3
  59. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  60. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  61. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  62. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  63. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
  64. package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
  65. package/src/llama.cpp/examples/main/main.cpp +64 -109
  66. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  67. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  68. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  69. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  70. package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
  71. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  72. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
  73. package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
  74. package/src/llama.cpp/examples/server/server.cpp +553 -691
  75. package/src/llama.cpp/examples/server/utils.hpp +312 -25
  76. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  77. package/src/llama.cpp/examples/simple/simple.cpp +128 -96
  78. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  79. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  80. package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
  81. package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
  82. package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
  83. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  84. package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
  85. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  86. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  87. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  88. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  89. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  90. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  91. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  92. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  93. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  94. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  95. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  96. package/src/llama.cpp/ggml/include/ggml.h +53 -393
  97. package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
  98. package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
  99. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  100. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
  101. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  102. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  103. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  104. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  105. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  106. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
  107. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  108. package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
  109. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  110. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
  111. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  112. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
  113. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  114. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  115. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  116. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
  117. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  118. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  120. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  121. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
  122. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  123. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  124. package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
  125. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  126. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
  127. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  128. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  129. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  130. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  131. package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
  132. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  133. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  134. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
  135. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  136. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  137. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
  138. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
  141. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  142. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  143. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
  144. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
  145. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
  146. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  148. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  149. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  150. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  151. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  152. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  153. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  154. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  155. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
  156. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
  157. package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
  158. package/src/llama.cpp/include/llama.h +67 -33
  159. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  160. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  161. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  162. package/src/llama.cpp/src/llama-sampling.cpp +745 -105
  163. package/src/llama.cpp/src/llama-sampling.h +21 -2
  164. package/src/llama.cpp/src/llama-vocab.cpp +49 -9
  165. package/src/llama.cpp/src/llama-vocab.h +35 -11
  166. package/src/llama.cpp/src/llama.cpp +2636 -2406
  167. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  168. package/src/llama.cpp/tests/CMakeLists.txt +1 -2
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
  171. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  172. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  173. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  174. package/src/llama.cpp/tests/test-log.cpp +2 -2
  175. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  176. package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
  177. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  178. package/src/llama.cpp/tests/test-rope.cpp +1 -0
  179. package/src/llama.cpp/tests/test-sampling.cpp +162 -137
  180. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  181. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  182. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  183. package/src/llama.cpp/common/train.cpp +0 -1515
  184. package/src/llama.cpp/common/train.h +0 -233
  185. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  186. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  187. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  188. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  189. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
  190. /package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
@@ -0,0 +1,261 @@
1
+ add_library(ggml-cpu # CPU backend target; its sources are listed explicitly below
2
+ ggml-cpu.c
3
+ ggml-cpu.cpp
4
+ ggml-cpu-aarch64.c
5
+ ggml-cpu-aarch64.h
6
+ ggml-cpu-quants.c
7
+ ggml-cpu-quants.h
8
+ )
9
+
10
+ target_link_libraries(ggml-cpu PRIVATE ggml-base) # ggml-base is not defined in this file
11
+ target_include_directories(ggml-cpu PRIVATE . ..) # headers are searched in this directory and its parent
12
+
13
+ if (APPLE AND GGML_ACCELERATE) # optional Accelerate support: only on Apple and only when requested
14
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
15
+ if (ACCELERATE_FRAMEWORK)
16
+ message(STATUS "Accelerate framework found")
17
+
18
+ target_compile_definitions(ggml-cpu PRIVATE GGML_USE_ACCELERATE) # scoped to the target rather than the whole directory
19
+ target_compile_definitions(ggml-cpu PRIVATE ACCELERATE_NEW_LAPACK)
20
+ target_compile_definitions(ggml-cpu PRIVATE ACCELERATE_LAPACK_ILP64)
21
+
22
+ target_link_libraries(ggml-cpu PRIVATE ${ACCELERATE_FRAMEWORK})
23
+ else()
24
+ message(WARNING "Accelerate framework not found") # non-fatal: configuration continues without Accelerate
25
+ endif()
26
+ endif()
27
+
28
+ if (GGML_OPENMP) # optional OpenMP support; a missing OpenMP only produces a warning
29
+ find_package(OpenMP)
30
+ if (OpenMP_FOUND)
31
+ message(STATUS "OpenMP found")
32
+
33
+ target_compile_definitions(ggml-cpu PRIVATE GGML_USE_OPENMP) # scoped to the target rather than the whole directory
34
+
35
+ target_link_libraries(ggml-cpu PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX) # the target has both C and C++ sources
36
+
37
+ # FIXME: should be replaced with a compiler id check
38
+ #if (GGML_MUSA)
39
+ # list(APPEND GGML_CPU_EXTRA_INCLUDES "/usr/lib/llvm-14/lib/clang/14.0.0/include")
40
+ # list(APPEND GGML_CPU_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-14/lib/libomp.so")
41
+ #endif()
42
+ else()
43
+ message(WARNING "OpenMP not found")
44
+ endif()
45
+ endif()
46
+
47
+ if (GGML_LLAMAFILE) # optional llamafile sgemm sources
48
+ message(STATUS "Using llamafile")
49
+
50
+ target_compile_definitions(ggml-cpu PRIVATE GGML_USE_LLAMAFILE) # scoped to the target rather than the whole directory
51
+
52
+ target_sources(ggml-cpu PRIVATE
53
+ llamafile/sgemm.cpp
54
+ llamafile/sgemm.h)
55
+ endif()
56
+
57
+ if (GGML_CPU_HBM) # optional memkind support; configuration fails if the library is missing
58
+ find_library(memkind memkind REQUIRED)
59
+
60
+ message(STATUS "Using memkind for CPU HBM")
61
+
62
+ target_compile_definitions(ggml-cpu PRIVATE GGML_USE_CPU_HBM) # scoped to the target rather than the whole directory
63
+
64
+ target_link_libraries(ggml-cpu PUBLIC memkind) # NOTE(review): links by bare name; the path found by find_library above is not used - confirm this is intended
65
+ endif()
66
+
67
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
68
+ CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
69
+ (NOT CMAKE_OSX_ARCHITECTURES AND
70
+ NOT CMAKE_GENERATOR_PLATFORM_LWR AND
71
+ CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
72
+
73
+ message(STATUS "ARM detected")
74
+
75
+ if (MSVC) # MSVC branch: define the ARM feature macros by hand, probing optional features
76
+ target_compile_definitions(ggml-cpu PRIVATE __aarch64__) # MSVC defines _M_ARM64 instead
77
+ target_compile_definitions(ggml-cpu PRIVATE __ARM_NEON)
78
+ target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_FMA)
79
+
80
+ set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS}) # saved so the probe-only flag below can be undone
81
+ string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
82
+
83
+ check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
84
+ if (GGML_COMPILER_SUPPORT_DOTPROD)
85
+ target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_DOTPROD)
86
+ endif ()
87
+
88
+ check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) # probe the int8 matrix-multiply intrinsic; the previous probe called vmlaq_f32, a float multiply-accumulate
89
+
90
+ if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
91
+ target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_MATMUL_INT8)
92
+ endif ()
93
+
94
+ check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
95
+ if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
96
+ target_compile_definitions(ggml-cpu PRIVATE __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
97
+ endif ()
98
+
99
+ set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV}) # restore the flags used by later probes
100
+ else() # non-MSVC branch: collect compiler flags in ARCH_FLAGS
101
+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
102
+ if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") # any non-empty probe result enables the flag
103
+ list(APPEND ARCH_FLAGS -mfp16-format=ieee)
104
+ endif()
105
+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
106
+ # Raspberry Pi 1, Zero
107
+ list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
108
+ endif()
109
+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
110
+ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
111
+ # Android armeabi-v7a
112
+ list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
113
+ else()
114
+ # Raspberry Pi 2
115
+ list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
116
+ endif()
117
+ endif()
118
+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
119
+ # Android arm64-v8a
120
+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
121
+ list(APPEND ARCH_FLAGS -mno-unaligned-access)
122
+ endif()
123
+ if (GGML_SVE)
124
+ list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
125
+ endif()
126
+ endif()
127
+ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
128
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
129
+ CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
130
+ message(STATUS "x86 detected")
131
+ if (MSVC) # MSVC branch: /arch flags plus hand-defined feature macros
132
+ # instruction set detection for MSVC only
133
+ if (GGML_NATIVE)
134
+ # NOTE(review): this diff moves FindSIMD.cmake into src/ggml-cpu/cmake, so the include no longer reaches into a parent folder
135
+ include(cmake/FindSIMD.cmake)
136
+ endif ()
137
+ if (GGML_AVX512)
138
+ list(APPEND ARCH_FLAGS /arch:AVX512)
139
+ # MSVC has no compile-time flags enabling specific
140
+ # AVX512 extensions, nor does it define the
141
+ # macros corresponding to the extensions.
142
+ # Do it manually.
143
+ if (GGML_AVX512_VBMI)
144
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
145
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
146
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang") # Clang additionally receives the GNU-style flag
147
+ list(APPEND ARCH_FLAGS -mavx512vbmi)
148
+ endif()
149
+ endif()
150
+ if (GGML_AVX512_VNNI)
151
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
152
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
153
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
154
+ list(APPEND ARCH_FLAGS -mavx512vnni)
155
+ endif()
156
+ endif()
157
+ if (GGML_AVX512_BF16)
158
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
159
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
160
+ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
161
+ list(APPEND ARCH_FLAGS -mavx512bf16)
162
+ endif()
163
+ endif()
164
+ if (GGML_AMX_TILE) # the AMX macros are only defined inside the GGML_AVX512 branch
165
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
166
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
167
+ endif()
168
+ if (GGML_AMX_INT8)
169
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
170
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
171
+ endif()
172
+ if (GGML_AMX_BF16)
173
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
174
+ target_compile_definitions(ggml-cpu PRIVATE $<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
175
+ endif()
176
+ elseif (GGML_AVX2) # only the highest requested /arch level is appended
177
+ list(APPEND ARCH_FLAGS /arch:AVX2)
178
+ elseif (GGML_AVX)
179
+ list(APPEND ARCH_FLAGS /arch:AVX)
180
+ endif()
181
+ else() # non-MSVC branch: each enabled option appends its own flag
182
+ if (GGML_NATIVE)
183
+ list(APPEND ARCH_FLAGS -march=native)
184
+ endif()
185
+ if (GGML_F16C)
186
+ list(APPEND ARCH_FLAGS -mf16c)
187
+ endif()
188
+ if (GGML_FMA)
189
+ list(APPEND ARCH_FLAGS -mfma)
190
+ endif()
191
+ if (GGML_AVX)
192
+ list(APPEND ARCH_FLAGS -mavx)
193
+ endif()
194
+ if (GGML_AVX2)
195
+ list(APPEND ARCH_FLAGS -mavx2)
196
+ endif()
197
+ if (GGML_AVX512)
198
+ list(APPEND ARCH_FLAGS -mavx512f)
199
+ list(APPEND ARCH_FLAGS -mavx512dq)
200
+ list(APPEND ARCH_FLAGS -mavx512bw)
201
+ endif()
202
+ if (GGML_AVX512_VBMI)
203
+ list(APPEND ARCH_FLAGS -mavx512vbmi)
204
+ endif()
205
+ if (GGML_AVX512_VNNI)
206
+ list(APPEND ARCH_FLAGS -mavx512vnni)
207
+ endif()
208
+ if (GGML_AVX512_BF16)
209
+ list(APPEND ARCH_FLAGS -mavx512bf16)
210
+ endif()
211
+ if (GGML_AMX_TILE)
212
+ list(APPEND ARCH_FLAGS -mamx-tile)
213
+ endif()
214
+ if (GGML_AMX_INT8)
215
+ list(APPEND ARCH_FLAGS -mamx-int8)
216
+ endif()
217
+ if (GGML_AMX_BF16)
218
+ list(APPEND ARCH_FLAGS -mamx-bf16)
219
+ endif()
220
+ endif()
221
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
222
+ message(STATUS "PowerPC detected")
223
+ execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M) # reads /proc/cpuinfo of the machine running CMake, at configure time
224
+ string(FIND "${POWER10_M}" "POWER10" substring_index)
225
+ if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "") # fallback: treat a missing or empty index as "not found"
226
+ set(substring_index -1)
227
+ endif()
228
+
229
+ if (${substring_index} GREATER_EQUAL 0) # "POWER10" appeared in the probe output
230
+ list(APPEND ARCH_FLAGS -mcpu=power10)
231
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
232
+ list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
233
+ else()
234
+ list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
235
+ #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
236
+ endif()
237
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
238
+ message(STATUS "loongarch64 detected")
239
+
240
+ list(APPEND ARCH_FLAGS -march=loongarch64) # always added on this branch; the vector flags below are opt-in
241
+ if (GGML_LASX)
242
+ list(APPEND ARCH_FLAGS -mlasx)
243
+ endif()
244
+ if (GGML_LSX)
245
+ list(APPEND ARCH_FLAGS -mlsx)
246
+ endif()
247
+ else()
248
+ message(STATUS "Unknown architecture")
249
+ endif()
250
+
251
+ if (GGML_CPU_AARCH64) # checked regardless of which architecture branch was taken above
252
+ message(STATUS "Using runtime weight conversion of Q4_0 to Q4_0_x_x to enable optimized GEMM/GEMV kernels")
253
+ target_compile_definitions(ggml-cpu PRIVATE GGML_USE_CPU_AARCH64) # scoped to the target rather than the whole directory
254
+ endif()
255
+
256
+ target_compile_options(ggml-cpu PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>") # apply the flags collected above to C++ sources
257
+ target_compile_options(ggml-cpu PRIVATE "$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>") # and the same flags to C sources
258
+
259
+ if (EMSCRIPTEN) # Emscripten builds always get -msimd128
260
+ target_compile_options(ggml-cpu PRIVATE -msimd128) # replaces the legacy COMPILE_FLAGS target property
261
+ endif()