@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +3 -1
- package/lib/index.js +2 -0
- package/lib/index.ts +3 -1
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +27 -26
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +28 -7
- package/src/LlamaCompletionWorker.h +4 -0
- package/src/LlamaContext.cpp +14 -17
- package/src/common.hpp +7 -6
- package/src/llama.cpp/CMakeLists.txt +15 -4
- package/src/llama.cpp/common/CMakeLists.txt +15 -24
- package/src/llama.cpp/common/arg.cpp +172 -110
- package/src/llama.cpp/common/chat-parser.cpp +385 -0
- package/src/llama.cpp/common/chat-parser.h +120 -0
- package/src/llama.cpp/common/chat.cpp +726 -596
- package/src/llama.cpp/common/chat.h +74 -8
- package/src/llama.cpp/common/common.cpp +56 -38
- package/src/llama.cpp/common/common.h +9 -3
- package/src/llama.cpp/common/json-partial.cpp +256 -0
- package/src/llama.cpp/common/json-partial.h +38 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/src/llama.cpp/common/sampling.cpp +7 -8
- package/src/llama.cpp/common/speculative.cpp +6 -4
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
- package/src/llama.cpp/ggml/include/ggml.h +22 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
- package/src/llama.cpp/include/llama.h +145 -40
- package/src/llama.cpp/src/CMakeLists.txt +5 -1
- package/src/llama.cpp/src/llama-arch.cpp +99 -3
- package/src/llama.cpp/src/llama-arch.h +10 -1
- package/src/llama.cpp/src/llama-batch.cpp +728 -272
- package/src/llama.cpp/src/llama-batch.h +112 -54
- package/src/llama.cpp/src/llama-chat.cpp +19 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +525 -339
- package/src/llama.cpp/src/llama-context.h +38 -17
- package/src/llama.cpp/src/llama-cparams.cpp +4 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-grammar.cpp +12 -2
- package/src/llama.cpp/src/llama-graph.cpp +413 -353
- package/src/llama.cpp/src/llama-graph.h +112 -56
- package/src/llama.cpp/src/llama-hparams.cpp +10 -2
- package/src/llama.cpp/src/llama-hparams.h +13 -2
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
- package/src/llama.cpp/src/llama-kv-cells.h +415 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
- package/src/llama.cpp/src/llama-memory.cpp +41 -0
- package/src/llama.cpp/src/llama-memory.h +86 -5
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1137 -528
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +69 -32
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +11 -7
- package/src/llama.cpp/src/unicode.cpp +5 -0
- package/src/tts_utils.h +1 -1
- package/src/llama.cpp/common/json.hpp +0 -24766
- package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
- package/src/llama.cpp/common/minja/minja.hpp +0 -2974
- package/src/llama.cpp/common/stb_image.h +0 -7988
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
- package/src/llama.cpp/src/llama-kv-cache.h +0 -515
- /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
function(ggml_add_cpu_backend_features cpu_name arch)
|
|
2
|
+
# The feature detection code is compiled as a separate target so that
|
|
3
|
+
# it can be built without the architecture flags
|
|
4
|
+
# Since multiple variants of the CPU backend may be included in the same
|
|
5
|
+
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
6
|
+
set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
|
|
7
|
+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
|
|
8
|
+
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
9
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
|
|
10
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
11
|
+
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
12
|
+
target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
13
|
+
endfunction()
|
|
14
|
+
|
|
1
15
|
function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
2
16
|
if (tag_name)
|
|
3
17
|
set(GGML_CPU_NAME ggml-cpu-${tag_name})
|
|
@@ -10,14 +24,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
10
24
|
list (APPEND GGML_CPU_SOURCES
|
|
11
25
|
ggml-cpu/ggml-cpu.c
|
|
12
26
|
ggml-cpu/ggml-cpu.cpp
|
|
13
|
-
ggml-cpu/
|
|
14
|
-
ggml-cpu/
|
|
15
|
-
ggml-cpu/
|
|
16
|
-
ggml-cpu/
|
|
17
|
-
ggml-cpu/
|
|
18
|
-
ggml-cpu/
|
|
19
|
-
ggml-cpu/
|
|
20
|
-
ggml-cpu/
|
|
27
|
+
ggml-cpu/repack.cpp
|
|
28
|
+
ggml-cpu/repack.h
|
|
29
|
+
ggml-cpu/hbm.cpp
|
|
30
|
+
ggml-cpu/hbm.h
|
|
31
|
+
ggml-cpu/quants.c
|
|
32
|
+
ggml-cpu/quants.h
|
|
33
|
+
ggml-cpu/traits.cpp
|
|
34
|
+
ggml-cpu/traits.h
|
|
21
35
|
ggml-cpu/amx/amx.cpp
|
|
22
36
|
ggml-cpu/amx/amx.h
|
|
23
37
|
ggml-cpu/amx/mmq.cpp
|
|
@@ -82,12 +96,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
82
96
|
target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
|
|
83
97
|
endif()
|
|
84
98
|
|
|
85
|
-
if (
|
|
86
|
-
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
|
87
|
-
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
88
|
-
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
|
89
|
-
|
|
99
|
+
if (GGML_SYSTEM_ARCH STREQUAL "ARM")
|
|
90
100
|
message(STATUS "ARM detected")
|
|
101
|
+
list(APPEND GGML_CPU_SOURCES
|
|
102
|
+
ggml-cpu/arch/arm/quants.c
|
|
103
|
+
ggml-cpu/arch/arm/repack.cpp
|
|
104
|
+
)
|
|
91
105
|
|
|
92
106
|
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
93
107
|
list(APPEND ARCH_FLAGS /arch:armv8.7)
|
|
@@ -143,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
143
157
|
else()
|
|
144
158
|
if (GGML_CPU_ARM_ARCH)
|
|
145
159
|
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
|
160
|
+
elseif(GGML_CPU_ALL_VARIANTS)
|
|
161
|
+
# Begin with the lowest baseline
|
|
162
|
+
set(ARM_MCPU "armv8-a")
|
|
163
|
+
set(ARCH_TAGS "")
|
|
164
|
+
set(ARCH_DEFINITIONS "")
|
|
165
|
+
|
|
166
|
+
# When a feature is selected, bump the MCPU to the first
|
|
167
|
+
# version that supported it
|
|
168
|
+
if (GGML_INTERNAL_DOTPROD)
|
|
169
|
+
set(ARM_MCPU "armv8.2-a")
|
|
170
|
+
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
|
|
171
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
|
|
172
|
+
endif()
|
|
173
|
+
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
|
|
174
|
+
set(ARM_MCPU "armv8.2-a")
|
|
175
|
+
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
|
|
176
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
|
|
177
|
+
endif()
|
|
178
|
+
if (GGML_INTERNAL_SVE)
|
|
179
|
+
set(ARM_MCPU "armv8.2-a")
|
|
180
|
+
set(ARCH_TAGS "${ARCH_TAGS}+sve")
|
|
181
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
|
|
182
|
+
endif()
|
|
183
|
+
if (GGML_INTERNAL_MATMUL_INT8)
|
|
184
|
+
set(ARM_MCPU "armv8.6-a")
|
|
185
|
+
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
|
|
186
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
|
|
187
|
+
endif()
|
|
188
|
+
if (GGML_INTERNAL_SVE2)
|
|
189
|
+
set(ARM_MCPU "armv8.6-a")
|
|
190
|
+
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
|
|
191
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
|
|
192
|
+
endif()
|
|
193
|
+
if (GGML_INTERNAL_NOSVE)
|
|
194
|
+
set(ARCH_TAGS "${ARCH_TAGS}+nosve")
|
|
195
|
+
endif()
|
|
196
|
+
if (GGML_INTERNAL_SME)
|
|
197
|
+
set(ARM_MCPU "armv9.2-a")
|
|
198
|
+
set(ARCH_TAGS "${ARCH_TAGS}+sme")
|
|
199
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
|
|
200
|
+
endif()
|
|
201
|
+
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
|
|
202
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
|
|
146
203
|
endif()
|
|
147
204
|
endif()
|
|
148
205
|
|
|
@@ -170,11 +227,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
170
227
|
endforeach()
|
|
171
228
|
endif()
|
|
172
229
|
endif()
|
|
173
|
-
elseif (
|
|
174
|
-
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
175
|
-
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
|
176
|
-
|
|
230
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
|
|
177
231
|
message(STATUS "x86 detected")
|
|
232
|
+
list(APPEND GGML_CPU_SOURCES
|
|
233
|
+
ggml-cpu/arch/x86/quants.c
|
|
234
|
+
ggml-cpu/arch/x86/repack.cpp
|
|
235
|
+
)
|
|
178
236
|
|
|
179
237
|
if (MSVC)
|
|
180
238
|
# instruction set detection for MSVC only
|
|
@@ -299,8 +357,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
299
357
|
endif()
|
|
300
358
|
endif()
|
|
301
359
|
endif()
|
|
302
|
-
|
|
360
|
+
|
|
361
|
+
if (GGML_BACKEND_DL)
|
|
362
|
+
if (GGML_NATIVE)
|
|
363
|
+
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
|
|
364
|
+
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
|
|
365
|
+
endif()
|
|
366
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
|
|
367
|
+
endif()
|
|
368
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
303
369
|
message(STATUS "PowerPC detected")
|
|
370
|
+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
|
|
304
371
|
if (GGML_NATIVE)
|
|
305
372
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
|
306
373
|
file(READ "/proc/cpuinfo" POWER10_M)
|
|
@@ -308,7 +375,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
308
375
|
execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
|
|
309
376
|
endif()
|
|
310
377
|
|
|
311
|
-
string(
|
|
378
|
+
string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
|
|
379
|
+
string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
|
|
312
380
|
string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
|
|
313
381
|
|
|
314
382
|
if (EXTRACTED_NUMBER GREATER_EQUAL 10)
|
|
@@ -320,13 +388,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
320
388
|
else()
|
|
321
389
|
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
|
322
390
|
endif()
|
|
391
|
+
elseif(GGML_CPU_ALL_VARIANTS)
|
|
392
|
+
# Begin with the lowest baseline
|
|
393
|
+
set(ARCH_DEFINITIONS "")
|
|
394
|
+
|
|
395
|
+
# When a feature is selected, bump the MCPU to the first
|
|
396
|
+
# version that supported it
|
|
397
|
+
foreach(PVER RANGE 7 11)
|
|
398
|
+
if(DEFINED GGML_INTERNAL_POWER${PVER})
|
|
399
|
+
set(POWERPC_MCPU "power${PVER}")
|
|
400
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
|
|
401
|
+
endif()
|
|
402
|
+
endforeach()
|
|
403
|
+
if (GGML_INTERNAL_VSX)
|
|
404
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
|
|
405
|
+
list(APPEND ARCH_FLAGS -mvsx)
|
|
406
|
+
endif()
|
|
407
|
+
|
|
408
|
+
if (DEFINED POWERPC_MCPU)
|
|
409
|
+
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
|
|
410
|
+
endif()
|
|
411
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
|
|
323
412
|
else()
|
|
324
413
|
if (GGML_CPU_POWERPC_CPUTYPE)
|
|
325
414
|
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
|
326
415
|
endif()
|
|
327
416
|
endif()
|
|
328
|
-
elseif (
|
|
417
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
|
|
329
418
|
message(STATUS "loongarch64 detected")
|
|
419
|
+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
|
|
330
420
|
|
|
331
421
|
list(APPEND ARCH_FLAGS -march=loongarch64)
|
|
332
422
|
if (GGML_LASX)
|
|
@@ -335,17 +425,24 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
335
425
|
if (GGML_LSX)
|
|
336
426
|
list(APPEND ARCH_FLAGS -mlsx)
|
|
337
427
|
endif()
|
|
338
|
-
elseif (
|
|
339
|
-
message(STATUS "
|
|
428
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
429
|
+
message(STATUS "riscv64 detected")
|
|
430
|
+
list(APPEND GGML_CPU_SOURCES
|
|
431
|
+
ggml-cpu/arch/riscv/quants.c
|
|
432
|
+
ggml-cpu/arch/riscv/repack.cpp
|
|
433
|
+
)
|
|
340
434
|
if (GGML_RVV)
|
|
341
|
-
if (
|
|
342
|
-
list(APPEND ARCH_FLAGS -march=
|
|
435
|
+
if (GGML_XTHEADVECTOR)
|
|
436
|
+
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
|
|
437
|
+
elseif (GGML_RV_ZFH)
|
|
438
|
+
list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -mabi=lp64d)
|
|
343
439
|
else()
|
|
344
440
|
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
|
|
345
441
|
endif()
|
|
346
442
|
endif()
|
|
347
|
-
elseif (
|
|
443
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
348
444
|
message(STATUS "s390x detected")
|
|
445
|
+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
|
|
349
446
|
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
|
|
350
447
|
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
|
|
351
448
|
|
|
@@ -369,12 +466,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
369
466
|
if (GGML_VXE)
|
|
370
467
|
list(APPEND ARCH_FLAGS -mvx -mzvector)
|
|
371
468
|
endif()
|
|
469
|
+
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
|
|
470
|
+
message(STATUS "Wasm detected")
|
|
471
|
+
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
|
|
372
472
|
else()
|
|
373
|
-
message(
|
|
473
|
+
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
|
|
474
|
+
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
|
|
374
475
|
endif()
|
|
375
476
|
|
|
376
|
-
if (
|
|
377
|
-
target_compile_definitions(${GGML_CPU_NAME} PRIVATE
|
|
477
|
+
if (GGML_CPU_REPACK)
|
|
478
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
|
|
378
479
|
endif()
|
|
379
480
|
|
|
380
481
|
if (GGML_CPU_KLEIDIAI)
|
|
@@ -385,9 +486,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
385
486
|
|
|
386
487
|
# Fetch KleidiAI sources:
|
|
387
488
|
include(FetchContent)
|
|
388
|
-
set(KLEIDIAI_COMMIT_TAG "v1.
|
|
489
|
+
set(KLEIDIAI_COMMIT_TAG "v1.9.0")
|
|
389
490
|
set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
|
|
390
|
-
set(KLEIDIAI_ARCHIVE_MD5 "
|
|
491
|
+
set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017")
|
|
391
492
|
|
|
392
493
|
if (POLICY CMP0135)
|
|
393
494
|
cmake_policy(SET CMP0135 NEW)
|
|
@@ -477,25 +578,6 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
477
578
|
target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
|
|
478
579
|
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
479
580
|
|
|
480
|
-
if (GGML_BACKEND_DL)
|
|
481
|
-
if (GGML_NATIVE)
|
|
482
|
-
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
|
|
483
|
-
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
|
|
484
|
-
endif()
|
|
485
|
-
|
|
486
|
-
# The feature detection code is compiled as a separate target so that
|
|
487
|
-
# it can be built without the architecture flags
|
|
488
|
-
# Since multiple variants of the CPU backend may be included in the same
|
|
489
|
-
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
490
|
-
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
|
|
491
|
-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
|
|
492
|
-
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
493
|
-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
494
|
-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
495
|
-
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
496
|
-
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
497
|
-
endif()
|
|
498
|
-
|
|
499
581
|
if (EMSCRIPTEN)
|
|
500
582
|
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
|
|
501
583
|
endif()
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#include "ggml-backend-impl.h"
|
|
2
|
+
|
|
3
|
+
#if defined(__aarch64__)
|
|
4
|
+
|
|
5
|
+
#if defined(__linux__)
|
|
6
|
+
#include <sys/auxv.h>
|
|
7
|
+
#elif defined(__APPLE__)
|
|
8
|
+
#include <sys/sysctl.h>
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#if !defined(HWCAP2_I8MM)
|
|
12
|
+
#define HWCAP2_I8MM (1 << 13)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#if !defined(HWCAP2_SME)
|
|
16
|
+
#define HWCAP2_SME (1 << 23)
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
struct aarch64_features {
|
|
20
|
+
// has_neon not needed, aarch64 has NEON guaranteed
|
|
21
|
+
bool has_dotprod = false;
|
|
22
|
+
bool has_fp16_va = false;
|
|
23
|
+
bool has_sve = false;
|
|
24
|
+
bool has_sve2 = false;
|
|
25
|
+
bool has_i8mm = false;
|
|
26
|
+
bool has_sme = false;
|
|
27
|
+
|
|
28
|
+
aarch64_features() {
|
|
29
|
+
#if defined(__linux__)
|
|
30
|
+
uint32_t hwcap = getauxval(AT_HWCAP);
|
|
31
|
+
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
|
32
|
+
|
|
33
|
+
has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
|
|
34
|
+
has_fp16_va = !!(hwcap & HWCAP_FPHP);
|
|
35
|
+
has_sve = !!(hwcap & HWCAP_SVE);
|
|
36
|
+
has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
|
|
37
|
+
has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
|
38
|
+
has_sme = !!(hwcap2 & HWCAP2_SME);
|
|
39
|
+
#elif defined(__APPLE__)
|
|
40
|
+
int oldp = 0;
|
|
41
|
+
size_t size = sizeof(oldp);
|
|
42
|
+
|
|
43
|
+
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
|
|
44
|
+
has_dotprod = static_cast<bool>(oldp);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
|
|
48
|
+
has_i8mm = static_cast<bool>(oldp);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
|
|
52
|
+
has_sme = static_cast<bool>(oldp);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Apple apparently does not implement SVE yet
|
|
56
|
+
#endif
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
static int ggml_backend_cpu_aarch64_score() {
|
|
61
|
+
int score = 1;
|
|
62
|
+
aarch64_features af;
|
|
63
|
+
|
|
64
|
+
#ifdef GGML_USE_DOTPROD
|
|
65
|
+
if (!af.has_dotprod) { return 0; }
|
|
66
|
+
score += 1<<1;
|
|
67
|
+
#endif
|
|
68
|
+
#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
|
|
69
|
+
if (!af.has_fp16_va) { return 0; }
|
|
70
|
+
score += 1<<2;
|
|
71
|
+
#endif
|
|
72
|
+
#ifdef GGML_USE_SVE
|
|
73
|
+
if (!af.has_sve) { return 0; }
|
|
74
|
+
score += 1<<3;
|
|
75
|
+
#endif
|
|
76
|
+
#ifdef GGML_USE_MATMUL_INT8
|
|
77
|
+
if (!af.has_i8mm) { return 0; }
|
|
78
|
+
score += 1<<4;
|
|
79
|
+
#endif
|
|
80
|
+
#ifdef GGML_USE_SVE2
|
|
81
|
+
if (!af.has_sve2) { return 0; }
|
|
82
|
+
score += 1<<5;
|
|
83
|
+
#endif
|
|
84
|
+
#ifdef GGML_USE_SME
|
|
85
|
+
if (!af.has_sme) { return 0; }
|
|
86
|
+
score += 1<<6;
|
|
87
|
+
#endif
|
|
88
|
+
|
|
89
|
+
return score;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
|
|
93
|
+
|
|
94
|
+
# endif // defined(__aarch64__)
|