@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/lib/binding.ts +3 -1
  2. package/lib/index.js +2 -0
  3. package/lib/index.ts +3 -1
  4. package/package.json +14 -14
  5. package/scripts/llama.cpp.patch +27 -26
  6. package/src/EmbeddingWorker.cpp +1 -1
  7. package/src/LlamaCompletionWorker.cpp +28 -7
  8. package/src/LlamaCompletionWorker.h +4 -0
  9. package/src/LlamaContext.cpp +14 -17
  10. package/src/common.hpp +7 -6
  11. package/src/llama.cpp/CMakeLists.txt +15 -4
  12. package/src/llama.cpp/common/CMakeLists.txt +15 -24
  13. package/src/llama.cpp/common/arg.cpp +172 -110
  14. package/src/llama.cpp/common/chat-parser.cpp +385 -0
  15. package/src/llama.cpp/common/chat-parser.h +120 -0
  16. package/src/llama.cpp/common/chat.cpp +726 -596
  17. package/src/llama.cpp/common/chat.h +74 -8
  18. package/src/llama.cpp/common/common.cpp +56 -38
  19. package/src/llama.cpp/common/common.h +9 -3
  20. package/src/llama.cpp/common/json-partial.cpp +256 -0
  21. package/src/llama.cpp/common/json-partial.h +38 -0
  22. package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
  23. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
  24. package/src/llama.cpp/common/sampling.cpp +7 -8
  25. package/src/llama.cpp/common/speculative.cpp +6 -4
  26. package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
  27. package/src/llama.cpp/ggml/include/ggml.h +22 -3
  28. package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
  29. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
  30. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  31. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
  32. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  33. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  34. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
  35. package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  37. package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  39. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  40. package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  41. package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  42. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  43. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
  44. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  45. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
  46. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
  47. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
  48. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
  49. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  50. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  51. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
  52. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  53. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
  54. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  55. package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  56. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  57. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
  58. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  59. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
  60. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  61. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  62. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
  63. package/src/llama.cpp/include/llama.h +145 -40
  64. package/src/llama.cpp/src/CMakeLists.txt +5 -1
  65. package/src/llama.cpp/src/llama-arch.cpp +99 -3
  66. package/src/llama.cpp/src/llama-arch.h +10 -1
  67. package/src/llama.cpp/src/llama-batch.cpp +728 -272
  68. package/src/llama.cpp/src/llama-batch.h +112 -54
  69. package/src/llama.cpp/src/llama-chat.cpp +19 -2
  70. package/src/llama.cpp/src/llama-chat.h +1 -0
  71. package/src/llama.cpp/src/llama-context.cpp +525 -339
  72. package/src/llama.cpp/src/llama-context.h +38 -17
  73. package/src/llama.cpp/src/llama-cparams.cpp +4 -0
  74. package/src/llama.cpp/src/llama-cparams.h +2 -0
  75. package/src/llama.cpp/src/llama-grammar.cpp +12 -2
  76. package/src/llama.cpp/src/llama-graph.cpp +413 -353
  77. package/src/llama.cpp/src/llama-graph.h +112 -56
  78. package/src/llama.cpp/src/llama-hparams.cpp +10 -2
  79. package/src/llama.cpp/src/llama-hparams.h +13 -2
  80. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
  81. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
  82. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
  83. package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
  84. package/src/llama.cpp/src/llama-kv-cells.h +415 -0
  85. package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  86. package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
  87. package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
  88. package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
  89. package/src/llama.cpp/src/llama-memory.cpp +41 -0
  90. package/src/llama.cpp/src/llama-memory.h +86 -5
  91. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  92. package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
  93. package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
  94. package/src/llama.cpp/src/llama-model.cpp +1137 -528
  95. package/src/llama.cpp/src/llama-model.h +4 -0
  96. package/src/llama.cpp/src/llama-quant.cpp +2 -1
  97. package/src/llama.cpp/src/llama-sampling.cpp +2 -2
  98. package/src/llama.cpp/src/llama-vocab.cpp +69 -32
  99. package/src/llama.cpp/src/llama-vocab.h +1 -0
  100. package/src/llama.cpp/src/llama.cpp +11 -7
  101. package/src/llama.cpp/src/unicode.cpp +5 -0
  102. package/src/tts_utils.h +1 -1
  103. package/src/llama.cpp/common/json.hpp +0 -24766
  104. package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
  105. package/src/llama.cpp/common/minja/minja.hpp +0 -2974
  106. package/src/llama.cpp/common/stb_image.h +0 -7988
  107. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  108. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
  109. package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
  110. package/src/llama.cpp/src/llama-kv-cache.h +0 -515
  111. /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  112. /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  113. /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -1,3 +1,17 @@
1
# ggml_add_cpu_backend_features(<cpu_name> <arch> [<definition>...])
#
# Compiles ggml-cpu/arch/<arch>/cpu-feats.cpp into an OBJECT library named
# <cpu_name>-feats and links it privately into the <cpu_name> target.
# Every extra argument is forwarded to the object library as a private
# compile definition.
function(ggml_add_cpu_backend_features cpu_name arch)
    # The feature detection code gets its own target so that it can be built
    # without the architecture flags. set_source_files_properties() is not an
    # option for this because several variants of the CPU backend may be part
    # of the same build.
    set(feats_target ${cpu_name}-feats)

    add_library(${feats_target} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
    set_target_properties(${feats_target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_include_directories(${feats_target} PRIVATE . .. ../include)
    target_compile_definitions(${feats_target} PRIVATE
        ${ARGN}
        GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)

    target_link_libraries(${cpu_name} PRIVATE ${feats_target})
endfunction()
14
+
1
15
  function(ggml_add_cpu_backend_variant_impl tag_name)
2
16
  if (tag_name)
3
17
  set(GGML_CPU_NAME ggml-cpu-${tag_name})
@@ -10,14 +24,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
10
24
  list (APPEND GGML_CPU_SOURCES
11
25
  ggml-cpu/ggml-cpu.c
12
26
  ggml-cpu/ggml-cpu.cpp
13
- ggml-cpu/ggml-cpu-aarch64.cpp
14
- ggml-cpu/ggml-cpu-aarch64.h
15
- ggml-cpu/ggml-cpu-hbm.cpp
16
- ggml-cpu/ggml-cpu-hbm.h
17
- ggml-cpu/ggml-cpu-quants.c
18
- ggml-cpu/ggml-cpu-quants.h
19
- ggml-cpu/ggml-cpu-traits.cpp
20
- ggml-cpu/ggml-cpu-traits.h
27
+ ggml-cpu/repack.cpp
28
+ ggml-cpu/repack.h
29
+ ggml-cpu/hbm.cpp
30
+ ggml-cpu/hbm.h
31
+ ggml-cpu/quants.c
32
+ ggml-cpu/quants.h
33
+ ggml-cpu/traits.cpp
34
+ ggml-cpu/traits.h
21
35
  ggml-cpu/amx/amx.cpp
22
36
  ggml-cpu/amx/amx.h
23
37
  ggml-cpu/amx/mmq.cpp
@@ -82,12 +96,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
82
96
  target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
83
97
  endif()
84
98
 
85
- if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
86
- CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
87
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
88
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
89
-
99
+ if (GGML_SYSTEM_ARCH STREQUAL "ARM")
90
100
  message(STATUS "ARM detected")
101
+ list(APPEND GGML_CPU_SOURCES
102
+ ggml-cpu/arch/arm/quants.c
103
+ ggml-cpu/arch/arm/repack.cpp
104
+ )
91
105
 
92
106
  if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
93
107
  list(APPEND ARCH_FLAGS /arch:armv8.7)
@@ -143,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
143
157
  else()
144
158
  if (GGML_CPU_ARM_ARCH)
145
159
  list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
160
+ elseif(GGML_CPU_ALL_VARIANTS)
161
+ # Begin with the lowest baseline
162
+ set(ARM_MCPU "armv8-a")
163
+ set(ARCH_TAGS "")
164
+ set(ARCH_DEFINITIONS "")
165
+
166
+ # When a feature is selected, bump the MCPU to the first
167
+ # version that supported it
168
+ if (GGML_INTERNAL_DOTPROD)
169
+ set(ARM_MCPU "armv8.2-a")
170
+ set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
171
+ list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
172
+ endif()
173
+ if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
174
+ set(ARM_MCPU "armv8.2-a")
175
+ set(ARCH_TAGS "${ARCH_TAGS}+fp16")
176
+ list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
177
+ endif()
178
+ if (GGML_INTERNAL_SVE)
179
+ set(ARM_MCPU "armv8.2-a")
180
+ set(ARCH_TAGS "${ARCH_TAGS}+sve")
181
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
182
+ endif()
183
+ if (GGML_INTERNAL_MATMUL_INT8)
184
+ set(ARM_MCPU "armv8.6-a")
185
+ set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
186
+ list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
187
+ endif()
188
+ if (GGML_INTERNAL_SVE2)
189
+ set(ARM_MCPU "armv8.6-a")
190
+ set(ARCH_TAGS "${ARCH_TAGS}+sve2")
191
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
192
+ endif()
193
+ if (GGML_INTERNAL_NOSVE)
194
+ set(ARCH_TAGS "${ARCH_TAGS}+nosve")
195
+ endif()
196
+ if (GGML_INTERNAL_SME)
197
+ set(ARM_MCPU "armv9.2-a")
198
+ set(ARCH_TAGS "${ARCH_TAGS}+sme")
199
+ list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
200
+ endif()
201
+ list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
202
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
146
203
  endif()
147
204
  endif()
148
205
 
@@ -170,11 +227,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
170
227
  endforeach()
171
228
  endif()
172
229
  endif()
173
- elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
174
- (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
175
- CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
176
-
230
+ elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
177
231
  message(STATUS "x86 detected")
232
+ list(APPEND GGML_CPU_SOURCES
233
+ ggml-cpu/arch/x86/quants.c
234
+ ggml-cpu/arch/x86/repack.cpp
235
+ )
178
236
 
179
237
  if (MSVC)
180
238
  # instruction set detection for MSVC only
@@ -299,8 +357,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
299
357
  endif()
300
358
  endif()
301
359
  endif()
302
- elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
360
+
361
+ if (GGML_BACKEND_DL)
362
+ if (GGML_NATIVE)
363
+ # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
364
+ message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
365
+ endif()
366
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
367
+ endif()
368
+ elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
303
369
  message(STATUS "PowerPC detected")
370
+ list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
304
371
  if (GGML_NATIVE)
305
372
  if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
306
373
  file(READ "/proc/cpuinfo" POWER10_M)
@@ -308,7 +375,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
308
375
  execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
309
376
  endif()
310
377
 
311
- string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
378
+ string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
379
+ string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
312
380
  string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
313
381
 
314
382
  if (EXTRACTED_NUMBER GREATER_EQUAL 10)
@@ -320,13 +388,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
320
388
  else()
321
389
  list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
322
390
  endif()
391
+ elseif(GGML_CPU_ALL_VARIANTS)
392
+ # Begin with the lowest baseline
393
+ set(ARCH_DEFINITIONS "")
394
+
395
+ # When a feature is selected, bump the MCPU to the first
396
+ # version that supported it
397
+ foreach(PVER RANGE 7 11)
398
+ if(DEFINED GGML_INTERNAL_POWER${PVER})
399
+ set(POWERPC_MCPU "power${PVER}")
400
+ list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
401
+ endif()
402
+ endforeach()
403
+ if (GGML_INTERNAL_VSX)
404
+ list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
405
+ list(APPEND ARCH_FLAGS -mvsx)
406
+ endif()
407
+
408
+ if (DEFINED POWERPC_MCPU)
409
+ list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
410
+ endif()
411
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
323
412
  else()
324
413
  if (GGML_CPU_POWERPC_CPUTYPE)
325
414
  list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
326
415
  endif()
327
416
  endif()
328
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
417
+ elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
329
418
  message(STATUS "loongarch64 detected")
419
+ list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
330
420
 
331
421
  list(APPEND ARCH_FLAGS -march=loongarch64)
332
422
  if (GGML_LASX)
@@ -335,17 +425,24 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
335
425
  if (GGML_LSX)
336
426
  list(APPEND ARCH_FLAGS -mlsx)
337
427
  endif()
338
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
339
- message(STATUS "RISC-V detected")
428
+ elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
429
+ message(STATUS "riscv64 detected")
430
+ list(APPEND GGML_CPU_SOURCES
431
+ ggml-cpu/arch/riscv/quants.c
432
+ ggml-cpu/arch/riscv/repack.cpp
433
+ )
340
434
  if (GGML_RVV)
341
- if (GGML_RV_ZFH)
342
- list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -DGGML_RV_ZFH -mabi=lp64d)
435
+ if (GGML_XTHEADVECTOR)
436
+ list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
437
+ elseif (GGML_RV_ZFH)
438
+ list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -mabi=lp64d)
343
439
  else()
344
440
  list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
345
441
  endif()
346
442
  endif()
347
- elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
443
+ elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
348
444
  message(STATUS "s390x detected")
445
+ list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
349
446
  file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
350
447
  string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
351
448
 
@@ -369,12 +466,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
369
466
  if (GGML_VXE)
370
467
  list(APPEND ARCH_FLAGS -mvx -mzvector)
371
468
  endif()
469
+ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
470
+ message(STATUS "Wasm detected")
471
+ list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
372
472
  else()
373
- message(STATUS "Unknown architecture")
473
+ message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
474
+ list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
374
475
  endif()
375
476
 
376
- if (GGML_CPU_AARCH64)
377
- target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
477
+ if (GGML_CPU_REPACK)
478
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
378
479
  endif()
379
480
 
380
481
  if (GGML_CPU_KLEIDIAI)
@@ -385,9 +486,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
385
486
 
386
487
  # Fetch KleidiAI sources:
387
488
  include(FetchContent)
388
- set(KLEIDIAI_COMMIT_TAG "v1.6.0")
489
+ set(KLEIDIAI_COMMIT_TAG "v1.9.0")
389
490
  set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
390
- set(KLEIDIAI_ARCHIVE_MD5 "75b4ad68f25ab673dcc01065e5a0b05f")
491
+ set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017")
391
492
 
392
493
  if (POLICY CMP0135)
393
494
  cmake_policy(SET CMP0135 NEW)
@@ -477,25 +578,6 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
477
578
  target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
478
579
  target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
479
580
 
480
- if (GGML_BACKEND_DL)
481
- if (GGML_NATIVE)
482
- # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
483
- message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
484
- endif()
485
-
486
- # The feature detection code is compiled as a separate target so that
487
- # it can be built without the architecture flags
488
- # Since multiple variants of the CPU backend may be included in the same
489
- # build, using set_source_files_properties() to set the arch flags is not possible
490
- set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
491
- add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
492
- target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
493
- target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
494
- target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
495
- set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
496
- target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
497
- endif()
498
-
499
581
  if (EMSCRIPTEN)
500
582
  set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
501
583
  endif()
@@ -5,7 +5,7 @@
5
5
  #include "ggml-backend.h"
6
6
  #include "ggml-impl.h"
7
7
  #include "ggml-cpu.h"
8
- #include "ggml-cpu-traits.h"
8
+ #include "traits.h"
9
9
 
10
10
  #if defined(__gnu_linux__)
11
11
  #include <sys/syscall.h>
@@ -8,7 +8,7 @@
8
8
  #include "mmq.h"
9
9
  #include "ggml-impl.h"
10
10
  #include "ggml-cpu-impl.h"
11
- #include "ggml-cpu-quants.h"
11
+ #include "quants.h"
12
12
  #include "ggml-quants.h"
13
13
  #include <algorithm>
14
14
  #include <type_traits>
@@ -0,0 +1,94 @@
1
+ #include "ggml-backend-impl.h"
2
+
3
+ #if defined(__aarch64__)
4
+
5
+ #if defined(__linux__)
6
+ #include <sys/auxv.h>
7
+ #elif defined(__APPLE__)
8
+ #include <sys/sysctl.h>
9
+ #endif
10
+
11
+ #if !defined(HWCAP2_I8MM)
12
+ #define HWCAP2_I8MM (1 << 13)
13
+ #endif
14
+
15
+ #if !defined(HWCAP2_SME)
16
+ #define HWCAP2_SME (1 << 23)
17
+ #endif
18
+
19
+ struct aarch64_features {
20
+ // has_neon not needed, aarch64 has NEON guaranteed
21
+ bool has_dotprod = false;
22
+ bool has_fp16_va = false;
23
+ bool has_sve = false;
24
+ bool has_sve2 = false;
25
+ bool has_i8mm = false;
26
+ bool has_sme = false;
27
+
28
+ aarch64_features() {
29
+ #if defined(__linux__)
30
+ uint32_t hwcap = getauxval(AT_HWCAP);
31
+ uint32_t hwcap2 = getauxval(AT_HWCAP2);
32
+
33
+ has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
34
+ has_fp16_va = !!(hwcap & HWCAP_FPHP);
35
+ has_sve = !!(hwcap & HWCAP_SVE);
36
+ has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
37
+ has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
38
+ has_sme = !!(hwcap2 & HWCAP2_SME);
39
+ #elif defined(__APPLE__)
40
+ int oldp = 0;
41
+ size_t size = sizeof(oldp);
42
+
43
+ if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
44
+ has_dotprod = static_cast<bool>(oldp);
45
+ }
46
+
47
+ if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
48
+ has_i8mm = static_cast<bool>(oldp);
49
+ }
50
+
51
+ if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
52
+ has_sme = static_cast<bool>(oldp);
53
+ }
54
+
55
+ // Apple apparently does not implement SVE yet
56
+ #endif
57
+ }
58
+ };
59
+
60
+ static int ggml_backend_cpu_aarch64_score() {
61
+ int score = 1;
62
+ aarch64_features af;
63
+
64
+ #ifdef GGML_USE_DOTPROD
65
+ if (!af.has_dotprod) { return 0; }
66
+ score += 1<<1;
67
+ #endif
68
+ #ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
69
+ if (!af.has_fp16_va) { return 0; }
70
+ score += 1<<2;
71
+ #endif
72
+ #ifdef GGML_USE_SVE
73
+ if (!af.has_sve) { return 0; }
74
+ score += 1<<3;
75
+ #endif
76
+ #ifdef GGML_USE_MATMUL_INT8
77
+ if (!af.has_i8mm) { return 0; }
78
+ score += 1<<4;
79
+ #endif
80
+ #ifdef GGML_USE_SVE2
81
+ if (!af.has_sve2) { return 0; }
82
+ score += 1<<5;
83
+ #endif
84
+ #ifdef GGML_USE_SME
85
+ if (!af.has_sme) { return 0; }
86
+ score += 1<<6;
87
+ #endif
88
+
89
+ return score;
90
+ }
91
+
92
+ GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
93
+
94
+ # endif // defined(__aarch64__)