@novastera-oss/llamarn 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +8 -3
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +56 -22
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/common/arg.cpp +7 -0
- package/cpp/llama.cpp/common/common.cpp +3 -0
- package/cpp/llama.cpp/common/common.h +1 -0
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/convert_hf_to_gguf.py +118 -20
- package/cpp/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +83 -102
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +192 -67
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +211 -33
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +54 -29
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +84 -31
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +227 -41
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +362 -182
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +240 -535
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +45 -54
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +57 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -13
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +76 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +21 -0
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +64 -0
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +8 -3
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +55 -0
- package/cpp/llama.cpp/src/llama-arch.h +18 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +570 -359
- package/cpp/llama.cpp/src/llama-batch.h +98 -70
- package/cpp/llama.cpp/src/llama-chat.cpp +11 -6
- package/cpp/llama.cpp/src/llama-context.cpp +101 -107
- package/cpp/llama.cpp/src/llama-context.h +13 -13
- package/cpp/llama.cpp/src/llama-graph.cpp +199 -252
- package/cpp/llama.cpp/src/llama-graph.h +44 -32
- package/cpp/llama.cpp/src/llama-hparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-hparams.h +8 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +51 -53
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +19 -24
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +110 -104
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +17 -22
- package/cpp/llama.cpp/src/llama-kv-cells.h +35 -11
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +66 -67
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +16 -21
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +69 -68
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
- package/cpp/llama.cpp/src/llama-memory.h +18 -22
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1006 -472
- package/cpp/llama.cpp/src/llama-model.h +22 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +87 -5
- package/cpp/llama.cpp/src/llama-vocab.cpp +26 -3
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/rn-utils.h +3 -0
- package/ios/include/common.h +1 -0
- package/ios/include/llama.h +8 -3
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3744
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4863
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4834
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3742
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4900
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4871
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3773
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
|
@@ -470,6 +470,7 @@ extern "C" {
|
|
|
470
470
|
GGML_OP_TRANSPOSE,
|
|
471
471
|
GGML_OP_GET_ROWS,
|
|
472
472
|
GGML_OP_GET_ROWS_BACK,
|
|
473
|
+
GGML_OP_SET_ROWS,
|
|
473
474
|
GGML_OP_DIAG,
|
|
474
475
|
GGML_OP_DIAG_MASK_INF,
|
|
475
476
|
GGML_OP_DIAG_MASK_ZERO,
|
|
@@ -489,6 +490,7 @@ extern "C" {
|
|
|
489
490
|
GGML_OP_UPSCALE, // nearest interpolate
|
|
490
491
|
GGML_OP_PAD,
|
|
491
492
|
GGML_OP_PAD_REFLECT_1D,
|
|
493
|
+
GGML_OP_ROLL,
|
|
492
494
|
GGML_OP_ARANGE,
|
|
493
495
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
494
496
|
GGML_OP_ARGSORT,
|
|
@@ -686,6 +688,9 @@ extern "C" {
|
|
|
686
688
|
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
|
687
689
|
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
|
688
690
|
|
|
691
|
+
// true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
|
|
692
|
+
GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
|
|
693
|
+
|
|
689
694
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
690
695
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
691
696
|
|
|
@@ -1374,6 +1379,23 @@ extern "C" {
|
|
|
1374
1379
|
struct ggml_tensor * b, // row indices
|
|
1375
1380
|
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
|
1376
1381
|
|
|
1382
|
+
// a TD [n_embd, ne1, ne2, ne3]
|
|
1383
|
+
// b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
|
|
1384
|
+
// c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
|
|
1385
|
+
//
|
|
1386
|
+
// undefined behavior if destination rows overlap
|
|
1387
|
+
//
|
|
1388
|
+
// broadcast:
|
|
1389
|
+
// ne2 % ne11 == 0
|
|
1390
|
+
// ne3 % ne12 == 0
|
|
1391
|
+
//
|
|
1392
|
+
// return view(a)
|
|
1393
|
+
GGML_API struct ggml_tensor * ggml_set_rows(
|
|
1394
|
+
struct ggml_context * ctx,
|
|
1395
|
+
struct ggml_tensor * a, // destination
|
|
1396
|
+
struct ggml_tensor * b, // source
|
|
1397
|
+
struct ggml_tensor * c); // row indices
|
|
1398
|
+
|
|
1377
1399
|
GGML_API struct ggml_tensor * ggml_diag(
|
|
1378
1400
|
struct ggml_context * ctx,
|
|
1379
1401
|
struct ggml_tensor * a);
|
|
@@ -1801,6 +1823,17 @@ extern "C" {
|
|
|
1801
1823
|
int p0,
|
|
1802
1824
|
int p1);
|
|
1803
1825
|
|
|
1826
|
+
// Move tensor elements by an offset given for each dimension. Elements that
|
|
1827
|
+
// are shifted beyond the last position are wrapped around to the beginning.
|
|
1828
|
+
GGML_API struct ggml_tensor * ggml_roll(
|
|
1829
|
+
struct ggml_context * ctx,
|
|
1830
|
+
struct ggml_tensor * a,
|
|
1831
|
+
int shift0,
|
|
1832
|
+
int shift1,
|
|
1833
|
+
int shift2,
|
|
1834
|
+
int shift3);
|
|
1835
|
+
|
|
1836
|
+
|
|
1804
1837
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
1805
1838
|
// timesteps: [N,]
|
|
1806
1839
|
// return: [N, dim]
|
|
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
286
286
|
foreach (feat ${ARGN})
|
|
287
287
|
set(GGML_INTERNAL_${feat} ON)
|
|
288
288
|
endforeach()
|
|
289
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
290
|
+
foreach (feat ${ARGN})
|
|
291
|
+
set(GGML_INTERNAL_${feat} ON)
|
|
292
|
+
endforeach()
|
|
289
293
|
endif()
|
|
290
294
|
|
|
291
295
|
ggml_add_cpu_backend_variant_impl(${tag_name})
|
|
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
337
341
|
else()
|
|
338
342
|
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
|
|
339
343
|
endif()
|
|
344
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
345
|
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
346
|
+
ggml_add_cpu_backend_variant(power0)
|
|
347
|
+
ggml_add_cpu_backend_variant(power7_1 POWER7)
|
|
348
|
+
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
|
|
349
|
+
ggml_add_cpu_backend_variant(power8_1 POWER8)
|
|
350
|
+
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
|
|
351
|
+
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
|
|
352
|
+
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
|
|
353
|
+
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
|
|
354
|
+
else()
|
|
355
|
+
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
|
|
356
|
+
endif()
|
|
340
357
|
else()
|
|
341
358
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
|
342
359
|
endif()
|
|
@@ -359,7 +359,7 @@ struct ggml_backend_cann_context {
|
|
|
359
359
|
ggml_cann_set_device(device);
|
|
360
360
|
description = aclrtGetSocName();
|
|
361
361
|
|
|
362
|
-
|
|
362
|
+
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
|
|
363
363
|
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
|
|
364
364
|
device, async_mode ? "ON" : "OFF");
|
|
365
365
|
}
|
|
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
388
388
|
else()
|
|
389
389
|
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
|
|
390
390
|
endif()
|
|
391
|
+
elseif(GGML_CPU_ALL_VARIANTS)
|
|
392
|
+
# Begin with the lowest baseline
|
|
393
|
+
set(ARCH_DEFINITIONS "")
|
|
394
|
+
|
|
395
|
+
# When a feature is selected, bump the MCPU to the first
|
|
396
|
+
# version that supported it
|
|
397
|
+
foreach(PVER RANGE 7 11)
|
|
398
|
+
if(DEFINED GGML_INTERNAL_POWER${PVER})
|
|
399
|
+
set(POWERPC_MCPU "power${PVER}")
|
|
400
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
|
|
401
|
+
endif()
|
|
402
|
+
endforeach()
|
|
403
|
+
if (GGML_INTERNAL_VSX)
|
|
404
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
|
|
405
|
+
list(APPEND ARCH_FLAGS -mvsx)
|
|
406
|
+
endif()
|
|
407
|
+
|
|
408
|
+
if (DEFINED POWERPC_MCPU)
|
|
409
|
+
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
|
|
410
|
+
endif()
|
|
411
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
|
|
391
412
|
else()
|
|
392
413
|
if (GGML_CPU_POWERPC_CPUTYPE)
|
|
393
414
|
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
|
@@ -427,6 +448,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
427
448
|
|
|
428
449
|
# TODO: Separation to determine activation of VX/VXE/VXE2
|
|
429
450
|
if (${S390X_M} MATCHES "8561|8562")
|
|
451
|
+
set(GGML_NNPA OFF)
|
|
430
452
|
message(STATUS "z15 target")
|
|
431
453
|
list(APPEND ARCH_FLAGS -march=z15)
|
|
432
454
|
elseif (${S390X_M} MATCHES "3931")
|
|
@@ -443,7 +465,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
443
465
|
endif()
|
|
444
466
|
|
|
445
467
|
if (GGML_VXE)
|
|
468
|
+
message(STATUS "VX/VXE/VXE2 enabled")
|
|
446
469
|
list(APPEND ARCH_FLAGS -mvx -mzvector)
|
|
470
|
+
list(APPEND ARCH_DEFINITIONS GGML_VXE)
|
|
471
|
+
endif()
|
|
472
|
+
|
|
473
|
+
if (GGML_NNPA)
|
|
474
|
+
message(STATUS "NNPA enabled")
|
|
475
|
+
list(APPEND ARCH_DEFINITIONS GGML_NNPA)
|
|
447
476
|
endif()
|
|
448
477
|
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
|
|
449
478
|
message(STATUS "Wasm detected")
|
|
@@ -465,9 +494,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
465
494
|
|
|
466
495
|
# Fetch KleidiAI sources:
|
|
467
496
|
include(FetchContent)
|
|
468
|
-
set(KLEIDIAI_COMMIT_TAG "v1.
|
|
497
|
+
set(KLEIDIAI_COMMIT_TAG "v1.9.0")
|
|
469
498
|
set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
|
|
470
|
-
set(KLEIDIAI_ARCHIVE_MD5 "
|
|
499
|
+
set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017")
|
|
471
500
|
|
|
472
501
|
if (POLICY CMP0135)
|
|
473
502
|
cmake_policy(SET CMP0135 NEW)
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include "mmq.h"
|
|
9
9
|
#include "ggml-impl.h"
|
|
10
10
|
#include "ggml-cpu-impl.h"
|
|
11
|
+
#include "simd-mappings.h"
|
|
11
12
|
#include "quants.h"
|
|
12
13
|
#include "ggml-quants.h"
|
|
13
14
|
#include <algorithm>
|
|
@@ -453,7 +454,7 @@ void quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, int64_
|
|
|
453
454
|
|
|
454
455
|
// Quantize these floats
|
|
455
456
|
const float iscale = 127.f / amax;
|
|
456
|
-
y[i].d =
|
|
457
|
+
y[i].d = GGML_CPU_FP32_TO_FP16(1 / iscale);
|
|
457
458
|
const float id = ( amax != 0.0f ) ? iscale : 0.f;
|
|
458
459
|
const __m512 vscale = _mm512_set1_ps(id);
|
|
459
460
|
|
|
@@ -1090,7 +1091,7 @@ struct acc_C<block_q8_0, block_q4_0, is_acc> {
|
|
|
1090
1091
|
const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
|
|
1091
1092
|
|
|
1092
1093
|
for (int m = 0; m < nr; ++m) {
|
|
1093
|
-
const __m512 vd1 = _mm512_set1_ps(
|
|
1094
|
+
const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d));
|
|
1094
1095
|
const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
|
|
1095
1096
|
|
|
1096
1097
|
__m512 vsum;
|
|
@@ -1113,8 +1114,8 @@ struct acc_C<block_q8_1, block_q4_1, is_acc> {
|
|
|
1113
1114
|
const __m512 vm0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof(ggml_half))));
|
|
1114
1115
|
|
|
1115
1116
|
for (int m = 0; m < nr; ++m) {
|
|
1116
|
-
const __m512 vd1 = _mm512_set1_ps(
|
|
1117
|
-
const __m512 vs1 = _mm512_set1_ps(
|
|
1117
|
+
const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d));
|
|
1118
|
+
const __m512 vs1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].s));
|
|
1118
1119
|
const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
|
|
1119
1120
|
|
|
1120
1121
|
__m512 vsum;
|
|
@@ -1137,7 +1138,7 @@ struct acc_C<block_q8_0, block_q8_0, is_acc> {
|
|
|
1137
1138
|
const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
|
|
1138
1139
|
|
|
1139
1140
|
for (int m = 0; m < nr; ++m) {
|
|
1140
|
-
const __m512 vd1 = _mm512_set1_ps(
|
|
1141
|
+
const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d));
|
|
1141
1142
|
const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
|
|
1142
1143
|
|
|
1143
1144
|
__m512 vsum;
|
|
@@ -1437,7 +1438,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLO
|
|
|
1437
1438
|
va[k] = _mm512_set1_epi32(a_ptr[k]);
|
|
1438
1439
|
vcomp = _mm512_dpbusd_epi32(vcomp, off, va[k]);
|
|
1439
1440
|
}
|
|
1440
|
-
vd1 = _mm512_set1_ps(
|
|
1441
|
+
vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
|
|
1441
1442
|
}
|
|
1442
1443
|
|
|
1443
1444
|
// load b
|
|
@@ -1498,8 +1499,8 @@ struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K>
|
|
|
1498
1499
|
for (int k = 0; k < 8; ++k) {
|
|
1499
1500
|
va[k] = _mm512_set1_epi32(a_ptr[k]);
|
|
1500
1501
|
}
|
|
1501
|
-
vd1 = _mm512_set1_ps(
|
|
1502
|
-
vs1 = _mm512_set1_ps(
|
|
1502
|
+
vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
|
|
1503
|
+
vs1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].s));
|
|
1503
1504
|
}
|
|
1504
1505
|
|
|
1505
1506
|
// load b
|
|
@@ -1571,7 +1572,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLO
|
|
|
1571
1572
|
va[k] = _mm512_set1_epi32(a_ptr[k]);
|
|
1572
1573
|
va[k] = _mm512_add_epi8(va[k], off);
|
|
1573
1574
|
}
|
|
1574
|
-
vd1 = _mm512_set1_ps(
|
|
1575
|
+
vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
|
|
1575
1576
|
}
|
|
1576
1577
|
|
|
1577
1578
|
// load b
|