@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/tests/test-backend-ops.cpp (+189 -90):

@@ -271,6 +271,14 @@ static std::string var_to_str(ggml_op_pool pool) {
     }
 }
 
+static std::string var_to_str(ggml_scale_mode mode) {
+    switch (mode) {
+        case GGML_SCALE_MODE_NEAREST: return "nearest";
+        case GGML_SCALE_MODE_BILINEAR: return "bilinear";
+        default: return std::to_string(mode);
+    }
+}
+
 #define VAR_TO_STR(x) (#x "=" + var_to_str(x))
 
 #define VARS_TO_STR1(a) VAR_TO_STR(a)
@@ -815,7 +823,7 @@ struct test_case {
 
         ggml_build_forward_expand(gf, out);
         ggml_graph_cpy(gf, gb);
-        ggml_build_backward_expand(ctx.get(), …
+        ggml_build_backward_expand(ctx.get(), gb, nullptr);
         if (expect.size() != 1 || expect[0] != 0.0f) {
             GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
             for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
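The hunk above tracks an upstream ggml API change: ggml_build_backward_expand now takes the context, the graph, and an optional array of gradient accumulators (the deleted argument list is truncated in the extraction). A minimal sketch of the new call sequence, assuming the usual ggml graph helpers; the helper name build_with_grads is illustrative:

#include "ggml.h"

static void build_with_grads(ggml_context * ctx, ggml_tensor * out) {
    // graphs sized with gradient bookkeeping enabled
    ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);

    ggml_build_forward_expand(gf, out);
    ggml_graph_cpy(gf, gb);
    // nullptr = no caller-provided gradient accumulators; ggml allocates gradients itself
    ggml_build_backward_expand(ctx, gb, nullptr);
}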
@@ -1018,7 +1026,7 @@ struct test_example : public test_case {
         // Step 3: return the output tensor.
         return out;
     }
-    // In order to also check the gradients for your op, add calls like ggml_set_param(…
+    // In order to also check the gradients for your op, add calls like ggml_set_param(a)
     // immediately after you create the tensors.
     // This is optional and only makes sense if a backward pass has actually been implemented for the new op.
 };
@@ -1050,7 +1058,7 @@ struct test_unary : public test_case {
             auto ne = ne_a; ne[0] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -1059,7 +1067,7 @@ struct test_unary : public test_case {
         } else {
            a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (grad_supported) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
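The two hunks above, and most of the ggml_set_param hunks that follow, are one mechanical migration: ggml_set_param lost its ggml_context parameter. As a before/after sketch (the tensor name is illustrative; the old form is the one still visible in the mul_mat hunk further down):

ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10);
// 0.3.16 vendored llama.cpp:
//   ggml_set_param(ctx, a);
// 0.4.0:
ggml_set_param(a); // marks a as a trainable parameter for the backward pass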
@@ -1125,7 +1133,7 @@ struct test_get_rows : public test_case {
 
         const bool grad_supported = ggml_is_matrix(in) && ggml_is_vector(rows);
         if (grad_supported) {
-            ggml_set_param(ctx, in);
+            ggml_set_param(in);
             // rows is a constant input -> no gradients
         }
 
@@ -1314,7 +1322,7 @@ struct test_repeat : public test_case {
         ggml_set_name(target, "target");
 
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         ggml_tensor * out = ggml_repeat(ctx, src, target);
@@ -1398,7 +1406,7 @@ struct test_dup : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         if (_use_permute) {
@@ -1434,7 +1442,7 @@ struct test_set : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         auto ne_dst = ne;
@@ -1442,7 +1450,7 @@ struct test_set : public test_case {
             ne_dst[i] *= 2;
         }
         ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
-        ggml_set_param(ctx, dst);
+        ggml_set_param(dst);
         ggml_set_name(dst, "dst");
 
         size_t offset = 0;
@@ -1490,7 +1498,7 @@ struct test_cpy : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         if (_src_use_permute) {
@@ -1528,7 +1536,7 @@ struct test_cont : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, src);
+        ggml_set_param(src);
         ggml_set_name(src, "src");
 
         src = ggml_transpose(ctx, src);
@@ -1575,8 +1583,8 @@ struct test_bin_bcast : public test_case {
         // The backward pass supports broadcasting only for GGML_ADD:
         const bool grad_supported = op == ggml_add || ggml_are_same_shape(a, b);
         if (grad_supported) {
-            ggml_set_param(ctx, a);
-            ggml_set_param(ctx, b);
+            ggml_set_param(a);
+            ggml_set_param(b);
         }
 
         ggml_tensor * out = op(ctx, a, b);
@@ -1624,11 +1632,11 @@ struct test_add1 : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * b = ggml_new_tensor_1d(ctx, type, 1);
-        // ggml_set_param(ctx, b);
+        // ggml_set_param(b); // TODO: implement
         ggml_set_name(b, "b");
 
         ggml_tensor * out = ggml_add1(ctx, a, b);
@@ -1659,7 +1667,7 @@ struct test_scale : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_scale(ctx, a, scale);
@@ -1754,7 +1762,7 @@ struct test_rms_norm : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         if (v) {
@@ -1973,7 +1981,7 @@ struct test_mul_mat : public test_case {
     const std::array<int64_t, 2> bs;  // dims 3 and 4
     const std::array<int64_t, 2> nr;  // repeat in dims 3 and 4
     const std::array<int64_t, 4> per; // permutation of dimensions
-    const bool v; // whether a …
+    const bool v; // whether a and b are non-contiguous views
 
     std::string vars() override {
         return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
@@ -2020,9 +2028,9 @@ struct test_mul_mat : public test_case {
             b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
             if (!ggml_is_quantized(type_a)) {
                 if (bs[1] == 1 && nr[1] == 1) {
-                    ggml_set_param(ctx, a);
+                    ggml_set_param(a);
                 }
-                ggml_set_param(ctx, b);
+                ggml_set_param(b);
             }
             ggml_set_name(a, "a");
             ggml_set_name(b, "b");
@@ -2032,19 +2040,29 @@ struct test_mul_mat : public test_case {
             ggml_set_name(a, "a_permuted");
             ggml_set_name(b, "b_permuted");
         } else {
-
             if (v) {
-                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], …
-                … [1 deleted line not captured in the extraction]
+                a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
+                b = ggml_new_tensor_4d(ctx, type_b, k*2, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+                if (!ggml_is_quantized(type_a)) {
+                    if (bs[1] == 1 && nr[1] == 1) {
+                        ggml_set_param(a);
+                    }
+                    ggml_set_param(b);
+                }
+
+                a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+                b = ggml_view_4d(ctx, b, k, n, bs[0]*nr[0], bs[1]*nr[1], b->nb[1], b->nb[2], b->nb[3], 0);
             } else {
                 a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
-                … [5 deleted lines not captured in the extraction]
+                b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+
+                if (!ggml_is_quantized(type_a)) {
+                    if (bs[1] == 1 && nr[1] == 1) {
+                        ggml_set_param(a);
+                    }
+                    ggml_set_param(b);
+                }
             }
-            ggml_set_param(ctx, b);
         }
         ggml_set_name(a, "a");
         ggml_set_name(b, "b");
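With v == true the test now exercises both operands as non-contiguous views: each tensor is allocated with rows twice as wide (k*2) and then narrowed back to k columns with ggml_view_4d, keeping the parent's byte strides nb[1..3]. A stripped-down sketch of that trick, assuming a live ggml_context named ctx and matrix dimensions k and m:

// allocate 2*k columns, then view the first k of each row: same data,
// but the row stride is still 2*k floats, so the view is non-contiguous
ggml_tensor * big = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, k*2, m, 1, 1);
ggml_tensor * a   = ggml_view_4d(ctx, big, k, m, 1, 1,
                                 big->nb[1], big->nb[2], big->nb[3], /*offset =*/ 0);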
@@ -2063,7 +2081,7 @@ struct test_mul_mat_id : public test_case {
     const ggml_type type_b;
     const int n_mats;
     const int n_used;
-    const bool b; // …
+    const bool b; // broadcast b matrix
     const int64_t m;
     const int64_t n;
     const int64_t k;
@@ -2193,7 +2211,7 @@ struct test_sqr : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sqr(ctx, a);
@@ -2222,7 +2240,7 @@ struct test_sqrt : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sqrt(ctx, a);
@@ -2262,7 +2280,7 @@ struct test_log : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_log(ctx, a);
@@ -2298,7 +2316,7 @@ struct test_sin : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sin(ctx, a);
@@ -2341,7 +2359,7 @@ struct test_cos : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_cos(ctx, a);
@@ -2421,7 +2439,7 @@ struct test_diag_mask_inf : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_diag_mask_inf(ctx, a, n_past);
@@ -2460,7 +2478,7 @@ struct test_soft_max : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * mask = nullptr;
@@ -2542,7 +2560,7 @@ struct test_rope : public test_case {
             auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
             a = ggml_new_tensor(ctx, type, 4, ne.data());
             if (forward) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
 
@@ -2551,7 +2569,7 @@ struct test_rope : public test_case {
         } else {
             a = ggml_new_tensor(ctx, type, 4, ne_a.data());
             if (forward) {
-                ggml_set_param(ctx, a);
+                ggml_set_param(a);
             }
             ggml_set_name(a, "a");
         }
@@ -2598,6 +2616,8 @@ struct test_rope : public test_case {
             } else {
                 out = ggml_rope_ext_back(ctx, a, pos, freq, n_dims, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
             }
+
+            // TODO: add test with a non-contiguous view as input ; this case is needed for build_rope_2d in clip.cpp
         }
         ggml_set_name(out, "out");
 
@@ -2663,7 +2683,7 @@ struct test_pool2d : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-        ggml_set_param(ctx, input);
+        ggml_set_param(input);
         ggml_set_name(input, "input");
 
         ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1);
@@ -2739,7 +2759,7 @@ struct test_im2col : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
-        ggml_set_param(ctx, input);
+        ggml_set_param(input);
         ggml_set_name(input, "input");
 
         ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data());
@@ -2752,6 +2772,48 @@ struct test_im2col : public test_case {
     }
 };
 
+// GGML_OP_CONV_2D_DW
+struct test_conv_2d_dw : public test_case {
+    const std::array<int64_t, 4> ne_input;
+    const std::array<int64_t, 4> ne_kernel;
+    const int stride;
+    const int padding;
+    const int dilation;
+    const bool cwhn;
+
+    std::string vars() override {
+        return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn);
+    }
+
+    test_conv_2d_dw(std::array<int64_t, 4> ne_input = {64, 64, 16, 1},
+            std::array<int64_t, 4> ne_kernel = {3, 3, 1, 16},
+            int stride = 1, int padding = 0, int dilation = 1, bool cwhn = false)
+        : ne_input(ne_input), ne_kernel(ne_kernel), stride(stride), padding(padding), dilation(dilation), cwhn(cwhn) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
+        ggml_set_name(input, "input");
+
+        ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
+        ggml_set_name(kernel, "kernel");
+
+        if (cwhn) {
+            // change memory layout to channel-most-contiguous (CWHN),
+            // then permute it back so NE matches the original input
+            input = ggml_cont(ctx, ggml_permute(ctx, input, 1, 2, 0, 3));
+            input = ggml_permute(ctx, input, 2, 0, 1, 3);
+            kernel = ggml_cont(ctx, ggml_permute(ctx, kernel, 2, 3, 1, 0));
+            kernel = ggml_permute(ctx, kernel, 3, 2, 0, 1);
+        }
+
+        ggml_tensor * out = ggml_conv_2d_dw_direct(
+            ctx, kernel, input,
+            stride, stride, padding, padding, dilation, dilation);
+        ggml_set_name(out, "out");
+        return out;
+    }
+};
+
 // GGML_OP_CONCAT
 struct test_concat : public test_case {
     const ggml_type type;
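The new struct covers GGML_OP_CONV_2D_DW via ggml_conv_2d_dw_direct, in both the default WHCN layout and a channel-contiguous (CWHN) layout. A minimal call with the signature used above, assuming a live ggml_context named ctx (shapes illustrative):

// one 3x3 kernel per channel; ne = {W, H, C, N}
ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 16, 1);
ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3,  1, 16);
ggml_tensor * out    = ggml_conv_2d_dw_direct(ctx, kernel, input,
        /*stride*/ 1, 1, /*padding*/ 0, 0, /*dilation*/ 1, 1);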
@@ -2874,7 +2936,7 @@ struct test_sum : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sum(ctx, a);
@@ -2903,7 +2965,7 @@ struct test_sum_rows : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_sum_rows(ctx, a);
@@ -2928,7 +2990,7 @@ struct test_mean : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * out = ggml_mean(ctx, a);
@@ -2948,15 +3010,16 @@ struct test_upscale : public test_case {
     const std::array<int64_t, 4> ne;
     const int32_t scale_factor;
     const bool transpose;
+    const ggml_scale_mode mode;
 
     std::string vars() override {
-        return VARS_TO_STR4(type, ne, scale_factor, transpose);
+        return VARS_TO_STR5(type, ne, scale_factor, mode, transpose);
     }
 
     test_upscale(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {512, 512, 3, 1},
-            int32_t scale_factor = 2, bool transpose = false)
-        : type(type), ne(ne), scale_factor(scale_factor), transpose(transpose) {}
+            int32_t scale_factor = 2, ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST, bool transpose = false)
+        : type(type), ne(ne), scale_factor(scale_factor), transpose(transpose), mode(mode) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2967,7 +3030,7 @@ struct test_upscale : public test_case {
             ggml_set_name(a, "a_transposed");
         }
 
-        ggml_tensor * out = ggml_upscale(ctx, a, scale_factor);
+        ggml_tensor * out = ggml_upscale(ctx, a, scale_factor, mode);
         ggml_set_name(out, "out");
 
         return out;
@@ -2979,21 +3042,23 @@ struct test_upscale_ext : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const std::array<int64_t, 4> ne_tgt;
+    const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
 
     std::string vars() override {
-        return VARS_TO_STR3(type, ne, ne_tgt);
+        return VARS_TO_STR4(type, ne, ne_tgt, mode);
     }
 
     test_upscale_ext(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {2, 5, 7, 11},
-            std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13})
-        : type(type), ne(ne), ne_tgt(ne_tgt) {}
+            std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
+            ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
+        : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_name(a, "a");
 
-        ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3]);
+        ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
         ggml_set_name(out, "out");
 
         return out;
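The upscale tests now carry a ggml_scale_mode, matching the extended upstream API where ggml_upscale and ggml_upscale_ext take the interpolation mode as their last argument. A short sketch against that signature (tensor shapes illustrative, ctx assumed):

ggml_tensor * img = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 512, 512, 3, 1);
// integer scale factor with bilinear filtering
ggml_tensor * up2 = ggml_upscale(ctx, img, 2, GGML_SCALE_MODE_BILINEAR);
// arbitrary target size with nearest-neighbour sampling
ggml_tensor * fit = ggml_upscale_ext(ctx, img, 1024, 768, 3, 1, GGML_SCALE_MODE_NEAREST);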
@@ -3071,11 +3136,11 @@ struct test_acc : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
-        ggml_set_param(ctx, a);
+        ggml_set_param(a);
         ggml_set_name(a, "a");
 
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
-        ggml_set_param(ctx, b);
+        ggml_set_param(b);
         ggml_set_name(b, "b");
 
         ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
@@ -3217,7 +3282,8 @@ struct test_leaky_relu : public test_case {
 
 // GGML_OP_FLASH_ATTN_EXT
 struct test_flash_attn_ext : public test_case {
-    const int64_t hs; // head size
+    const int64_t hsk; // K head size
+    const int64_t hsv; // V head size
     const int64_t nh; // num heads
     const int64_t nr; // repeat in Q, tests for grouped-query attention
     const int64_t kv; // kv size
@@ -3233,7 +3299,7 @@ struct test_flash_attn_ext : public test_case {
     std::array<int32_t, 4> permute;
 
     std::string vars() override {
-        return VARS_TO_STR11(hs, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, permute);
+        return VARS_TO_STR12(hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, permute);
     }
 
     double max_nmse_err() override {
@@ -3243,17 +3309,18 @@ struct test_flash_attn_ext : public test_case {
     uint64_t op_flops(ggml_tensor * t) override {
         GGML_UNUSED(t);
         // Just counting matmul costs:
-        // Q*K^T is nb x hs x kv, P*V is nb x kv x hs, per head
-        return 2 * 2 * nh*nr * nb * hs * kv;
+        // Q*K^T is nb x hsk x kv, P*V is nb x kv x hsv, per head
+        return 2 * nh*nr * nb * (hsk + hsv) * kv;
     }
 
-    test_flash_attn_ext(int64_t hs = 128, int64_t nh = 32, int64_t nr = 1, int64_t kv = 96, int64_t nb = 8,
+    test_flash_attn_ext(int64_t hsk = 128, int64_t hsv = 128, int64_t nh = 32, int64_t nr = 1, int64_t kv = 96, int64_t nb = 8,
             bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_prec prec = GGML_PREC_F32,
             ggml_type type_KV = GGML_TYPE_F16, std::array<int32_t, 4> permute = {0, 1, 2, 3})
-        : hs(hs), nh(nh), nr(nr), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), prec(prec), type_KV(type_KV), permute(permute) {}
+        : hsk(hsk), hsv(hsv), nh(nh), nr(nr), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), prec(prec), type_KV(type_KV), permute(permute) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
-        const int64_t hs_padded = GGML_PAD(hs, ggml_blck_size(type_KV));
+        const int64_t hsk_padded = GGML_PAD(hsk, ggml_blck_size(type_KV));
+        const int64_t hsv_padded = GGML_PAD(hsv, ggml_blck_size(type_KV));
 
         auto const &create_permuted = [&](ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) -> ggml_tensor * {
             int64_t ne[4] = {ne0, ne1, ne2, ne3};
@@ -3268,13 +3335,13 @@ struct test_flash_attn_ext : public test_case {
             return t;
         };
 
-        ggml_tensor * q = create_permuted(GGML_TYPE_F32, hs_padded, nb, nh*nr, 1);
+        ggml_tensor * q = create_permuted(GGML_TYPE_F32, hsk_padded, nb, nh*nr, 1);
         ggml_set_name(q, "q");
 
-        ggml_tensor * k = create_permuted(type_KV, hs_padded, kv, nh, 1);
+        ggml_tensor * k = create_permuted(type_KV, hsk_padded, kv, nh, 1);
         ggml_set_name(k, "k");
 
-        ggml_tensor * v = create_permuted(type_KV, hs_padded, kv, nh, 1);
+        ggml_tensor * v = create_permuted(type_KV, hsv_padded, kv, nh, 1);
         ggml_set_name(v, "v");
 
         ggml_tensor * m = nullptr;
@@ -3283,7 +3350,7 @@ struct test_flash_attn_ext : public test_case {
             ggml_set_name(m, "m");
         }
 
-        ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f/sqrtf(hs), max_bias, logit_softcap);
+        ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f/sqrtf(hsk), max_bias, logit_softcap);
         ggml_flash_attn_ext_set_prec(out, prec);
         ggml_set_name(out, "out");
 
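With the single head size split into hsk and hsv, K and V heads may differ, which is what DeepSeek-style MLA needs (hsk 576 with hsv 512 in the eval loops further down). An illustrative case built with the constructor shown above, trailing arguments left at their defaults:

// K head size 576, V head size 512; nh/nr/kv/nb values mirror the default tests
test_cases.emplace_back(new test_flash_attn_ext(576, 512, /*nh*/ 4, /*nr*/ 1, /*kv*/ 512, /*nb*/ 8));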
@@ -3310,7 +3377,7 @@ struct test_cross_entropy_loss : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_param(ctx, logits);
+        ggml_set_param(logits);
         ggml_set_name(logits, "logits");
 
         ggml_tensor * labels = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3392,7 +3459,7 @@ struct test_opt_step_adamw : public test_case {
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
-        ggml_set_param(ctx, a); // Despite tensor a having gradients the output tensor will not.
+        ggml_set_param(a); // Despite tensor a having gradients the output tensor will not.
         ggml_set_name(a, "a");
 
         ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
@@ -3957,6 +4024,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
     // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
 
+    test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, true));
+    test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, true));
+
     test_cases.emplace_back(new test_conv_transpose_1d());
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
@@ -4169,6 +4241,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
+
+            // test cases with large ne00/ne10 to cover stream-k fixup
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 1024, {3, 2}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 1024, {3, 2}, {1, 1}));
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 1024, {3, 2}, {1, 1}));
         }
     }
     for (ggml_type type_a : other_types) {
@@ -4204,6 +4281,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 64, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 193, {1, 1}, {4, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 67, {1, 1}, {4, 1}, {0, 2, 1, 3}));
 
     for (auto bs : {1,2,4,8}) {
         for (auto nr : {1,4}) {
@@ -4395,12 +4474,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
     }
 
+    for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
+        test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
+        test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
+        test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
+    }
+
     test_cases.emplace_back(new test_sum());
     test_cases.emplace_back(new test_sum_rows());
     test_cases.emplace_back(new test_mean());
-    test_cases.emplace_back(new test_upscale());
-    test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 1 }, 2, true));
-    test_cases.emplace_back(new test_upscale_ext());
     test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {64, 64, 320, 1}));
     test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
     test_cases.emplace_back(new test_acc());
@@ -4410,27 +4492,33 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_timestep_embedding());
     test_cases.emplace_back(new test_leaky_relu());
 
-    for (int … [19 deleted lines: the previous loop nest over a single head size hs, truncated in the extraction]
+    for (int hsk : { 64, 80, 128, 192, 256, 576 }) {
+        for (int hsv : { 64, 80, 128, 192, 256, 512 }) {
+            if (hsk != 192 && hsk != 576 && hsk != hsv) continue;
+            if (hsk == 192 && (hsv != 128 && hsv != 192)) continue;
+            if (hsk == 576 && hsv != 512) continue; // DeepSeek MLA
+
+            for (bool mask : { true, false } ) {
+                for (float max_bias : { 0.0f, 8.0f }) {
+                    if (!mask && max_bias > 0.0f) continue;
+                    for (float logit_softcap : {0.0f, 10.0f}) {
+                        if (hsk != 128 && logit_softcap != 0.0f) continue;
+                        for (int nh : { 4, }) {
+                            for (int nr : { 1, 4, 16 }) {
+                                if (nr == 16 && hsk != 128) continue;
+                                for (int kv : { 512, 1024, }) {
+                                    if (nr != 1 && kv != 512) continue;
+                                    for (int nb : { 1, 3, 32, 35, }) {
+                                        for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) {
+                                            if (hsk != 128 && prec == GGML_PREC_DEFAULT) continue;
+                                            for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
                                                 test_cases.emplace_back(new test_flash_attn_ext(
-                                                    … [old argument list truncated in the extraction]
+                                                    hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV));
+                                                // run fewer test cases permuted
+                                                if (mask == true && max_bias == 0.0f && logit_softcap == 0 && kv == 512) {
+                                                    test_cases.emplace_back(new test_flash_attn_ext(
+                                                        hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, {0, 2, 1, 3}));
+                                                }
                                             }
                                         }
                                     }
@@ -4507,6 +4595,17 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
         }
     }
 
+    for (int kv : { 4096, 8192, 16384, }) {
+        for (int hs : { 64, 128, }) {
+            for (int nr : { 1, 4, }) {
+                test_cases.emplace_back(new test_flash_attn_ext(hs, hs, 8, nr, kv, 1, true, 0, 0, GGML_PREC_F32, GGML_TYPE_F16));
+            }
+        }
+    }
+
+    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
+    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
+
     return test_cases;
 }