@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
|
@@ -37,13 +37,16 @@ extern "C" {
|
|
|
37
37
|
// ====== Dataset ======
|
|
38
38
|
|
|
39
39
|
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
int64_t
|
|
43
|
-
int64_t
|
|
40
|
+
enum ggml_type type_data, // the type for the internal data tensor
|
|
41
|
+
enum ggml_type type_label, // the type for the internal labels tensor
|
|
42
|
+
int64_t ne_datapoint, // number of elements per datapoint
|
|
43
|
+
int64_t ne_label, // number of elements per label
|
|
44
|
+
int64_t ndata, // total number of datapoints/labels
|
|
45
|
+
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
|
|
44
46
|
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
|
|
45
47
|
|
|
46
48
|
// get underlying tensors that store the data
|
|
49
|
+
GGML_API int64_t ggml_opt_dataset_ndata (ggml_opt_dataset_t dataset);
|
|
47
50
|
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
|
|
48
51
|
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
|
|
49
52
|
|
|
@@ -56,13 +59,19 @@ extern "C" {
|
|
|
56
59
|
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
|
|
57
60
|
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
|
|
58
61
|
int64_t ibatch);
|
|
62
|
+
GGML_API void ggml_opt_dataset_get_batch_host(
|
|
63
|
+
ggml_opt_dataset_t dataset,
|
|
64
|
+
void * data_batch,
|
|
65
|
+
size_t nb_data_batch,
|
|
66
|
+
void * labels_batch,
|
|
67
|
+
int64_t ibatch);
|
|
59
68
|
|
|
60
69
|
// ====== Model / Context ======
|
|
61
70
|
|
|
62
71
|
enum ggml_opt_build_type {
|
|
63
|
-
GGML_OPT_BUILD_TYPE_FORWARD,
|
|
64
|
-
GGML_OPT_BUILD_TYPE_GRAD,
|
|
65
|
-
GGML_OPT_BUILD_TYPE_OPT,
|
|
72
|
+
GGML_OPT_BUILD_TYPE_FORWARD = 10,
|
|
73
|
+
GGML_OPT_BUILD_TYPE_GRAD = 20,
|
|
74
|
+
GGML_OPT_BUILD_TYPE_OPT = 30,
|
|
66
75
|
};
|
|
67
76
|
|
|
68
77
|
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
|
|
@@ -81,20 +90,22 @@ extern "C" {
|
|
|
81
90
|
// userdata can be used to pass arbitrary data
|
|
82
91
|
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
|
|
83
92
|
|
|
84
|
-
// returns the default optimizer params (constant)
|
|
93
|
+
// returns the default optimizer params (constant, hard-coded values)
|
|
85
94
|
// userdata is not used
|
|
86
95
|
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
|
|
87
96
|
|
|
97
|
+
// casts userdata to ggml_opt_optimizer_params and returns it
|
|
98
|
+
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_constant_optimizer_params(void * userdata);
|
|
99
|
+
|
|
88
100
|
// parameters for initializing a new optimization context
|
|
89
101
|
struct ggml_opt_params {
|
|
90
102
|
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
|
|
91
103
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
struct ggml_tensor
|
|
97
|
-
struct ggml_tensor * outputs;
|
|
104
|
+
// by default the forward graph needs to be reconstructed for each eval
|
|
105
|
+
// if ctx_compute, inputs, and outputs are set the graphs are instead allocated statically
|
|
106
|
+
struct ggml_context * ctx_compute;
|
|
107
|
+
struct ggml_tensor * inputs;
|
|
108
|
+
struct ggml_tensor * outputs;
|
|
98
109
|
|
|
99
110
|
enum ggml_opt_loss_type loss_type;
|
|
100
111
|
enum ggml_opt_build_type build_type;
|
|
@@ -107,12 +118,9 @@ extern "C" {
|
|
|
107
118
|
|
|
108
119
|
// get parameters for an optimization context with defaults set where possible
|
|
109
120
|
// parameters for which no sensible defaults exist are supplied as arguments to this function
|
|
110
|
-
GGML_API ggml_opt_params ggml_opt_default_params(
|
|
111
|
-
ggml_backend_sched_t
|
|
112
|
-
|
|
113
|
-
struct ggml_tensor * inputs,
|
|
114
|
-
struct ggml_tensor * outputs,
|
|
115
|
-
enum ggml_opt_loss_type loss_type);
|
|
121
|
+
GGML_API struct ggml_opt_params ggml_opt_default_params(
|
|
122
|
+
ggml_backend_sched_t backend_sched,
|
|
123
|
+
enum ggml_opt_loss_type loss_type);
|
|
116
124
|
|
|
117
125
|
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
|
|
118
126
|
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
|
|
@@ -121,6 +129,7 @@ extern "C" {
|
|
|
121
129
|
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
|
|
122
130
|
|
|
123
131
|
// get underlying tensors that store data
|
|
132
|
+
// if not using static graphs these pointers become invalid with the next call to ggml_opt_alloc
|
|
124
133
|
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
|
|
125
134
|
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
|
|
126
135
|
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
|
|
@@ -128,11 +137,12 @@ extern "C" {
|
|
|
128
137
|
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
|
|
129
138
|
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
|
|
130
139
|
|
|
140
|
+
// get the gradient accumulator for a node from the forward graph
|
|
131
141
|
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
|
|
132
142
|
|
|
133
143
|
// ====== Optimization Result ======
|
|
134
144
|
|
|
135
|
-
GGML_API ggml_opt_result_t ggml_opt_result_init();
|
|
145
|
+
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
|
|
136
146
|
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
|
|
137
147
|
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
|
|
138
148
|
|
|
@@ -144,11 +154,20 @@ extern "C" {
|
|
|
144
154
|
|
|
145
155
|
// ====== Computation ======
|
|
146
156
|
|
|
147
|
-
//
|
|
148
|
-
GGML_API void
|
|
157
|
+
// if not using static graphs, this function must be called prior to ggml_opt_alloc
|
|
158
|
+
GGML_API void ggml_opt_prepare_alloc(
|
|
159
|
+
ggml_opt_context_t opt_ctx,
|
|
160
|
+
struct ggml_context * ctx_compute,
|
|
161
|
+
struct ggml_cgraph * gf,
|
|
162
|
+
struct ggml_tensor * inputs,
|
|
163
|
+
struct ggml_tensor * outputs);
|
|
164
|
+
|
|
165
|
+
// allocate the next graph for evaluation, either forward or forward + backward
|
|
166
|
+
// must be called exactly once prior to calling ggml_opt_eval
|
|
167
|
+
GGML_API void ggml_opt_alloc(ggml_opt_context_t opt_ctx, bool backward);
|
|
149
168
|
|
|
150
|
-
// do forward pass, increment result if not NULL, do backward pass
|
|
151
|
-
GGML_API void
|
|
169
|
+
// do forward pass, increment result if not NULL, do backward pass if allocated
|
|
170
|
+
GGML_API void ggml_opt_eval(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
|
152
171
|
|
|
153
172
|
// ############################################################################
|
|
154
173
|
// ## The high-level functions start here. They do not depend on any private ##
|
|
@@ -200,9 +219,9 @@ extern "C" {
|
|
|
200
219
|
// fit model defined by inputs and outputs to dataset
|
|
201
220
|
GGML_API void ggml_opt_fit(
|
|
202
221
|
ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
|
|
203
|
-
ggml_context
|
|
204
|
-
ggml_tensor
|
|
205
|
-
ggml_tensor
|
|
222
|
+
struct ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
|
|
223
|
+
struct ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
|
|
224
|
+
struct ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
|
|
206
225
|
ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
|
|
207
226
|
enum ggml_opt_loss_type loss_type, // loss to minimize
|
|
208
227
|
ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
|
|
@@ -7,6 +7,9 @@
|
|
|
7
7
|
extern "C" {
|
|
8
8
|
#endif
|
|
9
9
|
|
|
10
|
+
#define RPC_PROTO_MAJOR_VERSION 2
|
|
11
|
+
#define RPC_PROTO_MINOR_VERSION 0
|
|
12
|
+
#define RPC_PROTO_PATCH_VERSION 0
|
|
10
13
|
#define GGML_RPC_MAX_SERVERS 16
|
|
11
14
|
|
|
12
15
|
// backend API
|
|
@@ -17,7 +20,9 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
|
|
|
17
20
|
|
|
18
21
|
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
|
|
19
22
|
|
|
20
|
-
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
|
23
|
+
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
|
24
|
+
const char * cache_dir,
|
|
25
|
+
size_t free_mem, size_t total_mem);
|
|
21
26
|
|
|
22
27
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
|
|
23
28
|
|
|
@@ -393,8 +393,8 @@ extern "C" {
|
|
|
393
393
|
|
|
394
394
|
// precision
|
|
395
395
|
enum ggml_prec {
|
|
396
|
-
GGML_PREC_DEFAULT,
|
|
397
|
-
GGML_PREC_F32,
|
|
396
|
+
GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default
|
|
397
|
+
GGML_PREC_F32 = 10,
|
|
398
398
|
};
|
|
399
399
|
|
|
400
400
|
// model file types
|
|
@@ -481,6 +481,7 @@ extern "C" {
|
|
|
481
481
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
482
482
|
GGML_OP_IM2COL,
|
|
483
483
|
GGML_OP_IM2COL_BACK,
|
|
484
|
+
GGML_OP_CONV_2D_DW,
|
|
484
485
|
GGML_OP_CONV_TRANSPOSE_2D,
|
|
485
486
|
GGML_OP_POOL_1D,
|
|
486
487
|
GGML_OP_POOL_2D,
|
|
@@ -507,17 +508,12 @@ extern "C" {
|
|
|
507
508
|
|
|
508
509
|
GGML_OP_UNARY,
|
|
509
510
|
|
|
510
|
-
GGML_OP_MAP_UNARY,
|
|
511
|
-
GGML_OP_MAP_BINARY,
|
|
512
|
-
|
|
513
|
-
GGML_OP_MAP_CUSTOM1_F32,
|
|
514
|
-
GGML_OP_MAP_CUSTOM2_F32,
|
|
515
|
-
GGML_OP_MAP_CUSTOM3_F32,
|
|
516
|
-
|
|
517
511
|
GGML_OP_MAP_CUSTOM1,
|
|
518
512
|
GGML_OP_MAP_CUSTOM2,
|
|
519
513
|
GGML_OP_MAP_CUSTOM3,
|
|
520
514
|
|
|
515
|
+
GGML_OP_CUSTOM,
|
|
516
|
+
|
|
521
517
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
|
522
518
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
523
519
|
GGML_OP_OPT_STEP_ADAMW,
|
|
@@ -677,11 +673,18 @@ extern "C" {
|
|
|
677
673
|
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
|
678
674
|
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
|
679
675
|
|
|
676
|
+
// returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
|
|
680
677
|
GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
|
|
681
678
|
GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
|
|
682
679
|
GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
|
|
683
680
|
GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
|
|
684
681
|
|
|
682
|
+
// returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
|
|
683
|
+
GGML_API bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor);
|
|
684
|
+
|
|
685
|
+
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
|
686
|
+
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
|
|
687
|
+
|
|
685
688
|
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
686
689
|
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
687
690
|
|
|
@@ -765,7 +768,7 @@ extern "C" {
|
|
|
765
768
|
// Tensor flags
|
|
766
769
|
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
|
767
770
|
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
|
768
|
-
GGML_API void ggml_set_param(struct
|
|
771
|
+
GGML_API void ggml_set_param(struct ggml_tensor * tensor);
|
|
769
772
|
GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
|
|
770
773
|
|
|
771
774
|
//
|
|
@@ -935,7 +938,7 @@ extern "C" {
|
|
|
935
938
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
|
936
939
|
struct ggml_context * ctx,
|
|
937
940
|
struct ggml_tensor * a,
|
|
938
|
-
struct ggml_tensor * b);
|
|
941
|
+
struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
|
|
939
942
|
|
|
940
943
|
// concat a and b along dim
|
|
941
944
|
// used in stable-diffusion
|
|
@@ -1665,7 +1668,7 @@ extern "C" {
|
|
|
1665
1668
|
struct ggml_tensor * a,
|
|
1666
1669
|
struct ggml_tensor * b);
|
|
1667
1670
|
|
|
1668
|
-
// depthwise
|
|
1671
|
+
// depthwise (via im2col and mul_mat)
|
|
1669
1672
|
GGML_API struct ggml_tensor * ggml_conv_2d_dw(
|
|
1670
1673
|
struct ggml_context * ctx,
|
|
1671
1674
|
struct ggml_tensor * a, // convolution kernel
|
|
@@ -1677,6 +1680,22 @@ extern "C" {
|
|
|
1677
1680
|
int d0, // dilation dimension 0
|
|
1678
1681
|
int d1); // dilation dimension 1
|
|
1679
1682
|
|
|
1683
|
+
// Depthwise 2D convolution
|
|
1684
|
+
// may be faster than ggml_conv_2d_dw, but not available in all backends
|
|
1685
|
+
// a: KW KH 1 C convolution kernel
|
|
1686
|
+
// b: W H C N input data
|
|
1687
|
+
// res: W_out H_out C N
|
|
1688
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
|
|
1689
|
+
struct ggml_context * ctx,
|
|
1690
|
+
struct ggml_tensor * a,
|
|
1691
|
+
struct ggml_tensor * b,
|
|
1692
|
+
int stride0,
|
|
1693
|
+
int stride1,
|
|
1694
|
+
int pad0,
|
|
1695
|
+
int pad1,
|
|
1696
|
+
int dilation0,
|
|
1697
|
+
int dilation1);
|
|
1698
|
+
|
|
1680
1699
|
GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
|
|
1681
1700
|
struct ggml_context * ctx,
|
|
1682
1701
|
struct ggml_tensor * a,
|
|
@@ -1722,24 +1741,29 @@ extern "C" {
|
|
|
1722
1741
|
float p0,
|
|
1723
1742
|
float p1);
|
|
1724
1743
|
|
|
1725
|
-
|
|
1744
|
+
enum ggml_scale_mode {
|
|
1745
|
+
GGML_SCALE_MODE_NEAREST = 0,
|
|
1746
|
+
GGML_SCALE_MODE_BILINEAR = 1,
|
|
1747
|
+
};
|
|
1748
|
+
|
|
1749
|
+
// interpolate
|
|
1726
1750
|
// multiplies ne0 and ne1 by scale factor
|
|
1727
|
-
// used in stable-diffusion
|
|
1728
1751
|
GGML_API struct ggml_tensor * ggml_upscale(
|
|
1729
1752
|
struct ggml_context * ctx,
|
|
1730
1753
|
struct ggml_tensor * a,
|
|
1731
|
-
int scale_factor);
|
|
1754
|
+
int scale_factor,
|
|
1755
|
+
enum ggml_scale_mode mode);
|
|
1732
1756
|
|
|
1733
|
-
//
|
|
1734
|
-
//
|
|
1735
|
-
// used in tortoise.cpp
|
|
1757
|
+
// interpolate
|
|
1758
|
+
// interpolate scale to specified dimensions
|
|
1736
1759
|
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1737
1760
|
struct ggml_context * ctx,
|
|
1738
1761
|
struct ggml_tensor * a,
|
|
1739
1762
|
int ne0,
|
|
1740
1763
|
int ne1,
|
|
1741
1764
|
int ne2,
|
|
1742
|
-
int ne3);
|
|
1765
|
+
int ne3,
|
|
1766
|
+
enum ggml_scale_mode mode);
|
|
1743
1767
|
|
|
1744
1768
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
|
1745
1769
|
GGML_API struct ggml_tensor * ggml_pad(
|
|
@@ -1791,11 +1815,11 @@ extern "C" {
|
|
|
1791
1815
|
|
|
1792
1816
|
#define GGML_KQ_MASK_PAD 64
|
|
1793
1817
|
|
|
1794
|
-
// q: [
|
|
1795
|
-
// k: [
|
|
1796
|
-
// v: [
|
|
1797
|
-
// mask: [n_kv,
|
|
1798
|
-
// res: [
|
|
1818
|
+
// q: [n_embd_k, n_batch, n_head, 1]
|
|
1819
|
+
// k: [n_embd_k, n_kv, n_head_kv, 1]
|
|
1820
|
+
// v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
|
|
1821
|
+
// mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
|
1822
|
+
// res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
|
|
1799
1823
|
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
|
1800
1824
|
struct ggml_context * ctx,
|
|
1801
1825
|
struct ggml_tensor * q,
|
|
@@ -1916,83 +1940,6 @@ extern "C" {
|
|
|
1916
1940
|
|
|
1917
1941
|
// custom operators
|
|
1918
1942
|
|
|
1919
|
-
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
|
1920
|
-
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
|
1921
|
-
|
|
1922
|
-
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
|
1923
|
-
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
|
1924
|
-
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
|
1925
|
-
|
|
1926
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
|
1927
|
-
struct ggml_context * ctx,
|
|
1928
|
-
struct ggml_tensor * a,
|
|
1929
|
-
ggml_unary_op_f32_t fun),
|
|
1930
|
-
"use ggml_map_custom1 instead");
|
|
1931
|
-
|
|
1932
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
1933
|
-
struct ggml_context * ctx,
|
|
1934
|
-
struct ggml_tensor * a,
|
|
1935
|
-
ggml_unary_op_f32_t fun),
|
|
1936
|
-
"use ggml_map_custom1_inplace instead");
|
|
1937
|
-
|
|
1938
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
|
1939
|
-
struct ggml_context * ctx,
|
|
1940
|
-
struct ggml_tensor * a,
|
|
1941
|
-
struct ggml_tensor * b,
|
|
1942
|
-
ggml_binary_op_f32_t fun),
|
|
1943
|
-
"use ggml_map_custom2 instead");
|
|
1944
|
-
|
|
1945
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
1946
|
-
struct ggml_context * ctx,
|
|
1947
|
-
struct ggml_tensor * a,
|
|
1948
|
-
struct ggml_tensor * b,
|
|
1949
|
-
ggml_binary_op_f32_t fun),
|
|
1950
|
-
"use ggml_map_custom2_inplace instead");
|
|
1951
|
-
|
|
1952
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
|
1953
|
-
struct ggml_context * ctx,
|
|
1954
|
-
struct ggml_tensor * a,
|
|
1955
|
-
ggml_custom1_op_f32_t fun),
|
|
1956
|
-
"use ggml_map_custom1 instead");
|
|
1957
|
-
|
|
1958
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
1959
|
-
struct ggml_context * ctx,
|
|
1960
|
-
struct ggml_tensor * a,
|
|
1961
|
-
ggml_custom1_op_f32_t fun),
|
|
1962
|
-
"use ggml_map_custom1_inplace instead");
|
|
1963
|
-
|
|
1964
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
|
1965
|
-
struct ggml_context * ctx,
|
|
1966
|
-
struct ggml_tensor * a,
|
|
1967
|
-
struct ggml_tensor * b,
|
|
1968
|
-
ggml_custom2_op_f32_t fun),
|
|
1969
|
-
"use ggml_map_custom2 instead");
|
|
1970
|
-
|
|
1971
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
1972
|
-
struct ggml_context * ctx,
|
|
1973
|
-
struct ggml_tensor * a,
|
|
1974
|
-
struct ggml_tensor * b,
|
|
1975
|
-
ggml_custom2_op_f32_t fun),
|
|
1976
|
-
"use ggml_map_custom2_inplace instead");
|
|
1977
|
-
|
|
1978
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
|
1979
|
-
struct ggml_context * ctx,
|
|
1980
|
-
struct ggml_tensor * a,
|
|
1981
|
-
struct ggml_tensor * b,
|
|
1982
|
-
struct ggml_tensor * c,
|
|
1983
|
-
ggml_custom3_op_f32_t fun),
|
|
1984
|
-
"use ggml_map_custom3 instead");
|
|
1985
|
-
|
|
1986
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
|
1987
|
-
struct ggml_context * ctx,
|
|
1988
|
-
struct ggml_tensor * a,
|
|
1989
|
-
struct ggml_tensor * b,
|
|
1990
|
-
struct ggml_tensor * c,
|
|
1991
|
-
ggml_custom3_op_f32_t fun),
|
|
1992
|
-
"use ggml_map_custom3_inplace instead");
|
|
1993
|
-
|
|
1994
|
-
// custom operators v2
|
|
1995
|
-
|
|
1996
1943
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
|
1997
1944
|
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
|
1998
1945
|
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
|
@@ -2048,6 +1995,30 @@ extern "C" {
|
|
|
2048
1995
|
int n_tasks,
|
|
2049
1996
|
void * userdata);
|
|
2050
1997
|
|
|
1998
|
+
typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata);
|
|
1999
|
+
|
|
2000
|
+
GGML_API struct ggml_tensor * ggml_custom_4d(
|
|
2001
|
+
struct ggml_context * ctx,
|
|
2002
|
+
enum ggml_type type,
|
|
2003
|
+
int64_t ne0,
|
|
2004
|
+
int64_t ne1,
|
|
2005
|
+
int64_t ne2,
|
|
2006
|
+
int64_t ne3,
|
|
2007
|
+
struct ggml_tensor ** args,
|
|
2008
|
+
int n_args,
|
|
2009
|
+
ggml_custom_op_t fun,
|
|
2010
|
+
int n_tasks,
|
|
2011
|
+
void * userdata);
|
|
2012
|
+
|
|
2013
|
+
GGML_API struct ggml_tensor * ggml_custom_inplace(
|
|
2014
|
+
struct ggml_context * ctx,
|
|
2015
|
+
struct ggml_tensor * a,
|
|
2016
|
+
struct ggml_tensor ** args,
|
|
2017
|
+
int n_args,
|
|
2018
|
+
ggml_custom_op_t fun,
|
|
2019
|
+
int n_tasks,
|
|
2020
|
+
void * userdata);
|
|
2021
|
+
|
|
2051
2022
|
// loss function
|
|
2052
2023
|
|
|
2053
2024
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|
|
@@ -2078,15 +2049,14 @@ extern "C" {
|
|
|
2078
2049
|
|
|
2079
2050
|
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
|
2080
2051
|
GGML_API void ggml_build_backward_expand(
|
|
2081
|
-
struct ggml_context *
|
|
2082
|
-
struct
|
|
2083
|
-
struct
|
|
2084
|
-
bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
|
|
2052
|
+
struct ggml_context * ctx, // context for gradient computation
|
|
2053
|
+
struct ggml_cgraph * cgraph,
|
|
2054
|
+
struct ggml_tensor ** grad_accs);
|
|
2085
2055
|
|
|
2086
2056
|
// graph allocation in a context
|
|
2087
2057
|
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
|
2088
2058
|
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
|
|
2089
|
-
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
|
2059
|
+
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
|
|
2090
2060
|
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
|
2091
2061
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
|
|
2092
2062
|
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
|
@@ -65,7 +65,7 @@ if (GGML_LTO)
|
|
|
65
65
|
endif()
|
|
66
66
|
endif()
|
|
67
67
|
|
|
68
|
-
if (GGML_CCACHE)
|
|
68
|
+
if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
|
|
69
69
|
find_program(GGML_CCACHE_FOUND ccache)
|
|
70
70
|
find_program(GGML_SCCACHE_FOUND sccache)
|
|
71
71
|
|
|
@@ -214,7 +214,7 @@ add_library(ggml
|
|
|
214
214
|
target_link_libraries(ggml PUBLIC ggml-base)
|
|
215
215
|
|
|
216
216
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
217
|
-
target_link_libraries(ggml PRIVATE dl
|
|
217
|
+
target_link_libraries(ggml PRIVATE dl)
|
|
218
218
|
endif()
|
|
219
219
|
|
|
220
220
|
function(ggml_add_backend_library backend)
|
|
@@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
267
267
|
set(GGML_CPU_TAG_NAME ${tag_name})
|
|
268
268
|
# other: OPENMP LLAMAFILE CPU_HBM
|
|
269
269
|
foreach (feat NATIVE
|
|
270
|
+
SSE42
|
|
270
271
|
AVX AVX2 BMI2 AVX_VNNI FMA F16C
|
|
271
272
|
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
|
|
272
273
|
AMX_TILE AMX_INT8 AMX_BF16)
|
|
@@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
286
287
|
if (NOT GGML_BACKEND_DL)
|
|
287
288
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
|
288
289
|
endif()
|
|
289
|
-
ggml_add_cpu_backend_variant(
|
|
290
|
-
ggml_add_cpu_backend_variant(
|
|
291
|
-
ggml_add_cpu_backend_variant(
|
|
292
|
-
ggml_add_cpu_backend_variant(
|
|
293
|
-
ggml_add_cpu_backend_variant(
|
|
290
|
+
ggml_add_cpu_backend_variant(x64)
|
|
291
|
+
ggml_add_cpu_backend_variant(sse42 SSE42)
|
|
292
|
+
ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
|
|
293
|
+
ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
|
|
294
|
+
ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
|
|
295
|
+
ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
296
|
+
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
|
|
294
297
|
if (NOT MSVC)
|
|
295
298
|
# MSVC doesn't support AMX
|
|
296
|
-
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
299
|
+
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
297
300
|
endif()
|
|
298
301
|
elseif (GGML_CPU)
|
|
299
302
|
ggml_add_cpu_backend_variant_impl("")
|
|
@@ -816,7 +816,10 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
|
|
|
816
816
|
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
|
|
817
817
|
size_t node_size = 0;
|
|
818
818
|
if (!node->data && !node->view_src) {
|
|
819
|
-
|
|
819
|
+
// If we previously had data but don't now then reallocate
|
|
820
|
+
if (talloc->buffer_id < 0) {
|
|
821
|
+
return false;
|
|
822
|
+
}
|
|
820
823
|
node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
|
|
821
824
|
}
|
|
822
825
|
return talloc->size_max >= node_size;
|
|
@@ -56,7 +56,7 @@ size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
|
|
|
56
56
|
return SIZE_MAX;
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
|
|
59
|
+
size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
|
|
60
60
|
// get_alloc_size is optional, defaults to ggml_nbytes
|
|
61
61
|
if (buft->iface.get_alloc_size) {
|
|
62
62
|
size_t size = buft->iface.get_alloc_size(buft, tensor);
|
|
@@ -152,7 +152,7 @@ size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) {
|
|
|
152
152
|
return ggml_backend_buft_get_max_size(ggml_backend_buffer_get_type(buffer));
|
|
153
153
|
}
|
|
154
154
|
|
|
155
|
-
size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
|
155
|
+
size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor) {
|
|
156
156
|
return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor);
|
|
157
157
|
}
|
|
158
158
|
|
|
@@ -674,6 +674,8 @@ struct ggml_backend_sched {
|
|
|
674
674
|
char * context_buffer;
|
|
675
675
|
size_t context_buffer_size;
|
|
676
676
|
|
|
677
|
+
bool op_offload;
|
|
678
|
+
|
|
677
679
|
int debug;
|
|
678
680
|
};
|
|
679
681
|
|
|
@@ -766,7 +768,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
|
|
|
766
768
|
if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
|
|
767
769
|
int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
|
|
768
770
|
// check if a backend with higher prio wants to offload the op
|
|
769
|
-
if (src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
|
|
771
|
+
if (sched->op_offload && src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
|
|
770
772
|
for (int b = 0; b < src_backend_id; b++) {
|
|
771
773
|
if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
|
|
772
774
|
SET_CAUSE(tensor, "1.off");
|
|
@@ -1109,7 +1111,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
|
1109
1111
|
|
|
1110
1112
|
const int node_backend_id = tensor_backend_id(node);
|
|
1111
1113
|
|
|
1112
|
-
assert(node_backend_id != -1); // all nodes should be assigned by now
|
|
1114
|
+
assert(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
|
|
1113
1115
|
|
|
1114
1116
|
// check if we should start a new split based on the sources of the current node
|
|
1115
1117
|
bool need_new_split = false;
|
|
@@ -1452,7 +1454,8 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|
|
1452
1454
|
ggml_backend_buffer_type_t * bufts,
|
|
1453
1455
|
int n_backends,
|
|
1454
1456
|
size_t graph_size,
|
|
1455
|
-
bool parallel) {
|
|
1457
|
+
bool parallel,
|
|
1458
|
+
bool op_offload) {
|
|
1456
1459
|
GGML_ASSERT(n_backends > 0);
|
|
1457
1460
|
GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
|
|
1458
1461
|
GGML_ASSERT(ggml_backend_dev_type(ggml_backend_get_device(backends[n_backends - 1])) == GGML_BACKEND_DEVICE_TYPE_CPU);
|
|
@@ -1497,6 +1500,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|
|
1497
1500
|
}
|
|
1498
1501
|
|
|
1499
1502
|
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
|
|
1503
|
+
sched->op_offload = op_offload;
|
|
1500
1504
|
|
|
1501
1505
|
ggml_backend_sched_reset(sched);
|
|
1502
1506
|
|
|
@@ -51,13 +51,11 @@ if (CANN_INSTALL_DIR)
|
|
|
51
51
|
${CANN_INSTALL_DIR}/acllib/include
|
|
52
52
|
)
|
|
53
53
|
|
|
54
|
-
add_subdirectory(kernels)
|
|
55
54
|
list(APPEND CANN_LIBRARIES
|
|
56
55
|
ascendcl
|
|
57
56
|
nnopbase
|
|
58
57
|
opapi
|
|
59
58
|
acl_op_compiler
|
|
60
|
-
ascendc_kernels
|
|
61
59
|
)
|
|
62
60
|
|
|
63
61
|
file(GLOB GGML_SOURCES_CANN "*.cpp")
|
|
@@ -41,6 +41,8 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
|
|
|
41
41
|
return ACL_INT4;
|
|
42
42
|
case GGML_TYPE_Q8_0:
|
|
43
43
|
return ACL_INT8;
|
|
44
|
+
case GGML_TYPE_I64:
|
|
45
|
+
return ACL_INT64;
|
|
44
46
|
default:
|
|
45
47
|
return ACL_DT_UNDEFINED;
|
|
46
48
|
}
|
|
@@ -54,9 +56,7 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
|
|
54
56
|
// added.
|
|
55
57
|
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
|
|
56
58
|
|
|
57
|
-
int64_t acl_storage_len = 0;
|
|
58
59
|
if (ne == nullptr) {
|
|
59
|
-
acl_storage_len = ggml_nbytes(tensor);
|
|
60
60
|
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
|
61
61
|
acl_ne[i] = tensor->ne[i];
|
|
62
62
|
// The step size of acl is in elements.
|
|
@@ -65,14 +65,18 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
|
|
65
65
|
} else {
|
|
66
66
|
// With bcast
|
|
67
67
|
for (int i = 0; i < dims; i++) {
|
|
68
|
-
acl_storage_len += (ne[i] - 1) * nb[i];
|
|
69
68
|
acl_ne[i] = ne[i];
|
|
70
69
|
acl_stride[i] = nb[i] / ggml_element_size(tensor);
|
|
71
70
|
}
|
|
72
71
|
}
|
|
73
72
|
|
|
74
|
-
// Reverse ne and stride.
|
|
75
73
|
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
|
|
74
|
+
int64_t acl_storage_len = 1;
|
|
75
|
+
for (int i = 0; i < final_dims; i++) {
|
|
76
|
+
acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Reverse ne and stride.
|
|
76
80
|
std::reverse(acl_ne, acl_ne + final_dims);
|
|
77
81
|
std::reverse(acl_stride, acl_stride + final_dims);
|
|
78
82
|
|
|
@@ -101,14 +101,14 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
|
|
|
101
101
|
tmp_stride[i] = nb[i] / type_size;
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
-
|
|
105
|
-
std::reverse(tmp_stride, tmp_stride + dims);
|
|
106
|
-
|
|
107
|
-
int64_t acl_storage_len = 0;
|
|
104
|
+
int64_t acl_storage_len = 1;
|
|
108
105
|
for (int i = 0; i < dims; i++) {
|
|
109
|
-
acl_storage_len += (ne[i] - 1) * nb[i];
|
|
106
|
+
acl_storage_len += (tmp_ne[i] - 1) * tmp_stride[i];
|
|
110
107
|
}
|
|
111
108
|
|
|
109
|
+
std::reverse(tmp_ne, tmp_ne + dims);
|
|
110
|
+
std::reverse(tmp_stride, tmp_stride + dims);
|
|
111
|
+
|
|
112
112
|
aclTensor* acl_tensor =
|
|
113
113
|
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
|
|
114
114
|
format, &acl_storage_len, 1, data_ptr);
|